postgres
diff --git a/‎src/backend/access/transam/xlog.c
Lines changed: 33 additions & 0 deletions b/‎src/backend/access/transam/xlog.c
Lines changed: 33 additions & 0 deletions
diff --git a/‎src/backend/backup/basebackup_incremental.c
Lines changed: 6 additions & 84 deletions b/‎src/backend/backup/basebackup_incremental.c
Lines changed: 6 additions & 84 deletions
diff --git a/‎src/backend/postmaster/walsummarizer.c
Lines changed: 118 additions & 24 deletions b/‎src/backend/postmaster/walsummarizer.c
Lines changed: 118 additions & 24 deletions
diff --git a/‎src/bin/pg_combinebackup/meson.build
Lines changed: 1 addition & 0 deletions b/‎src/bin/pg_combinebackup/meson.build
Lines changed: 1 addition & 0 deletions
@@ -498,6 +498,11 @@ typedef struct XLogCtlData
 	 * If we create a new timeline when the system was started up,
 	 * PrevTimeLineID is the old timeline's ID that we forked off from.
 	 * Otherwise it's equal to InsertTimeLineID.
+	 *
+	 * We set these fields while holding info_lck. Most that reads these
+	 * values knows that recovery is no longer in progress and so can safely
+	 * read the value without a lock, but code that could be run either during
+	 * or after recovery can take info_lck while reading these values.
 	 */
 	TimeLineID	InsertTimeLineID;
 	TimeLineID	PrevTimeLineID;
@@ -5315,6 +5320,13 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
 			char		partialfname[MAXFNAMELEN];
 			char		partialpath[MAXPGPATH];
 
+			/*
+			 * If we're summarizing WAL, we can't rename the partial file
+			 * until the summarizer finishes with it, else it will fail.
+			 */
+			if (summarize_wal)
+				WaitForWalSummarization(EndOfLog);
+
 			XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
 			snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
 			snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
@@ -5945,8 +5957,10 @@ StartupXLOG(void)
 	}
 
 	/* Save the selected TimeLineID in shared memory, too */
+	SpinLockAcquire(&XLogCtl->info_lck);
 	XLogCtl->InsertTimeLineID = newTLI;
 	XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
+	SpinLockRelease(&XLogCtl->info_lck);
 
 	/*
 	 * Actually, if WAL ended in an incomplete record, skip the parts that
@@ -6481,6 +6495,25 @@ GetWALInsertionTimeLine(void)
 	return XLogCtl->InsertTimeLineID;
 }
 
+/*
+ * GetWALInsertionTimeLineIfSet -- If the system is not in recovery, returns
+ * the WAL insertion timeline; else, returns 0. Wherever possible, use
+ * GetWALInsertionTimeLine() instead, since it's cheaper. Note that this
+ * function decides recovery has ended as soon as the insert TLI is set, which
+ * happens before we set XLogCtl->SharedRecoveryState to RECOVERY_STATE_DONE.
+ */
+TimeLineID
+GetWALInsertionTimeLineIfSet(void)
+{
+	TimeLineID	insertTLI;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	insertTLI = XLogCtl->InsertTimeLineID;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return insertTLI;
+}
+
 /*
  * GetLastImportantRecPtr -- Returns the LSN of the last important record
  * inserted. All records not explicitly marked as unimportant are considered
 
@@ -277,12 +277,6 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 	TimeLineID	earliest_wal_range_tli = 0;
 	XLogRecPtr	earliest_wal_range_start_lsn = InvalidXLogRecPtr;
 	TimeLineID	latest_wal_range_tli = 0;
-	XLogRecPtr	summarized_lsn;
-	XLogRecPtr	pending_lsn;
-	XLogRecPtr	prior_pending_lsn = InvalidXLogRecPtr;
-	int			deadcycles = 0;
-	TimestampTz initial_time,
-				current_time;
 
 	Assert(ib->buf.data == NULL);
 
@@ -458,85 +452,13 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 	}
 
 	/*
-	 * Wait for WAL summarization to catch up to the backup start LSN (but
-	 * time out if it doesn't do so quickly enough).
+	 * Wait for WAL summarization to catch up to the backup start LSN. This
+	 * will throw an error if the WAL summarizer appears to be stuck. If WAL
+	 * summarization gets disabled while we're waiting, this will return
+	 * immediately, and we'll error out further down if the WAL summaries are
+	 * incomplete.
 	 */
-	initial_time = current_time = GetCurrentTimestamp();
-	while (1)
-	{
-		long		timeout_in_ms = 10000;
-		long		elapsed_seconds;
-
-		/*
-		 * Align the wait time to prevent drift. This doesn't really matter,
-		 * but we'd like the warnings about how long we've been waiting to say
-		 * 10 seconds, 20 seconds, 30 seconds, 40 seconds ... without ever
-		 * drifting to something that is not a multiple of ten.
-		 */
-		timeout_in_ms -=
-			TimestampDifferenceMilliseconds(initial_time, current_time) %
-			timeout_in_ms;
-
-		/* Wait for up to 10 seconds. */
-		summarized_lsn = WaitForWalSummarization(backup_state->startpoint,
-												 timeout_in_ms, &pending_lsn);
-
-		/* If WAL summarization has progressed sufficiently, stop waiting. */
-		if (summarized_lsn >= backup_state->startpoint)
-			break;
-
-		/*
-		 * Keep track of the number of cycles during which there has been no
-		 * progression of pending_lsn. If pending_lsn is not advancing, that
-		 * means that not only are no new files appearing on disk, but we're
-		 * not even incorporating new records into the in-memory state.
-		 */
-		if (pending_lsn > prior_pending_lsn)
-		{
-			prior_pending_lsn = pending_lsn;
-			deadcycles = 0;
-		}
-		else
-			++deadcycles;
-
-		/*
-		 * If we've managed to wait for an entire minute without the WAL
-		 * summarizer absorbing a single WAL record, error out; probably
-		 * something is wrong.
-		 *
-		 * We could consider also erroring out if the summarizer is taking too
-		 * long to catch up, but it's not clear what rate of progress would be
-		 * acceptable and what would be too slow. So instead, we just try to
-		 * error out in the case where there's no progress at all. That seems
-		 * likely to catch a reasonable number of the things that can go wrong
-		 * in practice (e.g. the summarizer process is completely hung, say
-		 * because somebody hooked up a debugger to it or something) without
-		 * giving up too quickly when the system is just slow.
-		 */
-		if (deadcycles >= 6)
-			ereport(ERROR,
-					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					 errmsg("WAL summarization is not progressing"),
-					 errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.",
-							   LSN_FORMAT_ARGS(backup_state->startpoint),
-							   LSN_FORMAT_ARGS(summarized_lsn),
-							   LSN_FORMAT_ARGS(pending_lsn))));
-
-		/*
-		 * Otherwise, just let the user know what's happening.
-		 */
-		current_time = GetCurrentTimestamp();
-		elapsed_seconds =
-			TimestampDifferenceMilliseconds(initial_time, current_time) / 1000;
-		ereport(WARNING,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("still waiting for WAL summarization through %X/%X after %ld seconds",
-						LSN_FORMAT_ARGS(backup_state->startpoint),
-						elapsed_seconds),
-				 errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.",
-						   LSN_FORMAT_ARGS(summarized_lsn),
-						   LSN_FORMAT_ARGS(pending_lsn))));
-	}
+	WaitForWalSummarization(backup_state->startpoint);
 
 	/*
 	 * Retrieve a list of all WAL summaries on any timeline that overlap with
 
@@ -650,54 +650,132 @@ SetWalSummarizerLatch(void)
 }
 
 /*
- * Wait until WAL summarization reaches the given LSN, but not longer than
- * the given timeout.
+ * Wait until WAL summarization reaches the given LSN, but time out with an
+ * error if the summarizer seems to be stick.
  *
- * The return value is the first still-unsummarized LSN. If it's greater than
- * or equal to the passed LSN, then that LSN was reached. If not, we timed out.
- *
- * Either way, *pending_lsn is set to the value taken from WalSummarizerCtl.
+ * Returns immediately if summarize_wal is turned off while we wait. Caller
+ * is expected to handle this case, if necessary.
  */
-XLogRecPtr
-WaitForWalSummarization(XLogRecPtr lsn, long timeout, XLogRecPtr *pending_lsn)
+void
+WaitForWalSummarization(XLogRecPtr lsn)
 {
-	TimestampTz start_time = GetCurrentTimestamp();
-	TimestampTz deadline = TimestampTzPlusMilliseconds(start_time, timeout);
-	XLogRecPtr	summarized_lsn;
+	TimestampTz initial_time,
+				cycle_time,
+				current_time;
+	XLogRecPtr	prior_pending_lsn = InvalidXLogRecPtr;
+	int			deadcycles = 0;
 
-	Assert(!XLogRecPtrIsInvalid(lsn));
-	Assert(timeout > 0);
+	initial_time = cycle_time = GetCurrentTimestamp();
 
 	while (1)
 	{
-		TimestampTz now;
-		long		remaining_timeout;
+		long		timeout_in_ms = 10000;
+		XLogRecPtr	summarized_lsn;
+		XLogRecPtr	pending_lsn;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* If WAL summarization is disabled while we're waiting, give up. */
+		if (!summarize_wal)
+			return;
 
 		/*
 		 * If the LSN summarized on disk has reached the target value, stop.
 		 */
 		LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
 		summarized_lsn = WalSummarizerCtl->summarized_lsn;
-		*pending_lsn = WalSummarizerCtl->pending_lsn;
+		pending_lsn = WalSummarizerCtl->pending_lsn;
 		LWLockRelease(WALSummarizerLock);
+
+		/* If WAL summarization has progressed sufficiently, stop waiting. */
 		if (summarized_lsn >= lsn)
 			break;
 
-		/* Timeout reached? If yes, stop. */
-		now = GetCurrentTimestamp();
-		remaining_timeout = TimestampDifferenceMilliseconds(now, deadline);
-		if (remaining_timeout <= 0)
-			break;
+		/* Recheck current time. */
+		current_time = GetCurrentTimestamp();
+
+		/* Have we finished the current cycle of waiting? */
+		if (TimestampDifferenceMilliseconds(cycle_time,
+											current_time) >= timeout_in_ms)
+		{
+			long		elapsed_seconds;
+
+			/* Begin new wait cycle. */
+			cycle_time = TimestampTzPlusMilliseconds(cycle_time,
+													 timeout_in_ms);
+
+			/*
+			 * Keep track of the number of cycles during which there has been
+			 * no progression of pending_lsn. If pending_lsn is not advancing,
+			 * that means that not only are no new files appearing on disk,
+			 * but we're not even incorporating new records into the in-memory
+			 * state.
+			 */
+			if (pending_lsn > prior_pending_lsn)
+			{
+				prior_pending_lsn = pending_lsn;
+				deadcycles = 0;
+			}
+			else
+				++deadcycles;
+
+			/*
+			 * If we've managed to wait for an entire minute without the WAL
+			 * summarizer absorbing a single WAL record, error out; probably
+			 * something is wrong.
+			 *
+			 * We could consider also erroring out if the summarizer is taking
+			 * too long to catch up, but it's not clear what rate of progress
+			 * would be acceptable and what would be too slow. So instead, we
+			 * just try to error out in the case where there's no progress at
+			 * all. That seems likely to catch a reasonable number of the
+			 * things that can go wrong in practice (e.g. the summarizer
+			 * process is completely hung, say because somebody hooked up a
+			 * debugger to it or something) without giving up too quickly when
+			 * the system is just slow.
+			 */
+			if (deadcycles >= 6)
+				ereport(ERROR,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("WAL summarization is not progressing"),
+						 errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.",
+								   LSN_FORMAT_ARGS(lsn),
+								   LSN_FORMAT_ARGS(summarized_lsn),
+								   LSN_FORMAT_ARGS(pending_lsn))));
+
+
+			/*
+			 * Otherwise, just let the user know what's happening.
+			 */
+			elapsed_seconds =
+				TimestampDifferenceMilliseconds(initial_time,
+												current_time) / 1000;
+			ereport(WARNING,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("still waiting for WAL summarization through %X/%X after %ld seconds",
+							LSN_FORMAT_ARGS(lsn),
+							elapsed_seconds),
+					 errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.",
+							   LSN_FORMAT_ARGS(summarized_lsn),
+							   LSN_FORMAT_ARGS(pending_lsn))));
+		}
+
+		/*
+		 * Align the wait time to prevent drift. This doesn't really matter,
+		 * but we'd like the warnings about how long we've been waiting to say
+		 * 10 seconds, 20 seconds, 30 seconds, 40 seconds ... without ever
+		 * drifting to something that is not a multiple of ten.
+		 */
+		timeout_in_ms -=
+			TimestampDifferenceMilliseconds(cycle_time, current_time);
 
 		/* Wait and see. */
 		ConditionVariableTimedSleep(&WalSummarizerCtl->summary_file_cv,
-									remaining_timeout,
+									timeout_in_ms,
 									WAIT_EVENT_WAL_SUMMARY_READY);
 	}
 
 	ConditionVariableCancelSleep();
-
-	return summarized_lsn;
 }
 
 /*
@@ -730,6 +808,22 @@ GetLatestLSN(TimeLineID *tli)
 		TimeLineID	flush_tli;
 		XLogRecPtr	replay_lsn;
 		TimeLineID	replay_tli;
+		TimeLineID	insert_tli;
+
+		/*
+		 * After the insert TLI has been set and before the control file has
+		 * been updated to show the DB in production, RecoveryInProgress()
+		 * will return true, because it's not yet safe for all backends to
+		 * begin writing WAL. However, replay has already ceased, so from our
+		 * point of view, recovery is already over. We should summarize up to
+		 * where replay stopped and then prepare to resume at the start of the
+		 * insert timeline.
+		 */
+		if ((insert_tli = GetWALInsertionTimeLineIfSet()) != 0)
+		{
+			*tli = insert_tli;
+			return GetXLogReplayRecPtr(NULL);
+		}
 
 		/*
 		 * What we really want to know is how much WAL has been flushed to
 
@@ -35,6 +35,7 @@ tests += {
       't/005_integrity.pl',
       't/006_db_file_copy.pl',
       't/007_wal_level_minimal.pl',
+      't/008_promote.pl',
     ],
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,7 @@ tests += {`
`35`	`35`	`'t/005_integrity.pl',`
`36`	`36`	`'t/006_db_file_copy.pl',`
`37`	`37`	`'t/007_wal_level_minimal.pl',`
	`38`	`+ 't/008_promote.pl',`
`38`	`39`	`],`
`39`	`40`	`}`
`40`	`41`	`}`