From 38b602b0289fe1dbaf31d5737fba2d42a1e90371 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 3 Sep 2025 13:57:48 -0500 Subject: [PATCH 01/73] Move dynamically-allocated LWLock tranche names to shared memory. There are two ways for shared libraries to allocate their own LWLock tranches. One way is to call RequestNamedLWLockTranche() in a shmem_request_hook, which requires the library to be loaded via shared_preload_libraries. The other way is to call LWLockNewTrancheId(), which is not subject to the same restrictions. However, LWLockNewTrancheId() does require each backend to store the tranche's name in backend-local memory via LWLockRegisterTranche(). This API is a little cumbersome and leads to things like unhelpful pg_stat_activity.wait_event values in backends that haven't loaded the library. This commit moves these LWLock tranche names to shared memory, thus eliminating the need for each backend to call LWLockRegisterTranche(). Instead, the tranche name must be provided to LWLockNewTrancheId(), which immediately makes the name available to all backends. Since the tranche name array is append-only, lookups can ordinarily avoid locking as long as their local copy of the LWLock counter is greater than the requested tranche ID. One downside of this approach is that we now have a hard limit on both the length of tranche names (NAMEDATALEN-1 bytes) and the number of dynamically-allocated tranches (256). Besides a limit of NAMEDATALEN-1 bytes for tranche names registered via RequestNamedLWLockTranche(), no such limits previously existed. We could avoid these new limits by using dynamic shared memory, but the complexity involved didn't seem worth it. We briefly considered making the tranche limit user-configurable but ultimately decided against that, too. Since there is still a lot of time left in the v19 development cycle, it's possible we will revisit this choice. 
Author: Sami Imseih Reviewed-by: Bertrand Drouvot Reviewed-by: Tom Lane Reviewed-by: Rahila Syed Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CAA5RZ0vvED3naph8My8Szv6DL4AxOVK3eTPS0qXsaKi%3DbVdW2A%40mail.gmail.com --- contrib/pg_prewarm/autoprewarm.c | 3 +- doc/src/sgml/xfunc.sgml | 15 +- src/backend/postmaster/launch_backend.c | 6 +- src/backend/storage/ipc/dsm_registry.c | 12 +- src/backend/storage/lmgr/lwlock.c | 216 +++++++++--------- src/include/storage/lwlock.h | 25 +- src/test/modules/test_dsa/test_dsa.c | 6 +- .../test_dsm_registry/test_dsm_registry.c | 3 +- .../modules/test_radixtree/test_radixtree.c | 9 +- src/test/modules/test_slru/test_slru.c | 6 +- .../modules/test_tidstore/test_tidstore.c | 3 +- 11 files changed, 130 insertions(+), 174 deletions(-) diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index c01b9c7e6a4d6..880e897796a1e 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -864,7 +864,7 @@ apw_init_state(void *ptr) { AutoPrewarmSharedState *state = (AutoPrewarmSharedState *) ptr; - LWLockInitialize(&state->lock, LWLockNewTrancheId()); + LWLockInitialize(&state->lock, LWLockNewTrancheId("autoprewarm")); state->bgworker_pid = InvalidPid; state->pid_using_dumpfile = InvalidPid; } @@ -883,7 +883,6 @@ apw_init_shmem(void) sizeof(AutoPrewarmSharedState), apw_init_state, &found); - LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm"); return found; } diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index f116d0648e559..da21ef5689184 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -3759,7 +3759,7 @@ LWLockPadded *GetNamedLWLockTranche(const char *tranche_name) shmem_request_hook. To do so, first allocate a tranche_id by calling: -int LWLockNewTrancheId(void) +int LWLockNewTrancheId(const char *name) Next, initialize each LWLock, passing the new tranche_id as an argument: @@ -3777,17 +3777,8 @@ void LWLockInitialize(LWLock *lock, int tranche_id) - Finally, each backend using the tranche_id should - associate it with a tranche_name by calling: - -void LWLockRegisterTranche(int tranche_id, const char *tranche_name) - - - - - A complete usage example of LWLockNewTrancheId, - LWLockInitialize, and - LWLockRegisterTranche can be found in + A complete usage example of LWLockNewTrancheId and + LWLockInitialize can be found in contrib/pg_prewarm/autoprewarm.c in the PostgreSQL source tree. 
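The documentation hunk above shows the slimmed-down two-call pattern. For illustration, here is a minimal sketch of how an extension's shmem_startup_hook might use the new API, modeled on the autoprewarm code referenced in the docs. The MyExtSharedState struct, the "my_ext" name, and the hook function are hypothetical, and the matching RequestAddinShmemSpace() call in a shmem_request_hook is omitted for brevity:

#include "postgres.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"

/* Hypothetical shared state for an example extension. */
typedef struct MyExtSharedState
{
	LWLock		lock;
	int64		counter;
} MyExtSharedState;

static MyExtSharedState *my_ext_state = NULL;

static void
my_ext_shmem_startup(void)
{
	bool		found;

	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
	my_ext_state = ShmemInitStruct("my_ext", sizeof(MyExtSharedState), &found);
	if (!found)
	{
		/*
		 * LWLockNewTrancheId() now both allocates the tranche ID and
		 * publishes the name in shared memory, so no per-backend
		 * LWLockRegisterTranche() call is needed afterwards.
		 */
		LWLockInitialize(&my_ext_state->lock, LWLockNewTrancheId("my_ext"));
		my_ext_state->counter = 0;
	}
	LWLockRelease(AddinShmemInitLock);
}

Because the name lives in the main shared memory segment, any backend that later waits on this lock reports "my_ext" in pg_stat_activity.wait_event even if it never ran the extension's initialization code itself.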
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index cd9547b03a32b..a38979c50e4bb 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -101,7 +101,7 @@ typedef struct struct InjectionPointsCtl *ActiveInjectionPoints; #endif int NamedLWLockTrancheRequests; - NamedLWLockTranche *NamedLWLockTrancheArray; + char **LWLockTrancheNames; int *LWLockCounter; LWLockPadded *MainLWLockArray; slock_t *ProcStructLock; @@ -761,7 +761,7 @@ save_backend_variables(BackendParameters *param, #endif param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; - param->NamedLWLockTrancheArray = NamedLWLockTrancheArray; + param->LWLockTrancheNames = LWLockTrancheNames; param->LWLockCounter = LWLockCounter; param->MainLWLockArray = MainLWLockArray; param->ProcStructLock = ProcStructLock; @@ -1022,7 +1022,7 @@ restore_backend_variables(BackendParameters *param) #endif NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; - NamedLWLockTrancheArray = param->NamedLWLockTrancheArray; + LWLockTrancheNames = param->LWLockTrancheNames; LWLockCounter = param->LWLockCounter; MainLWLockArray = param->MainLWLockArray; ProcStructLock = param->ProcStructLock; diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c index ca12815f4a854..971309251062d 100644 --- a/src/backend/storage/ipc/dsm_registry.c +++ b/src/backend/storage/ipc/dsm_registry.c @@ -299,8 +299,7 @@ GetNamedDSA(const char *name, bool *found) entry->type = DSMR_ENTRY_TYPE_DSA; /* Initialize the LWLock tranche for the DSA. */ - state->tranche = LWLockNewTrancheId(); - LWLockRegisterTranche(state->tranche, name); + state->tranche = LWLockNewTrancheId(name); /* Initialize the DSA. */ ret = dsa_create(state->tranche); @@ -321,9 +320,6 @@ GetNamedDSA(const char *name, bool *found) ereport(ERROR, (errmsg("requested DSA already attached to current process"))); - /* Initialize existing LWLock tranche for the DSA. */ - LWLockRegisterTranche(state->tranche, name); - /* Attach to existing DSA. */ ret = dsa_attach(state->handle); dsa_pin_mapping(ret); @@ -378,8 +374,7 @@ GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) entry->type = DSMR_ENTRY_TYPE_DSH; /* Initialize the LWLock tranche for the hash table. */ - dsh_state->tranche = LWLockNewTrancheId(); - LWLockRegisterTranche(dsh_state->tranche, name); + dsh_state->tranche = LWLockNewTrancheId(name); /* Initialize the DSA for the hash table. */ dsa = dsa_create(dsh_state->tranche); @@ -409,9 +404,6 @@ GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) ereport(ERROR, (errmsg("requested DSHash already attached to current process"))); - /* Initialize existing LWLock tranche for the hash table. */ - LWLockRegisterTranche(dsh_state->tranche, name); - /* Attach to existing DSA for the hash table. */ dsa = dsa_attach(dsh_state->dsa_handle); dsa_pin_mapping(dsa); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index a4aecd1fbc34f..258cdebd0f5c9 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -126,8 +126,8 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0, * in lwlocklist.h. We absorb the names of these tranches, too. * * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche - * or LWLockRegisterTranche. The names of these that are known in the current - * process appear in LWLockTrancheNames[]. + * or LWLockNewTrancheId. 
These names are stored in shared memory and can be + * accessed via LWLockTrancheNames. * * All these names are user-visible as wait event names, so choose with care * ... and do not forget to update the documentation's list of wait events. @@ -146,11 +146,12 @@ StaticAssertDecl(lengthof(BuiltinTrancheNames) == /* * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and - * stores the names of all dynamically-created tranches known to the current - * process. Any unused entries in the array will contain NULL. + * points to the shared memory locations of the names of all + * dynamically-created tranches. Backends inherit the pointer by fork from the + * postmaster (except in the EXEC_BACKEND case, where we have special measures + * to pass it down). */ -static const char **LWLockTrancheNames = NULL; -static int LWLockTrancheNamesAllocated = 0; +char **LWLockTrancheNames = NULL; /* * This points to the main array of LWLocks in shared memory. Backends inherit @@ -184,20 +185,22 @@ typedef struct NamedLWLockTrancheRequest } NamedLWLockTrancheRequest; static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL; -static int NamedLWLockTrancheRequestsAllocated = 0; /* - * NamedLWLockTrancheRequests is both the valid length of the request array, - * and the length of the shared-memory NamedLWLockTrancheArray later on. - * This variable and NamedLWLockTrancheArray are non-static so that - * postmaster.c can copy them to child processes in EXEC_BACKEND builds. + * NamedLWLockTrancheRequests is the valid length of the request array. This + * variable is non-static so that postmaster.c can copy it to child processes + * in EXEC_BACKEND builds. */ int NamedLWLockTrancheRequests = 0; -/* points to data in shared memory: */ -NamedLWLockTranche *NamedLWLockTrancheArray = NULL; +/* shared memory counter of registered tranches */ int *LWLockCounter = NULL; +/* backend-local counter of registered tranches */ +static int LocalLWLockCounter; + +#define MAX_NAMED_TRANCHES 256 + static void InitializeLWLocks(void); static inline void LWLockReportWaitStart(LWLock *lock); static inline void LWLockReportWaitEnd(void); @@ -392,31 +395,28 @@ Size LWLockShmemSize(void) { Size size; - int i; int numLocks = NUM_FIXED_LWLOCKS; /* Calculate total number of locks needed in the main array. */ numLocks += NumLWLocksForNamedTranches(); - /* Space for dynamic allocation counter, plus room for alignment. */ - size = sizeof(int) + LWLOCK_PADDED_SIZE; + /* Space for dynamic allocation counter. */ + size = MAXALIGN(sizeof(int)); - /* Space for the LWLock array. */ - size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded))); + /* Space for named tranches. */ + size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *))); + size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN)); - /* space for named tranches. */ - size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche))); - - /* space for name of each tranche. */ - for (i = 0; i < NamedLWLockTrancheRequests; i++) - size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1); + /* Space for the LWLock array, plus room for cache line alignment. */ + size = add_size(size, LWLOCK_PADDED_SIZE); + size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded))); return size; } /* * Allocate shmem space for the main LWLock array and all tranches and - * initialize it. We also register extension LWLock tranches here. + * initialize it. 
*/ void CreateLWLocks(void) @@ -432,7 +432,16 @@ CreateLWLocks(void) /* Initialize the dynamic-allocation counter for tranches */ LWLockCounter = (int *) ptr; *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED; - ptr += sizeof(int); + ptr += MAXALIGN(sizeof(int)); + + /* Initialize tranche names */ + LWLockTrancheNames = (char **) ptr; + ptr += MAX_NAMED_TRANCHES * sizeof(char *); + for (int i = 0; i < MAX_NAMED_TRANCHES; i++) + { + LWLockTrancheNames[i] = ptr; + ptr += NAMEDATALEN; + } /* Ensure desired alignment of LWLock array */ ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE; @@ -441,11 +450,6 @@ CreateLWLocks(void) /* Initialize all LWLocks */ InitializeLWLocks(); } - - /* Register named extension LWLock tranches in the current process. */ - for (int i = 0; i < NamedLWLockTrancheRequests; i++) - LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId, - NamedLWLockTrancheArray[i].trancheName); } /* @@ -454,7 +458,6 @@ CreateLWLocks(void) static void InitializeLWLocks(void) { - int numNamedLocks = NumLWLocksForNamedTranches(); int id; int i; int j; @@ -485,32 +488,18 @@ InitializeLWLocks(void) */ if (NamedLWLockTrancheRequests > 0) { - char *trancheNames; - - NamedLWLockTrancheArray = (NamedLWLockTranche *) - &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks]; - - trancheNames = (char *) NamedLWLockTrancheArray + - (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche)); lock = &MainLWLockArray[NUM_FIXED_LWLOCKS]; for (i = 0; i < NamedLWLockTrancheRequests; i++) { NamedLWLockTrancheRequest *request; - NamedLWLockTranche *tranche; - char *name; + int tranche; request = &NamedLWLockTrancheRequestArray[i]; - tranche = &NamedLWLockTrancheArray[i]; - - name = trancheNames; - trancheNames += strlen(request->tranche_name) + 1; - strcpy(name, request->tranche_name); - tranche->trancheId = LWLockNewTrancheId(); - tranche->trancheName = name; + tranche = LWLockNewTrancheId(request->tranche_name); for (j = 0; j < request->num_lwlocks; j++, lock++) - LWLockInitialize(&lock->lock, tranche->trancheId); + LWLockInitialize(&lock->lock, tranche); } } } @@ -562,59 +551,47 @@ GetNamedLWLockTranche(const char *tranche_name) } /* - * Allocate a new tranche ID. + * Allocate a new tranche ID with the provided name. */ int -LWLockNewTrancheId(void) +LWLockNewTrancheId(const char *name) { int result; - /* We use the ShmemLock spinlock to protect LWLockCounter */ - SpinLockAcquire(ShmemLock); - result = (*LWLockCounter)++; - SpinLockRelease(ShmemLock); + if (!name) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("tranche name cannot be NULL"))); - return result; -} + if (strlen(name) >= NAMEDATALEN) + ereport(ERROR, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("tranche name too long"), + errdetail("LWLock tranche names must be no longer than %d bytes.", + NAMEDATALEN - 1))); -/* - * Register a dynamic tranche name in the lookup table of the current process. - * - * This routine will save a pointer to the tranche name passed as an argument, - * so the name should be allocated in a backend-lifetime context - * (shared memory, TopMemoryContext, static constant, or similar). - * - * The tranche name will be user-visible as a wait event name, so try to - * use a name that fits the style for those. - */ -void -LWLockRegisterTranche(int tranche_id, const char *tranche_name) -{ - /* This should only be called for user-defined tranches. */ - if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED) - return; - - /* Convert to array index. 
*/ - tranche_id -= LWTRANCHE_FIRST_USER_DEFINED; + /* + * We use the ShmemLock spinlock to protect LWLockCounter and + * LWLockTrancheNames. + */ + SpinLockAcquire(ShmemLock); - /* If necessary, create or enlarge array. */ - if (tranche_id >= LWLockTrancheNamesAllocated) + if (*LWLockCounter - LWTRANCHE_FIRST_USER_DEFINED >= MAX_NAMED_TRANCHES) { - int newalloc; + SpinLockRelease(ShmemLock); + ereport(ERROR, + (errmsg("maximum number of tranches already registered"), + errdetail("No more than %d tranches may be registered.", + MAX_NAMED_TRANCHES))); + } - newalloc = pg_nextpower2_32(Max(8, tranche_id + 1)); + result = (*LWLockCounter)++; + LocalLWLockCounter = *LWLockCounter; + strlcpy(LWLockTrancheNames[result - LWTRANCHE_FIRST_USER_DEFINED], name, NAMEDATALEN); - if (LWLockTrancheNames == NULL) - LWLockTrancheNames = (const char **) - MemoryContextAllocZero(TopMemoryContext, - newalloc * sizeof(char *)); - else - LWLockTrancheNames = - repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc); - LWLockTrancheNamesAllocated = newalloc; - } + SpinLockRelease(ShmemLock); - LWLockTrancheNames[tranche_id] = tranche_name; + return result; } /* @@ -637,27 +614,33 @@ RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks) if (!process_shmem_requests_in_progress) elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook"); + if (!tranche_name) + ereport(ERROR, + (errcode(ERRCODE_INVALID_NAME), + errmsg("tranche name cannot be NULL"))); + + if (strlen(tranche_name) >= NAMEDATALEN) + ereport(ERROR, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("tranche name too long"), + errdetail("LWLock tranche names must be no longer than %d bytes.", + NAMEDATALEN - 1))); + if (NamedLWLockTrancheRequestArray == NULL) { - NamedLWLockTrancheRequestsAllocated = 16; NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *) MemoryContextAlloc(TopMemoryContext, - NamedLWLockTrancheRequestsAllocated + MAX_NAMED_TRANCHES * sizeof(NamedLWLockTrancheRequest)); } - if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated) - { - int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1); - - NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *) - repalloc(NamedLWLockTrancheRequestArray, - i * sizeof(NamedLWLockTrancheRequest)); - NamedLWLockTrancheRequestsAllocated = i; - } + if (NamedLWLockTrancheRequests >= MAX_NAMED_TRANCHES) + ereport(ERROR, + (errmsg("maximum number of tranches already registered"), + errdetail("No more than %d tranches may be registered.", + MAX_NAMED_TRANCHES))); request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests]; - Assert(strlen(tranche_name) + 1 <= NAMEDATALEN); strlcpy(request->tranche_name, tranche_name, NAMEDATALEN); request->num_lwlocks = num_lwlocks; NamedLWLockTrancheRequests++; @@ -669,6 +652,9 @@ RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks) void LWLockInitialize(LWLock *lock, int tranche_id) { + /* verify the tranche_id is valid */ + (void) GetLWTrancheName(tranche_id); + pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK); #ifdef LOCK_DEBUG pg_atomic_init_u32(&lock->nwaiters, 0); @@ -710,15 +696,27 @@ GetLWTrancheName(uint16 trancheId) return BuiltinTrancheNames[trancheId]; /* - * It's an extension tranche, so look in LWLockTrancheNames[]. However, - * it's possible that the tranche has never been registered in the current - * process, in which case give up and return "extension". 
+ * We only ever add new entries to LWLockTrancheNames, so most lookups can + * avoid taking the spinlock as long as the backend-local counter + * (LocalLWLockCounter) is greater than the requested tranche ID. Else, + * we need to first update the backend-local counter with ShmemLock held + * before attempting the lookup again. In practice, the latter case is + * probably rare. */ - trancheId -= LWTRANCHE_FIRST_USER_DEFINED; + if (trancheId >= LocalLWLockCounter) + { + SpinLockAcquire(ShmemLock); + LocalLWLockCounter = *LWLockCounter; + SpinLockRelease(ShmemLock); + + if (trancheId >= LocalLWLockCounter) + elog(ERROR, "tranche %d is not registered", trancheId); + } - if (trancheId >= LWLockTrancheNamesAllocated || - LWLockTrancheNames[trancheId] == NULL) - return "extension"; + /* + * It's an extension tranche, so look in LWLockTrancheNames. + */ + trancheId -= LWTRANCHE_FIRST_USER_DEFINED; return LWLockTrancheNames[trancheId]; } diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index f9cf57f8d266d..0e9cf81a4c766 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -73,14 +73,7 @@ typedef union LWLockPadded extern PGDLLIMPORT LWLockPadded *MainLWLockArray; -/* struct for storing named tranche information */ -typedef struct NamedLWLockTranche -{ - int trancheId; - char *trancheName; -} NamedLWLockTranche; - -extern PGDLLIMPORT NamedLWLockTranche *NamedLWLockTrancheArray; +extern PGDLLIMPORT char **LWLockTrancheNames; extern PGDLLIMPORT int NamedLWLockTrancheRequests; extern PGDLLIMPORT int *LWLockCounter; @@ -158,19 +151,11 @@ extern LWLockPadded *GetNamedLWLockTranche(const char *tranche_name); /* * There is another, more flexible method of obtaining lwlocks. First, call - * LWLockNewTrancheId just once to obtain a tranche ID; this allocates from - * a shared counter. Next, each individual process using the tranche should - * call LWLockRegisterTranche() to associate that tranche ID with a name. - * Finally, LWLockInitialize should be called just once per lwlock, passing - * the tranche ID as an argument. - * - * It may seem strange that each process using the tranche must register it - * separately, but dynamic shared memory segments aren't guaranteed to be - * mapped at the same address in all coordinating backends, so storing the - * registration in the main shared memory segment wouldn't work for that case. + * LWLockNewTrancheId to obtain a tranche ID; this allocates from a shared + * counter. Second, LWLockInitialize should be called just once per lwlock, + * passing the tranche ID as an argument. 
*/ -extern int LWLockNewTrancheId(void); -extern void LWLockRegisterTranche(int tranche_id, const char *tranche_name); +extern int LWLockNewTrancheId(const char *name); extern void LWLockInitialize(LWLock *lock, int tranche_id); /* diff --git a/src/test/modules/test_dsa/test_dsa.c b/src/test/modules/test_dsa/test_dsa.c index cd24d0f48736d..01d5c6fa67f0e 100644 --- a/src/test/modules/test_dsa/test_dsa.c +++ b/src/test/modules/test_dsa/test_dsa.c @@ -29,8 +29,7 @@ test_dsa_basic(PG_FUNCTION_ARGS) dsa_pointer p[100]; /* XXX: this tranche is leaked */ - tranche_id = LWLockNewTrancheId(); - LWLockRegisterTranche(tranche_id, "test_dsa"); + tranche_id = LWLockNewTrancheId("test_dsa"); a = dsa_create(tranche_id); for (int i = 0; i < 100; i++) @@ -70,8 +69,7 @@ test_dsa_resowners(PG_FUNCTION_ARGS) ResourceOwner childowner; /* XXX: this tranche is leaked */ - tranche_id = LWLockNewTrancheId(); - LWLockRegisterTranche(tranche_id, "test_dsa"); + tranche_id = LWLockNewTrancheId("test_dsa"); /* Create DSA in parent resource owner */ a = dsa_create(tranche_id); diff --git a/src/test/modules/test_dsm_registry/test_dsm_registry.c b/src/test/modules/test_dsm_registry/test_dsm_registry.c index 141c8ed1b34e3..4cc2ccdac3f11 100644 --- a/src/test/modules/test_dsm_registry/test_dsm_registry.c +++ b/src/test/modules/test_dsm_registry/test_dsm_registry.c @@ -48,7 +48,7 @@ init_tdr_dsm(void *ptr) { TestDSMRegistryStruct *dsm = (TestDSMRegistryStruct *) ptr; - LWLockInitialize(&dsm->lck, LWLockNewTrancheId()); + LWLockInitialize(&dsm->lck, LWLockNewTrancheId("test_dsm_registry")); dsm->val = 0; } @@ -61,7 +61,6 @@ tdr_attach_shmem(void) sizeof(TestDSMRegistryStruct), init_tdr_dsm, &found); - LWLockRegisterTranche(tdr_dsm->lck.tranche, "test_dsm_registry"); if (tdr_dsa == NULL) tdr_dsa = GetNamedDSA("test_dsm_registry_dsa", &found); diff --git a/src/test/modules/test_radixtree/test_radixtree.c b/src/test/modules/test_radixtree/test_radixtree.c index 80ad029616473..787162c879330 100644 --- a/src/test/modules/test_radixtree/test_radixtree.c +++ b/src/test/modules/test_radixtree/test_radixtree.c @@ -124,10 +124,9 @@ test_empty(void) rt_iter *iter; uint64 key; #ifdef TEST_SHARED_RT - int tranche_id = LWLockNewTrancheId(); + int tranche_id = LWLockNewTrancheId("test_radix_tree"); dsa_area *dsa; - LWLockRegisterTranche(tranche_id, "test_radix_tree"); dsa = dsa_create(tranche_id); radixtree = rt_create(dsa, tranche_id); #else @@ -167,10 +166,9 @@ test_basic(rt_node_class_test_elem *test_info, int shift, bool asc) uint64 *keys; int children = test_info->nkeys; #ifdef TEST_SHARED_RT - int tranche_id = LWLockNewTrancheId(); + int tranche_id = LWLockNewTrancheId("test_radix_tree"); dsa_area *dsa; - LWLockRegisterTranche(tranche_id, "test_radix_tree"); dsa = dsa_create(tranche_id); radixtree = rt_create(dsa, tranche_id); #else @@ -304,10 +302,9 @@ test_random(void) int num_keys = 100000; uint64 *keys; #ifdef TEST_SHARED_RT - int tranche_id = LWLockNewTrancheId(); + int tranche_id = LWLockNewTrancheId("test_radix_tree"); dsa_area *dsa; - LWLockRegisterTranche(tranche_id, "test_radix_tree"); dsa = dsa_create(tranche_id); radixtree = rt_create(dsa, tranche_id); #else diff --git a/src/test/modules/test_slru/test_slru.c b/src/test/modules/test_slru/test_slru.c index 32750930e433d..8c0367eeee424 100644 --- a/src/test/modules/test_slru/test_slru.c +++ b/src/test/modules/test_slru/test_slru.c @@ -232,11 +232,9 @@ test_slru_shmem_startup(void) (void) MakePGDirectory(slru_dir_name); /* initialize the SLRU facility */ - test_tranche_id = 
LWLockNewTrancheId(); - LWLockRegisterTranche(test_tranche_id, "test_slru_tranche"); + test_tranche_id = LWLockNewTrancheId("test_slru_tranche"); - test_buffer_tranche_id = LWLockNewTrancheId(); - LWLockRegisterTranche(test_tranche_id, "test_buffer_tranche"); + test_buffer_tranche_id = LWLockNewTrancheId("test_buffer_tranche"); TestSlruCtl->PagePrecedes = test_slru_page_precedes_logically; SimpleLruInit(TestSlruCtl, "TestSLRU", diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c index eb16e0fbfa647..0c8f43867e55e 100644 --- a/src/test/modules/test_tidstore/test_tidstore.c +++ b/src/test/modules/test_tidstore/test_tidstore.c @@ -103,8 +103,7 @@ test_create(PG_FUNCTION_ARGS) { int tranche_id; - tranche_id = LWLockNewTrancheId(); - LWLockRegisterTranche(tranche_id, "test_tidstore"); + tranche_id = LWLockNewTrancheId("test_tidstore"); tidstore = TidStoreCreateShared(tidstore_max_size, tranche_id); From e351e5c4fea463d9f96557913f7f767af3795c32 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 3 Sep 2025 16:07:57 -0400 Subject: [PATCH 02/73] Make libpq_pipeline.c shorter and more uniform via helper functions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many places in this test program that need to consume a PGresult while checking that its PQresultStatus is as-expected, or related tasks such as checking that PQgetResult has nothing more to return. These tasks were open-coded in a rather inconsistent way, leading to some outright bugs, some memory leakage, and frequent inconsistencies about what would be reported in event of an error. Invent a few helper functions to standardize the behavior and reduce code duplication. Also, rename the one pre-existing helper function from confirm_query_canceled to consume_query_cancel, per Álvaro's suggestion that "confirm" is a poor choice of verb for a function that will discard the PGresult. While at it, clean up assorted other places that were leaking PGresults or even server connections. This is pure neatnik-ism, since the test doesn't run long enough for those leaks to be of any real-world concern. While this fixes some things that are clearly bugs, it's only a test program, and none of the bugs seem serious enough to justify back-patching. Bug: #18960 Reported-by: Dmitry Kovalenko Author: Tom Lane Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/18960-09cd4a5100152e58@postgresql.org --- .../modules/libpq_pipeline/libpq_pipeline.c | 549 +++++++----------- 1 file changed, 201 insertions(+), 348 deletions(-) diff --git a/src/test/modules/libpq_pipeline/libpq_pipeline.c b/src/test/modules/libpq_pipeline/libpq_pipeline.c index 9a3c0236325c6..b3af70fa09bf8 100644 --- a/src/test/modules/libpq_pipeline/libpq_pipeline.c +++ b/src/test/modules/libpq_pipeline/libpq_pipeline.c @@ -88,20 +88,67 @@ pg_fatal_impl(int line, const char *fmt,...) } /* - * Check that the query on the given connection got canceled. + * Check that libpq next returns a PGresult with the specified status, + * returning the PGresult so that caller can perform additional checks. 
*/ -#define confirm_query_canceled(conn) confirm_query_canceled_impl(__LINE__, conn) -static void -confirm_query_canceled_impl(int line, PGconn *conn) +#define confirm_result_status(conn, status) confirm_result_status_impl(__LINE__, conn, status) +static PGresult * +confirm_result_status_impl(int line, PGconn *conn, ExecStatusType status) { - PGresult *res = NULL; + PGresult *res; res = PQgetResult(conn); if (res == NULL) - pg_fatal_impl(line, "PQgetResult returned null: %s", + pg_fatal_impl(line, "PQgetResult returned null unexpectedly: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_FATAL_ERROR) - pg_fatal_impl(line, "query did not fail when it was expected"); + if (PQresultStatus(res) != status) + pg_fatal_impl(line, "PQgetResult returned status %s, expected %s: %s", + PQresStatus(PQresultStatus(res)), + PQresStatus(status), + PQerrorMessage(conn)); + return res; +} + +/* + * Check that libpq next returns a PGresult with the specified status, + * then free the PGresult. + */ +#define consume_result_status(conn, status) consume_result_status_impl(__LINE__, conn, status) +static void +consume_result_status_impl(int line, PGconn *conn, ExecStatusType status) +{ + PGresult *res; + + res = confirm_result_status_impl(line, conn, status); + PQclear(res); +} + +/* + * Check that libpq next returns a null PGresult. + */ +#define consume_null_result(conn) consume_null_result_impl(__LINE__, conn) +static void +consume_null_result_impl(int line, PGconn *conn) +{ + PGresult *res; + + res = PQgetResult(conn); + if (res != NULL) + pg_fatal_impl(line, "expected NULL PGresult, got %s: %s", + PQresStatus(PQresultStatus(res)), + PQerrorMessage(conn)); +} + +/* + * Check that the query on the given connection got canceled. + */ +#define consume_query_cancel(conn) consume_query_cancel_impl(__LINE__, conn) +static void +consume_query_cancel_impl(int line, PGconn *conn) +{ + PGresult *res; + + res = confirm_result_status_impl(line, conn, PGRES_FATAL_ERROR); if (strcmp(PQresultErrorField(res, PG_DIAG_SQLSTATE), "57014") != 0) pg_fatal_impl(line, "query failed with a different error than cancellation: %s", PQerrorMessage(conn)); @@ -234,6 +281,10 @@ copy_connection(PGconn *conn) pg_fatal("Connection to database failed: %s", PQerrorMessage(copyConn)); + pfree(keywords); + pfree(vals); + PQconninfoFree(opts); + return copyConn; } @@ -265,13 +316,13 @@ test_cancel(PGconn *conn) cancel = PQgetCancel(conn); if (!PQcancel(cancel, errorbuf, sizeof(errorbuf))) pg_fatal("failed to run PQcancel: %s", errorbuf); - confirm_query_canceled(conn); + consume_query_cancel(conn); /* PGcancel object can be reused for the next query */ send_cancellable_query(conn, monitorConn); if (!PQcancel(cancel, errorbuf, sizeof(errorbuf))) pg_fatal("failed to run PQcancel: %s", errorbuf); - confirm_query_canceled(conn); + consume_query_cancel(conn); PQfreeCancel(cancel); @@ -279,14 +330,14 @@ test_cancel(PGconn *conn) send_cancellable_query(conn, monitorConn); if (!PQrequestCancel(conn)) pg_fatal("failed to run PQrequestCancel: %s", PQerrorMessage(conn)); - confirm_query_canceled(conn); + consume_query_cancel(conn); /* test PQcancelBlocking */ send_cancellable_query(conn, monitorConn); cancelConn = PQcancelCreate(conn); if (!PQcancelBlocking(cancelConn)) pg_fatal("failed to run PQcancelBlocking: %s", PQcancelErrorMessage(cancelConn)); - confirm_query_canceled(conn); + consume_query_cancel(conn); PQcancelFinish(cancelConn); /* test PQcancelCreate and then polling with PQcancelPoll */ @@ -340,7 +391,7 @@ test_cancel(PGconn *conn) 
} if (PQcancelStatus(cancelConn) != CONNECTION_OK) pg_fatal("unexpected cancel connection status: %s", PQcancelErrorMessage(cancelConn)); - confirm_query_canceled(conn); + consume_query_cancel(conn); /* * test PQcancelReset works on the cancel connection and it can be reused @@ -397,9 +448,10 @@ test_cancel(PGconn *conn) } if (PQcancelStatus(cancelConn) != CONNECTION_OK) pg_fatal("unexpected cancel connection status: %s", PQcancelErrorMessage(cancelConn)); - confirm_query_canceled(conn); + consume_query_cancel(conn); PQcancelFinish(cancelConn); + PQfinish(monitorConn); fprintf(stderr, "ok\n"); } @@ -428,6 +480,7 @@ test_disallowed_in_pipeline(PGconn *conn) "synchronous command execution functions are not allowed in pipeline mode\n") != 0) pg_fatal("did not get expected error message; got: \"%s\"", PQerrorMessage(conn)); + PQclear(res); /* PQsendQuery should fail in pipeline mode */ if (PQsendQuery(conn, "SELECT 1") != 0) @@ -460,6 +513,7 @@ test_disallowed_in_pipeline(PGconn *conn) if (PQresultStatus(res) != PGRES_TUPLES_OK) pg_fatal("PQexec should succeed after exiting pipeline mode but failed with: %s", PQerrorMessage(conn)); + PQclear(res); fprintf(stderr, "ok\n"); } @@ -467,7 +521,6 @@ test_disallowed_in_pipeline(PGconn *conn) static void test_multi_pipelines(PGconn *conn) { - PGresult *res = NULL; const char *dummy_params[1] = {"1"}; Oid dummy_param_oids[1] = {INT4OID}; @@ -508,87 +561,31 @@ test_multi_pipelines(PGconn *conn) /* OK, start processing the results */ /* first pipeline */ + consume_result_status(conn, PGRES_TUPLES_OK); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - PQclear(res); - res = NULL; - - if (PQgetResult(conn) != NULL) - pg_fatal("PQgetResult returned something extra after first result"); + consume_null_result(conn); if (PQexitPipelineMode(conn) != 0) pg_fatal("exiting pipeline mode after query but before sync succeeded incorrectly"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when sync result expected: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s instead of sync result, error: %s", - PQresStatus(PQresultStatus(res)), PQerrorMessage(conn)); - PQclear(res); + consume_result_status(conn, PGRES_PIPELINE_SYNC); /* second pipeline */ + consume_result_status(conn, PGRES_TUPLES_OK); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Unexpected result code %s from second pipeline item", - PQresStatus(PQresultStatus(res))); - PQclear(res); - res = NULL; - - if (PQgetResult(conn) != NULL) - pg_fatal("PQgetResult returned something extra after first result"); + consume_null_result(conn); if (PQexitPipelineMode(conn) != 0) pg_fatal("exiting pipeline mode after query but before sync succeeded incorrectly"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when sync result expected: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s instead of sync result, error: %s", - PQresStatus(PQresultStatus(res)), PQerrorMessage(conn)); - PQclear(res); + 
consume_result_status(conn, PGRES_PIPELINE_SYNC); /* third pipeline */ + consume_result_status(conn, PGRES_TUPLES_OK); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Unexpected result code %s from third pipeline item", - PQresStatus(PQresultStatus(res))); - - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("Expected null result, got %s", - PQresStatus(PQresultStatus(res))); + consume_null_result(conn); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s from second pipeline sync", - PQresStatus(PQresultStatus(res))); + consume_result_status(conn, PGRES_PIPELINE_SYNC); /* We're still in pipeline mode ... */ if (PQpipelineStatus(conn) == PQ_PIPELINE_OFF) @@ -657,36 +654,17 @@ test_nosync(PGconn *conn) /* Now read all results */ for (;;) { - PGresult *res; - - res = PQgetResult(conn); - - /* NULL results are only expected after TUPLES_OK */ - if (res == NULL) - pg_fatal("got unexpected NULL result after %d results", results); - /* We expect exactly one TUPLES_OK result for each query we sent */ - if (PQresultStatus(res) == PGRES_TUPLES_OK) - { - PGresult *res2; - - /* and one NULL result should follow each */ - res2 = PQgetResult(conn); - if (res2 != NULL) - pg_fatal("expected NULL, got %s", - PQresStatus(PQresultStatus(res2))); - PQclear(res); - results++; + consume_result_status(conn, PGRES_TUPLES_OK); - /* if we're done, we're done */ - if (results == numqueries) - break; + /* and one NULL result should follow each */ + consume_null_result(conn); - continue; - } + results++; - /* anything else is unexpected */ - pg_fatal("got unexpected %s\n", PQresStatus(PQresultStatus(res))); + /* if we're done, we're done */ + if (results == numqueries) + break; } fprintf(stderr, "ok\n"); @@ -716,10 +694,12 @@ test_pipeline_abort(PGconn *conn) res = PQexec(conn, drop_table_sql); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("dispatching DROP TABLE failed: %s", PQerrorMessage(conn)); + PQclear(res); res = PQexec(conn, create_table_sql); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("dispatching CREATE TABLE failed: %s", PQerrorMessage(conn)); + PQclear(res); /* * Queue up a couple of small pipelines and process each without returning @@ -763,33 +743,16 @@ test_pipeline_abort(PGconn *conn) * a pipeline aborted message for the second insert, a pipeline-end, then * a command-ok and a pipeline-ok for the second pipeline operation. 
*/ - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("Unexpected result status %s: %s", - PQresStatus(PQresultStatus(res)), - PQresultErrorMessage(res)); - PQclear(res); + consume_result_status(conn, PGRES_COMMAND_OK); /* NULL result to signal end-of-results for this command */ - if ((res = PQgetResult(conn)) != NULL) - pg_fatal("Expected null result, got %s", - PQresStatus(PQresultStatus(res))); + consume_null_result(conn); /* Second query caused error, so we expect an error next */ - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_FATAL_ERROR) - pg_fatal("Unexpected result code -- expected PGRES_FATAL_ERROR, got %s", - PQresStatus(PQresultStatus(res))); - PQclear(res); + consume_result_status(conn, PGRES_FATAL_ERROR); /* NULL result to signal end-of-results for this command */ - if ((res = PQgetResult(conn)) != NULL) - pg_fatal("Expected null result, got %s", - PQresStatus(PQresultStatus(res))); + consume_null_result(conn); /* * pipeline should now be aborted. @@ -802,17 +765,10 @@ test_pipeline_abort(PGconn *conn) pg_fatal("pipeline should be flagged as aborted but isn't"); /* third query in pipeline, the second insert */ - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_PIPELINE_ABORTED) - pg_fatal("Unexpected result code -- expected PGRES_PIPELINE_ABORTED, got %s", - PQresStatus(PQresultStatus(res))); - PQclear(res); + consume_result_status(conn, PGRES_PIPELINE_ABORTED); /* NULL result to signal end-of-results for this command */ - if ((res = PQgetResult(conn)) != NULL) - pg_fatal("Expected null result, got %s", PQresStatus(PQresultStatus(res))); + consume_null_result(conn); if (PQpipelineStatus(conn) != PQ_PIPELINE_ABORTED) pg_fatal("pipeline should be flagged as aborted but isn't"); @@ -827,14 +783,7 @@ test_pipeline_abort(PGconn *conn) * (This is so clients know to start processing results normally again and * can tell the difference between skipped commands and the sync.) 
*/ - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code from first pipeline sync\n" - "Expected PGRES_PIPELINE_SYNC, got %s", - PQresStatus(PQresultStatus(res))); - PQclear(res); + consume_result_status(conn, PGRES_PIPELINE_SYNC); if (PQpipelineStatus(conn) == PQ_PIPELINE_ABORTED) pg_fatal("sync should've cleared the aborted flag but didn't"); @@ -844,30 +793,16 @@ test_pipeline_abort(PGconn *conn) pg_fatal("Fell out of pipeline mode somehow"); /* the insert from the second pipeline */ - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("Unexpected result code %s from first item in second pipeline", - PQresStatus(PQresultStatus(res))); - PQclear(res); + consume_result_status(conn, PGRES_COMMAND_OK); /* Read the NULL result at the end of the command */ - if ((res = PQgetResult(conn)) != NULL) - pg_fatal("Expected null result, got %s", PQresStatus(PQresultStatus(res))); + consume_null_result(conn); /* the second pipeline sync */ - if ((res = PQgetResult(conn)) == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s from second pipeline sync", - PQresStatus(PQresultStatus(res))); - PQclear(res); + consume_result_status(conn, PGRES_PIPELINE_SYNC); - if ((res = PQgetResult(conn)) != NULL) - pg_fatal("Expected null result, got %s: %s", - PQresStatus(PQresultStatus(res)), - PQerrorMessage(conn)); + /* Read the NULL result at the end of the command */ + consume_null_result(conn); /* Try to send two queries in one command */ if (PQsendQueryParams(conn, "SELECT 1; SELECT 2", 0, NULL, NULL, NULL, NULL, 0) != 1) @@ -890,15 +825,14 @@ test_pipeline_abort(PGconn *conn) pg_fatal("got unexpected status %s", PQresStatus(PQresultStatus(res))); break; } + PQclear(res); } if (!goterror) pg_fatal("did not get cannot-insert-multiple-commands error"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("got NULL result"); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s from pipeline sync", - PQresStatus(PQresultStatus(res))); + + /* the second pipeline sync */ + consume_result_status(conn, PGRES_PIPELINE_SYNC); + fprintf(stderr, "ok\n"); /* Test single-row mode with an error partways */ @@ -935,13 +869,9 @@ test_pipeline_abort(PGconn *conn) pg_fatal("did not get division-by-zero error"); if (gotrows != 3) pg_fatal("did not get three rows"); + /* the third pipeline sync */ - if ((res = PQgetResult(conn)) == NULL) - pg_fatal("Unexpected NULL result: %s", PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s from third pipeline sync", - PQresStatus(PQresultStatus(res))); - PQclear(res); + consume_result_status(conn, PGRES_PIPELINE_SYNC); /* We're still in pipeline mode... 
*/ if (PQpipelineStatus(conn) == PQ_PIPELINE_OFF) @@ -1274,21 +1204,11 @@ test_prepared(PGconn *conn) if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); - PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("expected NULL result"); + consume_result_status(conn, PGRES_COMMAND_OK); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned NULL"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); + consume_null_result(conn); + + res = confirm_result_status(conn, PGRES_COMMAND_OK); if (PQnfields(res) != lengthof(expected_oids)) pg_fatal("expected %zu columns, got %d", lengthof(expected_oids), PQnfields(res)); @@ -1300,13 +1220,10 @@ test_prepared(PGconn *conn) i, expected_oids[i], typ); } PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("expected NULL result"); - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("expected PGRES_PIPELINE_SYNC, got %s", PQresStatus(PQresultStatus(res))); + consume_null_result(conn); + + consume_result_status(conn, PGRES_PIPELINE_SYNC); fprintf(stderr, "closing statement.."); if (PQsendClosePrepared(conn, "select_one") != 1) @@ -1314,18 +1231,11 @@ test_prepared(PGconn *conn) if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("expected non-NULL result"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); - PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("expected NULL result"); - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("expected PGRES_PIPELINE_SYNC, got %s", PQresStatus(PQresultStatus(res))); + consume_result_status(conn, PGRES_COMMAND_OK); + + consume_null_result(conn); + + consume_result_status(conn, PGRES_PIPELINE_SYNC); if (PQexitPipelineMode(conn) != 1) pg_fatal("could not exit pipeline mode: %s", PQerrorMessage(conn)); @@ -1334,6 +1244,7 @@ test_prepared(PGconn *conn) res = PQdescribePrepared(conn, "select_one"); if (PQresultStatus(res) != PGRES_FATAL_ERROR) pg_fatal("expected FATAL_ERROR, got %s", PQresStatus(PQresultStatus(res))); + PQclear(res); /* * Also test the blocking close, this should not fail since closing a @@ -1342,32 +1253,36 @@ test_prepared(PGconn *conn) res = PQclosePrepared(conn, "select_one"); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); + PQclear(res); fprintf(stderr, "creating portal... 
"); - PQexec(conn, "BEGIN"); - PQexec(conn, "DECLARE cursor_one CURSOR FOR SELECT 1"); + + res = PQexec(conn, "BEGIN"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_fatal("BEGIN failed: %s", PQerrorMessage(conn)); + PQclear(res); + + res = PQexec(conn, "DECLARE cursor_one CURSOR FOR SELECT 1"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_fatal("DECLARE CURSOR failed: %s", PQerrorMessage(conn)); + PQclear(res); + PQenterPipelineMode(conn); if (PQsendDescribePortal(conn, "cursor_one") != 1) pg_fatal("PQsendDescribePortal failed: %s", PQerrorMessage(conn)); if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); + res = confirm_result_status(conn, PGRES_COMMAND_OK); typ = PQftype(res, 0); if (typ != INT4OID) pg_fatal("portal: expected type %u, got %u", INT4OID, typ); PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("expected NULL result"); - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("expected PGRES_PIPELINE_SYNC, got %s", PQresStatus(PQresultStatus(res))); + + consume_null_result(conn); + + consume_result_status(conn, PGRES_PIPELINE_SYNC); fprintf(stderr, "closing portal... "); if (PQsendClosePortal(conn, "cursor_one") != 1) @@ -1375,18 +1290,11 @@ test_prepared(PGconn *conn) if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("expected non-NULL result"); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); - PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("expected NULL result"); - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("expected PGRES_PIPELINE_SYNC, got %s", PQresStatus(PQresultStatus(res))); + consume_result_status(conn, PGRES_COMMAND_OK); + + consume_null_result(conn); + + consume_result_status(conn, PGRES_PIPELINE_SYNC); if (PQexitPipelineMode(conn) != 1) pg_fatal("could not exit pipeline mode: %s", PQerrorMessage(conn)); @@ -1395,6 +1303,7 @@ test_prepared(PGconn *conn) res = PQdescribePortal(conn, "cursor_one"); if (PQresultStatus(res) != PGRES_FATAL_ERROR) pg_fatal("expected FATAL_ERROR, got %s", PQresStatus(PQresultStatus(res))); + PQclear(res); /* * Also test the blocking close, this should not fail since closing a @@ -1403,6 +1312,7 @@ test_prepared(PGconn *conn) res = PQclosePortal(conn, "cursor_one"); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); + PQclear(res); fprintf(stderr, "ok\n"); } @@ -1509,6 +1419,10 @@ test_protocol_version(PGconn *conn) pg_fatal("expected 30002, got %d", protocol_version); PQfinish(conn); + + pfree(keywords); + pfree(vals); + PQconninfoFree(opts); } /* Notice processor: print notices, and count how many we got */ @@ -1525,7 +1439,6 @@ notice_processor(void *arg, const char *message) static void test_pipeline_idle(PGconn *conn) { - PGresult *res; int n_notices = 0; fprintf(stderr, "\npipeline idle...\n"); @@ -1538,17 +1451,11 @@ test_pipeline_idle(PGconn *conn) if (PQsendQueryParams(conn, "SELECT 1", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); PQsendFlushRequest(conn); - res = 
PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("did not receive terminating NULL"); + + consume_result_status(conn, PGRES_TUPLES_OK); + + consume_null_result(conn); + if (PQsendQueryParams(conn, "SELECT 2", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); if (PQexitPipelineMode(conn) == 1) @@ -1558,14 +1465,11 @@ test_pipeline_idle(PGconn *conn) pg_fatal("did not get expected error; got: %s", PQerrorMessage(conn)); PQsendFlushRequest(conn); - res = PQgetResult(conn); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s from second pipeline item", - PQresStatus(PQresultStatus(res))); - PQclear(res); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("did not receive terminating NULL"); + + consume_result_status(conn, PGRES_TUPLES_OK); + + consume_null_result(conn); + if (PQexitPipelineMode(conn) != 1) pg_fatal("exiting pipeline failed: %s", PQerrorMessage(conn)); @@ -1579,11 +1483,9 @@ test_pipeline_idle(PGconn *conn) if (PQsendQueryParams(conn, "SELECT pg_catalog.pg_advisory_unlock(1,1)", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); PQsendFlushRequest(conn); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL result received"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s", PQresStatus(PQresultStatus(res))); + + consume_result_status(conn, PGRES_TUPLES_OK); + if (PQexitPipelineMode(conn) != 1) pg_fatal("failed to exit pipeline mode: %s", PQerrorMessage(conn)); fprintf(stderr, "ok - 2\n"); @@ -1592,7 +1494,6 @@ test_pipeline_idle(PGconn *conn) static void test_simple_pipeline(PGconn *conn) { - PGresult *res = NULL; const char *dummy_params[1] = {"1"}; Oid dummy_param_oids[1] = {INT4OID}; @@ -1623,20 +1524,9 @@ test_simple_pipeline(PGconn *conn) if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - - PQclear(res); - res = NULL; + consume_result_status(conn, PGRES_TUPLES_OK); - if (PQgetResult(conn) != NULL) - pg_fatal("PQgetResult returned something extra after first query result."); + consume_null_result(conn); /* * Even though we've processed the result there's still a sync to come and @@ -1645,21 +1535,9 @@ test_simple_pipeline(PGconn *conn) if (PQexitPipelineMode(conn) != 0) pg_fatal("exiting pipeline mode after query but before sync succeeded incorrectly"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when sync result PGRES_PIPELINE_SYNC expected: %s", - PQerrorMessage(conn)); - - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s instead of PGRES_PIPELINE_SYNC, error: %s", - PQresStatus(PQresultStatus(res)), PQerrorMessage(conn)); - - PQclear(res); - res = NULL; + consume_result_status(conn, PGRES_PIPELINE_SYNC); - if (PQgetResult(conn) != NULL) - pg_fatal("PQgetResult returned something 
extra after pipeline end: %s", - PQresStatus(PQresultStatus(res))); + consume_null_result(conn); /* We're still in pipeline mode... */ if (PQpipelineStatus(conn) == PQ_PIPELINE_OFF) @@ -1792,20 +1670,12 @@ test_singlerowmode(PGconn *conn) pg_fatal("failed to send flush request"); if (PQsetSingleRowMode(conn) != 1) pg_fatal("PQsetSingleRowMode() failed"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL"); - if (PQresultStatus(res) != PGRES_SINGLE_TUPLE) - pg_fatal("Expected PGRES_SINGLE_TUPLE, got %s", - PQresStatus(PQresultStatus(res))); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Expected PGRES_TUPLES_OK, got %s", - PQresStatus(PQresultStatus(res))); - if (PQgetResult(conn) != NULL) - pg_fatal("expected NULL result"); + + consume_result_status(conn, PGRES_SINGLE_TUPLE); + + consume_result_status(conn, PGRES_TUPLES_OK); + + consume_null_result(conn); if (PQsendQueryParams(conn, "SELECT 1", 0, NULL, NULL, NULL, NULL, 0) != 1) @@ -1813,14 +1683,10 @@ test_singlerowmode(PGconn *conn) PQerrorMessage(conn)); if (PQsendFlushRequest(conn) != 1) pg_fatal("failed to send flush request"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Expected PGRES_TUPLES_OK, got %s", - PQresStatus(PQresultStatus(res))); - if (PQgetResult(conn) != NULL) - pg_fatal("expected NULL result"); + + consume_result_status(conn, PGRES_TUPLES_OK); + + consume_null_result(conn); /* * Try chunked mode as well; make sure that it correctly delivers a @@ -1834,33 +1700,23 @@ test_singlerowmode(PGconn *conn) pg_fatal("failed to send flush request"); if (PQsetChunkedRowsMode(conn, 3) != 1) pg_fatal("PQsetChunkedRowsMode() failed"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL"); - if (PQresultStatus(res) != PGRES_TUPLES_CHUNK) - pg_fatal("Expected PGRES_TUPLES_CHUNK, got %s: %s", - PQresStatus(PQresultStatus(res)), - PQerrorMessage(conn)); + + res = confirm_result_status(conn, PGRES_TUPLES_CHUNK); if (PQntuples(res) != 3) pg_fatal("Expected 3 rows, got %d", PQntuples(res)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL"); - if (PQresultStatus(res) != PGRES_TUPLES_CHUNK) - pg_fatal("Expected PGRES_TUPLES_CHUNK, got %s", - PQresStatus(PQresultStatus(res))); + PQclear(res); + + res = confirm_result_status(conn, PGRES_TUPLES_CHUNK); if (PQntuples(res) != 2) pg_fatal("Expected 2 rows, got %d", PQntuples(res)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("unexpected NULL"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Expected PGRES_TUPLES_OK, got %s", - PQresStatus(PQresultStatus(res))); + PQclear(res); + + res = confirm_result_status(conn, PGRES_TUPLES_OK); if (PQntuples(res) != 0) pg_fatal("Expected 0 rows, got %d", PQntuples(res)); - if (PQgetResult(conn) != NULL) - pg_fatal("expected NULL result"); + PQclear(res); + + consume_null_result(conn); if (PQexitPipelineMode(conn) != 1) pg_fatal("failed to end pipeline mode: %s", PQerrorMessage(conn)); @@ -1995,9 +1851,8 @@ test_transaction(PGconn *conn) if (num_syncs <= 0) break; } - if (PQgetResult(conn) != NULL) - pg_fatal("returned something extra after all the syncs: %s", - PQresStatus(PQresultStatus(res))); + + consume_null_result(conn); if (PQexitPipelineMode(conn) != 1) pg_fatal("failed to end pipeline mode: %s", PQerrorMessage(conn)); @@ -2053,16 +1908,19 @@ test_uniqviol(PGconn *conn) "create table 
ppln_uniqviol(id bigint primary key, idata bigint)"); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("failed to create table: %s", PQerrorMessage(conn)); + PQclear(res); res = PQexec(conn, "begin"); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("failed to begin transaction: %s", PQerrorMessage(conn)); + PQclear(res); res = PQprepare(conn, "insertion", "insert into ppln_uniqviol values ($1, $2) returning id", 2, paramTypes); - if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK) + if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("failed to prepare query: %s", PQerrorMessage(conn)); + PQclear(res); if (PQenterPipelineMode(conn) != 1) pg_fatal("failed to enter pipeline mode"); @@ -2191,7 +2049,6 @@ test_uniqviol(PGconn *conn) static bool process_result(PGconn *conn, PGresult *res, int results, int numsent) { - PGresult *res2; bool got_error = false; if (res == NULL) @@ -2203,29 +2060,19 @@ process_result(PGconn *conn, PGresult *res, int results, int numsent) got_error = true; fprintf(stderr, "result %d/%d (error): %s\n", results, numsent, PQerrorMessage(conn)); PQclear(res); - - res2 = PQgetResult(conn); - if (res2 != NULL) - pg_fatal("expected NULL, got %s", - PQresStatus(PQresultStatus(res2))); + consume_null_result(conn); break; case PGRES_TUPLES_OK: fprintf(stderr, "result %d/%d: %s\n", results, numsent, PQgetvalue(res, 0, 0)); PQclear(res); - - res2 = PQgetResult(conn); - if (res2 != NULL) - pg_fatal("expected NULL, got %s", - PQresStatus(PQresultStatus(res2))); + consume_null_result(conn); break; case PGRES_PIPELINE_ABORTED: fprintf(stderr, "result %d/%d: pipeline aborted\n", results, numsent); - res2 = PQgetResult(conn); - if (res2 != NULL) - pg_fatal("expected NULL, got %s", - PQresStatus(PQresultStatus(res2))); + PQclear(res); + consume_null_result(conn); break; default: @@ -2271,7 +2118,7 @@ main(int argc, char **argv) { const char *conninfo = ""; PGconn *conn; - FILE *trace; + FILE *trace = NULL; char *testname; int numrows = 10000; PGresult *res; @@ -2332,9 +2179,11 @@ main(int argc, char **argv) res = PQexec(conn, "SET lc_messages TO \"C\""); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("failed to set \"lc_messages\": %s", PQerrorMessage(conn)); + PQclear(res); res = PQexec(conn, "SET debug_parallel_query = off"); if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("failed to set \"debug_parallel_query\": %s", PQerrorMessage(conn)); + PQclear(res); /* Set the trace file, if requested */ if (tracefile != NULL) @@ -2388,5 +2237,9 @@ main(int argc, char **argv) /* close the connection to the database and cleanup */ PQfinish(conn); + + if (trace && trace != stdout) + fclose(trace); + return 0; } From 09119238a18191dea3deed635a2b2a6ffe904932 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 4 Sep 2025 08:34:51 +0900 Subject: [PATCH 03/73] Fix incorrect comment in pgstat_backend.c The counters saved from pgWalUsage, used for the difference calculations when flushing the backend WAL stats, are updated when calling pgstat_flush_backend() under PGSTAT_BACKEND_FLUSH_WAL, and not pgstat_report_wal(). The comment updated in this commit referenced the latter, but it is perfectly OK to flush the backend stats independently of the WAL stats. Noticed while looking at this area of the code, introduced by 76def4cdd7c2 as a copy-pasto. 
Backpatch-through: 18 --- src/backend/utils/activity/pgstat_backend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 8714a85e2d936..07a1116671b18 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -41,9 +41,9 @@ static bool backend_has_iostats = false; /* * WAL usage counters saved from pgWalUsage at the previous call to - * pgstat_report_wal(). This is used to calculate how much WAL usage - * happens between pgstat_report_wal() calls, by subtracting the previous - * counters from the current ones. + * pgstat_flush_backend(). This is used to calculate how much WAL usage + * happens between pgstat_flush_backend() calls, by subtracting the + * previous counters from the current ones. */ static WalUsage prevBackendWalUsage; From 5386bfb9c1f566887d084e4ea2e350e7efd188c1 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 4 Sep 2025 11:27:53 +0100 Subject: [PATCH 04/73] Fix replica identity check for INSERT ON CONFLICT DO UPDATE. If an INSERT has an ON CONFLICT DO UPDATE clause, the executor must check that the target relation supports UPDATE as well as INSERT. In particular, it must check that the target relation has a REPLICA IDENTITY if it publishes updates. Formerly, it was not doing this check, which could lead to silently breaking replication. Fix by adding such a check to CheckValidResultRel(), which requires adding a new onConflictAction argument. In back-branches, preserve ABI compatibility by introducing a wrapper function with the original signature. Author: Zhijie Hou Reviewed-by: Ashutosh Bapat Reviewed-by: Dean Rasheed Tested-by: Chao Li Discussion: https://postgr.es/m/OS3PR01MB57180C87E43A679A730482DF94B62@OS3PR01MB5718.jpnprd01.prod.outlook.com Backpatch-through: 13 --- src/backend/commands/copyfrom.c | 2 +- src/backend/executor/execMain.c | 12 +++++++++- src/backend/executor/execPartition.c | 9 ++++++-- src/backend/executor/nodeModifyTable.c | 3 ++- src/include/executor/executor.h | 1 + src/test/regress/expected/publication.out | 23 +++++++++++++++++++ src/test/regress/sql/publication.sql | 27 +++++++++++++++++++++++ 7 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index fbbbc09a97b17..12781963b4f95 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -919,7 +919,7 @@ CopyFrom(CopyFromState cstate) ExecInitResultRelation(estate, resultRelInfo, 1); /* Verify the named relation is a valid target for INSERT */ - CheckValidResultRel(resultRelInfo, CMD_INSERT, NIL); + CheckValidResultRel(resultRelInfo, CMD_INSERT, ONCONFLICT_NONE, NIL); ExecOpenIndices(resultRelInfo, false); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index b8b9d2a85f76c..8f56d5e312ec2 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1036,6 +1036,9 @@ InitPlan(QueryDesc *queryDesc, int eflags) * Generally the parser and/or planner should have noticed any such mistake * already, but let's make sure. * + * For INSERT ON CONFLICT, the result relation is required to support the + * onConflictAction, regardless of whether a conflict actually occurs. + * * For MERGE, mergeActions is the list of actions that may be performed. The * result relation is required to support every action, regardless of whether * or not they are all executed. 
@@ -1045,7 +1048,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) */ void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, - List *mergeActions) + OnConflictAction onConflictAction, List *mergeActions) { Relation resultRel = resultRelInfo->ri_RelationDesc; FdwRoutine *fdwroutine; @@ -1059,6 +1062,13 @@ CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: CheckCmdReplicaIdentity(resultRel, operation); + + /* + * For INSERT ON CONFLICT DO UPDATE, additionally check that the + * target relation supports UPDATE. + */ + if (onConflictAction == ONCONFLICT_UPDATE) + CheckCmdReplicaIdentity(resultRel, CMD_UPDATE); break; case RELKIND_SEQUENCE: ereport(ERROR, diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 514eae1037dc3..1f2da072632e3 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -360,8 +360,12 @@ ExecFindPartition(ModifyTableState *mtstate, true, false); if (rri) { + ModifyTable *node = (ModifyTable *) mtstate->ps.plan; + /* Verify this ResultRelInfo allows INSERTs */ - CheckValidResultRel(rri, CMD_INSERT, NIL); + CheckValidResultRel(rri, CMD_INSERT, + node ? node->onConflictAction : ONCONFLICT_NONE, + NIL); /* * Initialize information needed to insert this and @@ -527,7 +531,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, * partition-key becomes a DELETE+INSERT operation, so this check is still * required when the operation is CMD_UPDATE. */ - CheckValidResultRel(leaf_part_rri, CMD_INSERT, NIL); + CheckValidResultRel(leaf_part_rri, CMD_INSERT, + node ? node->onConflictAction : ONCONFLICT_NONE, NIL); /* * Open partition indices. The user may have asked to check for conflicts diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 7c6c2c1f6e42a..b0c4e2c0d32a4 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -4811,7 +4811,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Verify result relation is a valid target for the current operation */ - CheckValidResultRel(resultRelInfo, operation, mergeActions); + CheckValidResultRel(resultRelInfo, operation, node->onConflictAction, + mergeActions); resultRelInfo++; i++; diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 10dcea037c3d0..31133514e8438 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -244,6 +244,7 @@ extern bool ExecCheckPermissions(List *rangeTable, List *rteperminfos, bool ereport_on_violation); extern bool ExecCheckOneRelPerms(RTEPermissionInfo *perminfo); extern void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, + OnConflictAction onConflictAction, List *mergeActions); extern void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 53268059142ee..00f3cb978d774 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -1924,6 +1924,29 @@ DROP PUBLICATION pub1; DROP PUBLICATION pub2; DROP TABLE gencols; RESET client_min_messages; +-- Test that the INSERT ON CONFLICT command correctly checks REPLICA IDENTITY +-- when the target table is published. 
+CREATE TABLE testpub_insert_onconfl_no_ri (a int unique, b int); +CREATE TABLE testpub_insert_onconfl_parted (a int unique, b int) PARTITION by RANGE (a); +CREATE TABLE testpub_insert_onconfl_part_no_ri PARTITION OF testpub_insert_onconfl_parted FOR VALUES FROM (1) TO (10); +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION pub1 FOR ALL TABLES; +RESET client_min_messages; +-- fail - missing REPLICA IDENTITY +INSERT INTO testpub_insert_onconfl_no_ri VALUES (1, 1) ON CONFLICT (a) DO UPDATE SET b = 2; +ERROR: cannot update table "testpub_insert_onconfl_no_ri" because it does not have a replica identity and publishes updates +HINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE. +-- ok - no updates +INSERT INTO testpub_insert_onconfl_no_ri VALUES (1, 1) ON CONFLICT DO NOTHING; +-- fail - missing REPLICA IDENTITY in partition testpub_insert_onconfl_no_ri +INSERT INTO testpub_insert_onconfl_parted VALUES (1, 1) ON CONFLICT (a) DO UPDATE SET b = 2; +ERROR: cannot update table "testpub_insert_onconfl_part_no_ri" because it does not have a replica identity and publishes updates +HINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE. +-- ok - no updates +INSERT INTO testpub_insert_onconfl_parted VALUES (1, 1) ON CONFLICT DO NOTHING; +DROP PUBLICATION pub1; +DROP TABLE testpub_insert_onconfl_no_ri; +DROP TABLE testpub_insert_onconfl_parted; RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index deddf0da8445f..53422d30320d3 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -1223,6 +1223,33 @@ DROP PUBLICATION pub2; DROP TABLE gencols; RESET client_min_messages; + +-- Test that the INSERT ON CONFLICT command correctly checks REPLICA IDENTITY +-- when the target table is published. +CREATE TABLE testpub_insert_onconfl_no_ri (a int unique, b int); +CREATE TABLE testpub_insert_onconfl_parted (a int unique, b int) PARTITION by RANGE (a); +CREATE TABLE testpub_insert_onconfl_part_no_ri PARTITION OF testpub_insert_onconfl_parted FOR VALUES FROM (1) TO (10); + +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION pub1 FOR ALL TABLES; +RESET client_min_messages; + +-- fail - missing REPLICA IDENTITY +INSERT INTO testpub_insert_onconfl_no_ri VALUES (1, 1) ON CONFLICT (a) DO UPDATE SET b = 2; + +-- ok - no updates +INSERT INTO testpub_insert_onconfl_no_ri VALUES (1, 1) ON CONFLICT DO NOTHING; + +-- fail - missing REPLICA IDENTITY in partition testpub_insert_onconfl_no_ri +INSERT INTO testpub_insert_onconfl_parted VALUES (1, 1) ON CONFLICT (a) DO UPDATE SET b = 2; + +-- ok - no updates +INSERT INTO testpub_insert_onconfl_parted VALUES (1, 1) ON CONFLICT DO NOTHING; + +DROP PUBLICATION pub1; +DROP TABLE testpub_insert_onconfl_no_ri; +DROP TABLE testpub_insert_onconfl_parted; + RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; From fc6600fc1cd13b695fbde4b5f3ff0d2e97c36dea Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 4 Sep 2025 11:45:44 +0100 Subject: [PATCH 05/73] Fix replica identity check for MERGE. When executing a MERGE, check that the target relation supports all actions mentioned in the MERGE command. Specifically, check that it has a REPLICA IDENTITY if it publishes updates or deletes and the MERGE command contains update or delete actions. 
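As a sketch, the per-action check added to CheckValidResultRel() has the following shape; foreach_node() and CheckCmdReplicaIdentity() are the real executor helpers, and the committed hunk appears in the diff below:

    if (operation == CMD_MERGE)
    {
        /* Every action of the MERGE must be supported by the target. */
        foreach_node(MergeAction, action, mergeActions)
            CheckCmdReplicaIdentity(resultRel, action->commandType);
    }
    else
        CheckCmdReplicaIdentity(resultRel, operation);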
Failing to do this can silently break replication. Author: Zhijie Hou Reviewed-by: Ashutosh Bapat Reviewed-by: Dean Rasheed Tested-by: Chao Li Discussion: https://postgr.es/m/OS3PR01MB57180C87E43A679A730482DF94B62@OS3PR01MB5718.jpnprd01.prod.outlook.com Backpatch-through: 15 --- src/backend/executor/execMain.c | 11 +++++++- src/test/regress/expected/publication.out | 28 ++++++++++++++++++++ src/test/regress/sql/publication.sql | 31 +++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 8f56d5e312ec2..ff12e2e136438 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1061,7 +1061,16 @@ CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, { case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: - CheckCmdReplicaIdentity(resultRel, operation); + + /* + * For MERGE, check that the target relation supports each action. + * For other operations, just check the operation itself. + */ + if (operation == CMD_MERGE) + foreach_node(MergeAction, action, mergeActions) + CheckCmdReplicaIdentity(resultRel, action->commandType); + else + CheckCmdReplicaIdentity(resultRel, operation); /* * For INSERT ON CONFLICT DO UPDATE, additionally check that the diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 00f3cb978d774..895ca87a0dfeb 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -1947,6 +1947,34 @@ INSERT INTO testpub_insert_onconfl_parted VALUES (1, 1) ON CONFLICT DO NOTHING; DROP PUBLICATION pub1; DROP TABLE testpub_insert_onconfl_no_ri; DROP TABLE testpub_insert_onconfl_parted; +-- Test that the MERGE command correctly checks REPLICA IDENTITY when the +-- target table is published. +CREATE TABLE testpub_merge_no_ri (a int, b int); +CREATE TABLE testpub_merge_pk (a int primary key, b int); +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION pub1 FOR ALL TABLES; +RESET client_min_messages; +-- fail - missing REPLICA IDENTITY +MERGE INTO testpub_merge_no_ri USING testpub_merge_pk s ON s.a >= 1 + WHEN MATCHED THEN UPDATE SET b = s.b; +ERROR: cannot update table "testpub_merge_no_ri" because it does not have a replica identity and publishes updates +HINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE. +-- fail - missing REPLICA IDENTITY +MERGE INTO testpub_merge_no_ri USING testpub_merge_pk s ON s.a >= 1 + WHEN MATCHED THEN DELETE; +ERROR: cannot delete from table "testpub_merge_no_ri" because it does not have a replica identity and publishes deletes +HINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE. 
+-- ok - insert and do nothing are not restricted +MERGE INTO testpub_merge_no_ri USING testpub_merge_pk s ON s.a >= 1 + WHEN MATCHED THEN DO NOTHING + WHEN NOT MATCHED THEN INSERT (a, b) VALUES (0, 0); +-- ok - REPLICA IDENTITY is DEFAULT and table has a PK +MERGE INTO testpub_merge_pk USING testpub_merge_no_ri s ON s.a >= 1 + WHEN MATCHED AND s.a > 0 THEN UPDATE SET b = s.b + WHEN MATCHED THEN DELETE; +DROP PUBLICATION pub1; +DROP TABLE testpub_merge_no_ri; +DROP TABLE testpub_merge_pk; RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 53422d30320d3..3f42306139533 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -1250,6 +1250,37 @@ DROP PUBLICATION pub1; DROP TABLE testpub_insert_onconfl_no_ri; DROP TABLE testpub_insert_onconfl_parted; +-- Test that the MERGE command correctly checks REPLICA IDENTITY when the +-- target table is published. +CREATE TABLE testpub_merge_no_ri (a int, b int); +CREATE TABLE testpub_merge_pk (a int primary key, b int); + +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION pub1 FOR ALL TABLES; +RESET client_min_messages; + +-- fail - missing REPLICA IDENTITY +MERGE INTO testpub_merge_no_ri USING testpub_merge_pk s ON s.a >= 1 + WHEN MATCHED THEN UPDATE SET b = s.b; + +-- fail - missing REPLICA IDENTITY +MERGE INTO testpub_merge_no_ri USING testpub_merge_pk s ON s.a >= 1 + WHEN MATCHED THEN DELETE; + +-- ok - insert and do nothing are not restricted +MERGE INTO testpub_merge_no_ri USING testpub_merge_pk s ON s.a >= 1 + WHEN MATCHED THEN DO NOTHING + WHEN NOT MATCHED THEN INSERT (a, b) VALUES (0, 0); + +-- ok - REPLICA IDENTITY is DEFAULT and table has a PK +MERGE INTO testpub_merge_pk USING testpub_merge_no_ri s ON s.a >= 1 + WHEN MATCHED AND s.a > 0 THEN UPDATE SET b = s.b + WHEN MATCHED THEN DELETE; + +DROP PUBLICATION pub1; +DROP TABLE testpub_merge_no_ri; +DROP TABLE testpub_merge_pk; + RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; From 1129d3e4c8883f25642d1f44750b4d36e29ec006 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 4 Sep 2025 10:18:42 -0500 Subject: [PATCH 06/73] Adjust commentary for WaitEventLWLock in wait_event_names.txt. In addition to changing a couple of references for clarity, this commit combines the two similar comments. --- src/backend/utils/activity/wait_event_names.txt | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 5427da5bc1b19..7553f6eacef7b 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -302,9 +302,12 @@ ABI_compatibility: # This class of wait events has its own set of C structure, so these are # only used for the documentation. # -# NB: Predefined LWLocks (i.e., those declared in lwlocklist.h) must be -# listed in the top section of locks and must be listed in the same order as in -# lwlocklist.h. +# NB: Predefined LWLocks (i.e., those declared with PG_LWLOCK in lwlocklist.h) +# must be listed before the "END OF PREDEFINED LWLOCKS" comment and must be +# listed in the same order as in lwlocklist.h. 
Likewise, the built-in LWLock
+# tranches (i.e., those declared with PG_LWLOCKTRANCHE in lwlocklist.h) must be
+# listed after the "END OF PREDEFINED LWLOCKS" comment and must be listed in
+# the same order as in lwlocklist.h.

 # Section: ClassName - WaitEventLWLock

@@ -356,14 +359,6 @@ AioWorkerSubmissionQueue "Waiting to access AIO worker submission queue."
 #
 # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE)
 #
-# Predefined LWLocks (i.e., those declared at the top of lwlocknames.h) must be
-# listed in the section above and must be listed in the same order as in
-# lwlocknames.h.
-#
-# Likewise, the built-in LWLock tranches (i.e., those declared at the bottom of
-# lwlocknames.h) must be listed in the section below and must be listed in the
-# same order as in lwlocknames.h.
-#
 XactBuffer "Waiting for I/O on a transaction status SLRU buffer."
 CommitTsBuffer "Waiting for I/O on a commit timestamp SLRU buffer."

From f0478149c34dfe02e0f43fc5f832a63a864dc364 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 4 Sep 2025 12:57:03 +0200
Subject: [PATCH 07/73] Clean up newly added guc_tables.inc.c

There was a missing makefile rule to clean up the guc_tables.inc.c
symlink in src/include/.

Oversight in commit 63599896545.

Author: Nathan Bossart
Discussion: https://www.postgresql.org/message-id/flat/dae6fe89-1e0c-4c3f-8d92-19d23374fb10%40eisentraut.org
---
 src/include/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/Makefile b/src/include/Makefile
index 3f94543f3270b..24c5452de98fb 100644
--- a/src/include/Makefile
+++ b/src/include/Makefile
@@ -72,7 +72,7 @@ uninstall:

 clean:
-	rm -f utils/fmgroids.h utils/fmgrprotos.h utils/errcodes.h utils/header-stamp
+	rm -f utils/fmgroids.h utils/fmgrprotos.h utils/guc_tables.inc.c utils/errcodes.h utils/header-stamp
 	rm -f storage/lwlocknames.h utils/probes.h utils/wait_event_types.h
 	rm -f nodes/nodetags.h nodes/header-stamp
 	$(MAKE) -C catalog clean

From d814d7fc3d5257ae258b502229fc7ca97c97270a Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Thu, 4 Sep 2025 15:34:48 -0500
Subject: [PATCH 08/73] Revert recent change to RequestNamedLWLockTranche().

Commit 38b602b028 modified this function to allocate enough space for
MAX_NAMED_TRANCHES (256) requests, which is likely far more than most
clusters need.  This commit reverts that change so that it first
allocates enough space for only 16 requests and resizes the array when
necessary.

While at it, remove the check for too many tranches from this function.
We can now rely on InitializeLWLocks() to do that check via its calls to
LWLockNewTrancheId() for the named tranches.
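The resizing follows the usual grow-on-demand idiom. Condensed, with shortened variable names for illustration (the committed hunk is in the diff below; pg_nextpower2_32() is the real helper that rounds up to the next power of two):

    if (nrequests >= nallocated)
    {
        int     newalloc = pg_nextpower2_32(nrequests + 1);

        array = repalloc(array, newalloc * sizeof(NamedLWLockTrancheRequest));
        nallocated = newalloc;
    }

Growing by powers of two keeps the number of reallocations logarithmic in the number of requests.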
Reviewed-by: Sami Imseih
Discussion: https://postgr.es/m/aLmzwC2dRbqk14y6%40nathan
---
 src/backend/storage/lmgr/lwlock.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 258cdebd0f5c9..fcbac5213a5c0 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -610,6 +610,7 @@ void
 RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
 {
 	NamedLWLockTrancheRequest *request;
+	static int	NamedLWLockTrancheRequestsAllocated;

 	if (!process_shmem_requests_in_progress)
 		elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
@@ -628,17 +629,22 @@ RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)

 	if (NamedLWLockTrancheRequestArray == NULL)
 	{
+		NamedLWLockTrancheRequestsAllocated = 16;
 		NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
 			MemoryContextAlloc(TopMemoryContext,
-							   MAX_NAMED_TRANCHES
+							   NamedLWLockTrancheRequestsAllocated
 							   * sizeof(NamedLWLockTrancheRequest));
 	}

-	if (NamedLWLockTrancheRequests >= MAX_NAMED_TRANCHES)
-		ereport(ERROR,
-				(errmsg("maximum number of tranches already registered"),
-				 errdetail("No more than %d tranches may be registered.",
-						   MAX_NAMED_TRANCHES)));
+	if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
+	{
+		int			i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
+
+		NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
+			repalloc(NamedLWLockTrancheRequestArray,
+					 i * sizeof(NamedLWLockTrancheRequest));
+		NamedLWLockTrancheRequestsAllocated = i;
+	}

 	request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
 	strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);

From ae453120085f7da8f4082bb912e9668410cdccab Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 5 Sep 2025 12:59:29 +0900
Subject: [PATCH 09/73] Change pg_lsn_in_internal() to use soft error reporting

pg_lsn includes pg_lsn_in_internal() for the purpose of parsing an LSN
position for the GUC recovery_target_lsn (21f428ebde39).  It relies on
a boolean called "have_error" that is set when the LSN parsing fails,
leaving its callers to handle any errors.

d9f7f5d32f20 added support for soft error reporting.  This commit
removes some boilerplate code and switches the routine to use soft
error reporting directly, giving the callers of pg_lsn_in_internal()
access to the error message generated on failure.

The pg_lsn_in_internal() routine is renamed to pg_lsn_in_safe(), for
consistency with other similar routines that are given an escontext.
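For callers, the new convention looks roughly like this (a sketch based on the check_recovery_target_lsn() hunk below; ErrorSaveContext and its error_occurred field are the real soft-error API):

    ErrorSaveContext escontext = {T_ErrorSaveContext};
    XLogRecPtr  lsn;

    /* Soft reporting: a parse failure is captured in escontext, not thrown. */
    lsn = pg_lsn_in_safe(str, (Node *) &escontext);
    if (escontext.error_occurred)
        return false;

    /* Passing NULL instead raises a hard ERROR on bad input, as before. */
    lsn = pg_lsn_in_safe(str, NULL);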
Author: Amul Sul Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAAJ_b96No5h5tRuR+KhcC44YcYUCw8WAHuLoqqyyop8_k3+JDQ@mail.gmail.com --- src/backend/access/transam/xlogrecovery.c | 6 ++-- src/backend/utils/adt/pg_lsn.c | 35 ++++++++++------------- src/include/utils/pg_lsn.h | 5 +++- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index f23ec8969c27d..346319338a0ee 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -4834,10 +4834,10 @@ check_recovery_target_lsn(char **newval, void **extra, GucSource source) { XLogRecPtr lsn; XLogRecPtr *myextra; - bool have_error = false; + ErrorSaveContext escontext = {T_ErrorSaveContext}; - lsn = pg_lsn_in_internal(*newval, &have_error); - if (have_error) + lsn = pg_lsn_in_safe(*newval, (Node *) &escontext); + if (escontext.error_occurred) return false; myextra = (XLogRecPtr *) guc_malloc(LOG, sizeof(XLogRecPtr)); diff --git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c index 12de2446f5b69..e1ec5f3bc69cf 100644 --- a/src/backend/utils/adt/pg_lsn.c +++ b/src/backend/utils/adt/pg_lsn.c @@ -25,8 +25,11 @@ * Formatting and conversion routines. *---------------------------------------------------------*/ +/* + * Internal version of pg_lsn_in() with support for soft error reporting. + */ XLogRecPtr -pg_lsn_in_internal(const char *str, bool *have_error) +pg_lsn_in_safe(const char *str, Node *escontext) { int len1, len2; @@ -34,22 +37,14 @@ pg_lsn_in_internal(const char *str, bool *have_error) off; XLogRecPtr result; - Assert(have_error != NULL); - *have_error = false; - /* Sanity check input format. */ len1 = strspn(str, "0123456789abcdefABCDEF"); if (len1 < 1 || len1 > MAXPG_LSNCOMPONENT || str[len1] != '/') - { - *have_error = true; - return InvalidXLogRecPtr; - } + goto syntax_error; + len2 = strspn(str + len1 + 1, "0123456789abcdefABCDEF"); if (len2 < 1 || len2 > MAXPG_LSNCOMPONENT || str[len1 + 1 + len2] != '\0') - { - *have_error = true; - return InvalidXLogRecPtr; - } + goto syntax_error; /* Decode result. 
*/
 	id = (uint32) strtoul(str, NULL, 16);
@@ -57,6 +52,12 @@ pg_lsn_in_internal(const char *str, bool *have_error)
 	result = ((uint64) id << 32) | off;

 	return result;
+
+syntax_error:
+	ereturn(escontext, InvalidXLogRecPtr,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("invalid input syntax for type %s: \"%s\"",
+					"pg_lsn", str)));
 }

 Datum
@@ -64,14 +65,8 @@ pg_lsn_in(PG_FUNCTION_ARGS)
 {
 	char	   *str = PG_GETARG_CSTRING(0);
 	XLogRecPtr	result;
-	bool		have_error = false;
-
-	result = pg_lsn_in_internal(str, &have_error);
-	if (have_error)
-		ereturn(fcinfo->context, (Datum) 0,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("invalid input syntax for type %s: \"%s\"",
-						"pg_lsn", str)));
+
+	result = pg_lsn_in_safe(str, fcinfo->context);

 	PG_RETURN_LSN(result);
 }

diff --git a/src/include/utils/pg_lsn.h b/src/include/utils/pg_lsn.h
index ae198af745029..461a4fdcba954 100644
--- a/src/include/utils/pg_lsn.h
+++ b/src/include/utils/pg_lsn.h
@@ -18,6 +18,9 @@
 #include "access/xlogdefs.h"
 #include "fmgr.h"

+/* forward declaration to avoid node.h include */
+typedef struct Node Node;
+
 static inline XLogRecPtr
 DatumGetLSN(Datum X)
 {
@@ -33,6 +36,6 @@ LSNGetDatum(XLogRecPtr X)
 #define PG_GETARG_LSN(n) DatumGetLSN(PG_GETARG_DATUM(n))
 #define PG_RETURN_LSN(x) return LSNGetDatum(x)

-extern XLogRecPtr pg_lsn_in_internal(const char *str, bool *have_error);
+extern XLogRecPtr pg_lsn_in_safe(const char *str, Node *escontext);

 #endif							/* PG_LSN_H */

From 4246a977bad6e76c4276a0d52def8a3dced154bb Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 5 Sep 2025 13:53:47 +0900
Subject: [PATCH 10/73] Switch some numeric-related functions to use soft
 error reporting

This commit changes some functions related to the numeric data type to
use soft error reporting rather than a custom boolean flag (called
"have_error") that callers of these functions could rely on to bypass
the generation of ERROR reports, letting the callers do their own error
handling (the timestamp, jsonpath and numeric_to_char() code paths
require this).

This results in the removal of some boilerplate code that was required
to handle both the ereport() path and the "have_error" path that
bypasses ereport(), unifying everything under the soft error reporting
facility.  While at it, some duplicated error messages are removed.

The functions upgraded in this commit were suffixed with "_opt_error";
they are renamed with a "_safe" suffix instead.

This change relies on d9f7f5d32f20, which introduced the soft error
reporting infrastructure.
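Inside each converted function, the former dual code paths collapse into a single ereturn() call, which reports softly when an escontext is supplied and raises a hard ERROR otherwise. An abbreviated sketch of numeric_int4_safe(), omitting the NaN/infinity handling visible in the diff below:

    int32
    numeric_int4_safe(Numeric num, Node *escontext)
    {
        NumericVar  x;
        int32       result;

        /* Convert to variable format, then convert to int4. */
        init_var_from_num(num, &x);
        if (!numericvar_to_int32(&x, &result))
            ereturn(escontext, 0,
                    (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                     errmsg("integer out of range")));
        return result;
    }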
Author: Amul Sul Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAAJ_b96No5h5tRuR+KhcC44YcYUCw8WAHuLoqqyyop8_k3+JDQ@mail.gmail.com --- src/backend/utils/adt/formatting.c | 6 +- src/backend/utils/adt/jsonpath_exec.c | 62 +++--- src/backend/utils/adt/numeric.c | 266 ++++++++------------------ src/backend/utils/adt/timestamp.c | 46 ++--- src/include/utils/numeric.h | 22 +-- 5 files changed, 152 insertions(+), 250 deletions(-) diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 7ad453314c307..78e19ac39ac17 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -6389,12 +6389,12 @@ numeric_to_char(PG_FUNCTION_ARGS) if (IS_ROMAN(&Num)) { int32 intvalue; - bool err; + ErrorSaveContext escontext = {T_ErrorSaveContext}; /* Round and convert to int */ - intvalue = numeric_int4_opt_error(value, &err); + intvalue = numeric_int4_safe(value, (Node *) &escontext); /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */ - if (err) + if (escontext.error_occurred) intvalue = PG_INT32_MAX; numstr = int_to_roman(intvalue); } diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index 5a56253522357..8156695e97e09 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -252,7 +252,8 @@ typedef JsonPathBool (*JsonPathPredicateCallback) (JsonPathItem *jsp, JsonbValue *larg, JsonbValue *rarg, void *param); -typedef Numeric (*BinaryArithmFunc) (Numeric num1, Numeric num2, bool *error); +typedef Numeric (*BinaryArithmFunc) (Numeric num1, Numeric num2, + Node *escontext); static JsonPathExecResult executeJsonPath(JsonPath *path, void *vars, JsonPathGetVarCallback getVar, @@ -808,23 +809,23 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, case jpiAdd: return executeBinaryArithmExpr(cxt, jsp, jb, - numeric_add_opt_error, found); + numeric_add_safe, found); case jpiSub: return executeBinaryArithmExpr(cxt, jsp, jb, - numeric_sub_opt_error, found); + numeric_sub_safe, found); case jpiMul: return executeBinaryArithmExpr(cxt, jsp, jb, - numeric_mul_opt_error, found); + numeric_mul_safe, found); case jpiDiv: return executeBinaryArithmExpr(cxt, jsp, jb, - numeric_div_opt_error, found); + numeric_div_safe, found); case jpiMod: return executeBinaryArithmExpr(cxt, jsp, jb, - numeric_mod_opt_error, found); + numeric_mod_safe, found); case jpiPlus: return executeUnaryArithmExpr(cxt, jsp, jb, NULL, found); @@ -1269,11 +1270,12 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, if (jb->type == jbvNumeric) { - bool have_error; + ErrorSaveContext escontext = {T_ErrorSaveContext}; int64 val; - val = numeric_int8_opt_error(jb->val.numeric, &have_error); - if (have_error) + val = numeric_int8_safe(jb->val.numeric, + (Node *) &escontext); + if (escontext.error_occurred) RETURN_ERROR(ereport(ERROR, (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), errmsg("argument \"%s\" of jsonpath item method .%s() is invalid for type %s", @@ -1466,7 +1468,6 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, Datum dtypmod; int32 precision; int32 scale = 0; - bool have_error; bool noerr; ArrayType *arrtypmod; Datum datums[2]; @@ -1478,9 +1479,9 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, if (elem.type != jpiNumeric) elog(ERROR, "invalid jsonpath item type for .decimal() precision"); - precision = numeric_int4_opt_error(jspGetNumeric(&elem), - &have_error); - if (have_error) + precision 
= numeric_int4_safe(jspGetNumeric(&elem), + (Node *) &escontext); + if (escontext.error_occurred) RETURN_ERROR(ereport(ERROR, (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), errmsg("precision of jsonpath item method .%s() is out of range for type integer", @@ -1492,9 +1493,9 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, if (elem.type != jpiNumeric) elog(ERROR, "invalid jsonpath item type for .decimal() scale"); - scale = numeric_int4_opt_error(jspGetNumeric(&elem), - &have_error); - if (have_error) + scale = numeric_int4_safe(jspGetNumeric(&elem), + (Node *) &escontext); + if (escontext.error_occurred) RETURN_ERROR(ereport(ERROR, (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), errmsg("scale of jsonpath item method .%s() is out of range for type integer", @@ -1550,11 +1551,12 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, if (jb->type == jbvNumeric) { - bool have_error; int32 val; + ErrorSaveContext escontext = {T_ErrorSaveContext}; - val = numeric_int4_opt_error(jb->val.numeric, &have_error); - if (have_error) + val = numeric_int4_safe(jb->val.numeric, + (Node *) &escontext); + if (escontext.error_occurred) RETURN_ERROR(ereport(ERROR, (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM), errmsg("argument \"%s\" of jsonpath item method .%s() is invalid for type %s", @@ -2149,11 +2151,11 @@ executeBinaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp, } else { - bool error = false; + ErrorSaveContext escontext = {T_ErrorSaveContext}; - res = func(lval->val.numeric, rval->val.numeric, &error); + res = func(lval->val.numeric, rval->val.numeric, (Node *) &escontext); - if (error) + if (escontext.error_occurred) return jperError; } @@ -2433,7 +2435,7 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, if (jsp->type != jpiDatetime && jsp->type != jpiDate && jsp->content.arg) { - bool have_error; + ErrorSaveContext escontext = {T_ErrorSaveContext}; jspGetArg(jsp, &elem); @@ -2441,9 +2443,9 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp, elog(ERROR, "invalid jsonpath item type for %s argument", jspOperationName(jsp->type)); - time_precision = numeric_int4_opt_error(jspGetNumeric(&elem), - &have_error); - if (have_error) + time_precision = numeric_int4_safe(jspGetNumeric(&elem), + (Node *) &escontext); + if (escontext.error_occurred) RETURN_ERROR(ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION), errmsg("time precision of jsonpath item method .%s() is out of range for type integer", @@ -3462,7 +3464,7 @@ getArrayIndex(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb, JsonValueList found = {0}; JsonPathExecResult res = executeItem(cxt, jsp, jb, &found); Datum numeric_index; - bool have_error = false; + ErrorSaveContext escontext = {T_ErrorSaveContext}; if (jperIsError(res)) return res; @@ -3477,10 +3479,10 @@ getArrayIndex(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb, NumericGetDatum(jbv->val.numeric), Int32GetDatum(0)); - *index = numeric_int4_opt_error(DatumGetNumeric(numeric_index), - &have_error); + *index = numeric_int4_safe(DatumGetNumeric(numeric_index), + (Node *) &escontext); - if (have_error) + if (escontext.error_occurred) RETURN_ERROR(ereport(ERROR, (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT), errmsg("jsonpath array subscript is out of integer range")))); diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index b6287f5d97305..76269918593d7 100644 --- a/src/backend/utils/adt/numeric.c +++ 
b/src/backend/utils/adt/numeric.c @@ -517,7 +517,7 @@ static void numericvar_deserialize(StringInfo buf, NumericVar *var); static Numeric duplicate_numeric(Numeric num); static Numeric make_result(const NumericVar *var); -static Numeric make_result_opt_error(const NumericVar *var, bool *have_error); +static Numeric make_result_safe(const NumericVar *var, Node *escontext); static bool apply_typmod(NumericVar *var, int32 typmod, Node *escontext); static bool apply_typmod_special(Numeric num, int32 typmod, Node *escontext); @@ -717,7 +717,6 @@ numeric_in(PG_FUNCTION_ARGS) */ NumericVar value; int base; - bool have_error; init_var(&value); @@ -776,12 +775,7 @@ numeric_in(PG_FUNCTION_ARGS) if (!apply_typmod(&value, typmod, escontext)) PG_RETURN_NULL(); - res = make_result_opt_error(&value, &have_error); - - if (have_error) - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("value overflows numeric format"))); + res = make_result_safe(&value, escontext); free_var(&value); } @@ -2874,20 +2868,18 @@ numeric_add(PG_FUNCTION_ARGS) Numeric num2 = PG_GETARG_NUMERIC(1); Numeric res; - res = numeric_add_opt_error(num1, num2, NULL); + res = numeric_add_safe(num1, num2, NULL); PG_RETURN_NUMERIC(res); } /* - * numeric_add_opt_error() - + * numeric_add_safe() - * - * Internal version of numeric_add(). If "*have_error" flag is provided, - * on error it's set to true, NULL returned. This is helpful when caller - * need to handle errors by itself. + * Internal version of numeric_add() with support for soft error reporting. */ Numeric -numeric_add_opt_error(Numeric num1, Numeric num2, bool *have_error) +numeric_add_safe(Numeric num1, Numeric num2, Node *escontext) { NumericVar arg1; NumericVar arg2; @@ -2931,7 +2923,7 @@ numeric_add_opt_error(Numeric num1, Numeric num2, bool *have_error) init_var(&result); add_var(&arg1, &arg2, &result); - res = make_result_opt_error(&result, have_error); + res = make_result_safe(&result, escontext); free_var(&result); @@ -2951,21 +2943,19 @@ numeric_sub(PG_FUNCTION_ARGS) Numeric num2 = PG_GETARG_NUMERIC(1); Numeric res; - res = numeric_sub_opt_error(num1, num2, NULL); + res = numeric_sub_safe(num1, num2, NULL); PG_RETURN_NUMERIC(res); } /* - * numeric_sub_opt_error() - + * numeric_sub_safe() - * - * Internal version of numeric_sub(). If "*have_error" flag is provided, - * on error it's set to true, NULL returned. This is helpful when caller - * need to handle errors by itself. + * Internal version of numeric_sub() with support for soft error reporting. */ Numeric -numeric_sub_opt_error(Numeric num1, Numeric num2, bool *have_error) +numeric_sub_safe(Numeric num1, Numeric num2, Node *escontext) { NumericVar arg1; NumericVar arg2; @@ -3009,7 +2999,7 @@ numeric_sub_opt_error(Numeric num1, Numeric num2, bool *have_error) init_var(&result); sub_var(&arg1, &arg2, &result); - res = make_result_opt_error(&result, have_error); + res = make_result_safe(&result, escontext); free_var(&result); @@ -3029,21 +3019,19 @@ numeric_mul(PG_FUNCTION_ARGS) Numeric num2 = PG_GETARG_NUMERIC(1); Numeric res; - res = numeric_mul_opt_error(num1, num2, NULL); + res = numeric_mul_safe(num1, num2, NULL); PG_RETURN_NUMERIC(res); } /* - * numeric_mul_opt_error() - + * numeric_mul_safe() - * - * Internal version of numeric_mul(). If "*have_error" flag is provided, - * on error it's set to true, NULL returned. This is helpful when caller - * need to handle errors by itself. + * Internal version of numeric_mul() with support for soft error reporting. 
*/ Numeric -numeric_mul_opt_error(Numeric num1, Numeric num2, bool *have_error) +numeric_mul_safe(Numeric num1, Numeric num2, Node *escontext) { NumericVar arg1; NumericVar arg2; @@ -3130,7 +3118,7 @@ numeric_mul_opt_error(Numeric num1, Numeric num2, bool *have_error) if (result.dscale > NUMERIC_DSCALE_MAX) round_var(&result, NUMERIC_DSCALE_MAX); - res = make_result_opt_error(&result, have_error); + res = make_result_safe(&result, escontext); free_var(&result); @@ -3150,21 +3138,19 @@ numeric_div(PG_FUNCTION_ARGS) Numeric num2 = PG_GETARG_NUMERIC(1); Numeric res; - res = numeric_div_opt_error(num1, num2, NULL); + res = numeric_div_safe(num1, num2, NULL); PG_RETURN_NUMERIC(res); } /* - * numeric_div_opt_error() - + * numeric_div_safe() - * - * Internal version of numeric_div(). If "*have_error" flag is provided, - * on error it's set to true, NULL returned. This is helpful when caller - * need to handle errors by itself. + * Internal version of numeric_div() with support for soft error reporting. */ Numeric -numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error) +numeric_div_safe(Numeric num1, Numeric num2, Node *escontext) { NumericVar arg1; NumericVar arg2; @@ -3172,9 +3158,6 @@ numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error) Numeric res; int rscale; - if (have_error) - *have_error = false; - /* * Handle NaN and infinities */ @@ -3189,15 +3172,7 @@ numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error) switch (numeric_sign_internal(num2)) { case 0: - if (have_error) - { - *have_error = true; - return NULL; - } - ereport(ERROR, - (errcode(ERRCODE_DIVISION_BY_ZERO), - errmsg("division by zero"))); - break; + goto division_by_zero; case 1: return make_result(&const_pinf); case -1: @@ -3212,15 +3187,7 @@ numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error) switch (numeric_sign_internal(num2)) { case 0: - if (have_error) - { - *have_error = true; - return NULL; - } - ereport(ERROR, - (errcode(ERRCODE_DIVISION_BY_ZERO), - errmsg("division by zero"))); - break; + goto division_by_zero; case 1: return make_result(&const_ninf); case -1: @@ -3251,25 +3218,25 @@ numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error) */ rscale = select_div_scale(&arg1, &arg2); - /* - * If "have_error" is provided, check for division by zero here - */ - if (have_error && (arg2.ndigits == 0 || arg2.digits[0] == 0)) - { - *have_error = true; - return NULL; - } + /* Check for division by zero */ + if (arg2.ndigits == 0 || arg2.digits[0] == 0) + goto division_by_zero; /* * Do the divide and return the result */ div_var(&arg1, &arg2, &result, rscale, true, true); - res = make_result_opt_error(&result, have_error); + res = make_result_safe(&result, escontext); free_var(&result); return res; + +division_by_zero: + ereturn(escontext, NULL, + errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero")); } @@ -3374,30 +3341,25 @@ numeric_mod(PG_FUNCTION_ARGS) Numeric num2 = PG_GETARG_NUMERIC(1); Numeric res; - res = numeric_mod_opt_error(num1, num2, NULL); + res = numeric_mod_safe(num1, num2, NULL); PG_RETURN_NUMERIC(res); } /* - * numeric_mod_opt_error() - + * numeric_mod_safe() - * - * Internal version of numeric_mod(). If "*have_error" flag is provided, - * on error it's set to true, NULL returned. This is helpful when caller - * need to handle errors by itself. + * Internal version of numeric_mod() with support for soft error reporting. 
*/ Numeric -numeric_mod_opt_error(Numeric num1, Numeric num2, bool *have_error) +numeric_mod_safe(Numeric num1, Numeric num2, Node *escontext) { Numeric res; NumericVar arg1; NumericVar arg2; NumericVar result; - if (have_error) - *have_error = false; - /* * Handle NaN and infinities. We follow POSIX fmod() on this, except that * POSIX treats x-is-infinite and y-is-zero identically, raising EDOM and @@ -3410,16 +3372,8 @@ numeric_mod_opt_error(Numeric num1, Numeric num2, bool *have_error) if (NUMERIC_IS_INF(num1)) { if (numeric_sign_internal(num2) == 0) - { - if (have_error) - { - *have_error = true; - return NULL; - } - ereport(ERROR, - (errcode(ERRCODE_DIVISION_BY_ZERO), - errmsg("division by zero"))); - } + goto division_by_zero; + /* Inf % any nonzero = NaN */ return make_result(&const_nan); } @@ -3432,22 +3386,22 @@ numeric_mod_opt_error(Numeric num1, Numeric num2, bool *have_error) init_var(&result); - /* - * If "have_error" is provided, check for division by zero here - */ - if (have_error && (arg2.ndigits == 0 || arg2.digits[0] == 0)) - { - *have_error = true; - return NULL; - } + /* Check for division by zero */ + if (arg2.ndigits == 0 || arg2.digits[0] == 0) + goto division_by_zero; mod_var(&arg1, &arg2, &result); - res = make_result_opt_error(&result, NULL); + res = make_result_safe(&result, escontext); free_var(&result); return res; + +division_by_zero: + ereturn(escontext, NULL, + errcode(ERRCODE_DIVISION_BY_ZERO), + errmsg("division by zero")); } @@ -4404,52 +4358,34 @@ int4_numeric(PG_FUNCTION_ARGS) PG_RETURN_NUMERIC(int64_to_numeric(val)); } +/* + * Internal version of int4_numeric() with support for soft error reporting. + */ int32 -numeric_int4_opt_error(Numeric num, bool *have_error) +numeric_int4_safe(Numeric num, Node *escontext) { NumericVar x; int32 result; - if (have_error) - *have_error = false; - if (NUMERIC_IS_SPECIAL(num)) { - if (have_error) - { - *have_error = true; - return 0; - } + if (NUMERIC_IS_NAN(num)) + ereturn(escontext, 0, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert NaN to %s", "integer"))); else - { - if (NUMERIC_IS_NAN(num)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot convert NaN to %s", "integer"))); - else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot convert infinity to %s", "integer"))); - } + ereturn(escontext, 0, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert infinity to %s", "integer"))); } /* Convert to variable format, then convert to int4 */ init_var_from_num(num, &x); if (!numericvar_to_int32(&x, &result)) - { - if (have_error) - { - *have_error = true; - return 0; - } - else - { - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - } - } + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); return result; } @@ -4459,7 +4395,7 @@ numeric_int4(PG_FUNCTION_ARGS) { Numeric num = PG_GETARG_NUMERIC(0); - PG_RETURN_INT32(numeric_int4_opt_error(num, NULL)); + PG_RETURN_INT32(numeric_int4_safe(num, NULL)); } /* @@ -4492,52 +4428,34 @@ int8_numeric(PG_FUNCTION_ARGS) PG_RETURN_NUMERIC(int64_to_numeric(val)); } +/* + * Internal version of int8_numeric() with support for soft error reporting. 
+ */ int64 -numeric_int8_opt_error(Numeric num, bool *have_error) +numeric_int8_safe(Numeric num, Node *escontext) { NumericVar x; int64 result; - if (have_error) - *have_error = false; - if (NUMERIC_IS_SPECIAL(num)) { - if (have_error) - { - *have_error = true; - return 0; - } + if (NUMERIC_IS_NAN(num)) + ereturn(escontext, 0, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert NaN to %s", "bigint"))); else - { - if (NUMERIC_IS_NAN(num)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot convert NaN to %s", "bigint"))); - else - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot convert infinity to %s", "bigint"))); - } + ereturn(escontext, 0, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot convert infinity to %s", "bigint"))); } /* Convert to variable format, then convert to int8 */ init_var_from_num(num, &x); if (!numericvar_to_int64(&x, &result)) - { - if (have_error) - { - *have_error = true; - return 0; - } - else - { - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range"))); - } - } + ereturn(escontext, 0, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); return result; } @@ -4547,7 +4465,7 @@ numeric_int8(PG_FUNCTION_ARGS) { Numeric num = PG_GETARG_NUMERIC(0); - PG_RETURN_INT64(numeric_int8_opt_error(num, NULL)); + PG_RETURN_INT64(numeric_int8_safe(num, NULL)); } @@ -7583,16 +7501,13 @@ duplicate_numeric(Numeric num) } /* - * make_result_opt_error() - + * make_result_safe() - * * Create the packed db numeric format in palloc()'d memory from * a variable. This will handle NaN and Infinity cases. - * - * If "have_error" isn't NULL, on overflow *have_error is set to true and - * NULL is returned. This is helpful when caller needs to handle errors. */ static Numeric -make_result_opt_error(const NumericVar *var, bool *have_error) +make_result_safe(const NumericVar *var, Node *escontext) { Numeric result; NumericDigit *digits = var->digits; @@ -7601,9 +7516,6 @@ make_result_opt_error(const NumericVar *var, bool *have_error) int n; Size len; - if (have_error) - *have_error = false; - if ((sign & NUMERIC_SIGN_MASK) == NUMERIC_SPECIAL) { /* @@ -7676,19 +7588,9 @@ make_result_opt_error(const NumericVar *var, bool *have_error) /* Check for overflow of int16 fields */ if (NUMERIC_WEIGHT(result) != weight || NUMERIC_DSCALE(result) != var->dscale) - { - if (have_error) - { - *have_error = true; - return NULL; - } - else - { - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("value overflows numeric format"))); - } - } + ereturn(escontext, NULL, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value overflows numeric format"))); dump_numeric("make_result()", result); return result; @@ -7698,12 +7600,12 @@ make_result_opt_error(const NumericVar *var, bool *have_error) /* * make_result() - * - * An interface to make_result_opt_error() without "have_error" argument. + * An interface to make_result_safe() without "escontext" argument. 
*/ static Numeric make_result(const NumericVar *var) { - return make_result_opt_error(var, NULL); + return make_result_safe(var, NULL); } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 3e5f9dc1458e1..156a4830ffda6 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -5629,11 +5629,11 @@ timestamp_part_common(PG_FUNCTION_ARGS, bool retnumeric) case DTK_JULIAN: if (retnumeric) - PG_RETURN_NUMERIC(numeric_add_opt_error(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)), - numeric_div_opt_error(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec), - int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)), - NULL), - NULL)); + PG_RETURN_NUMERIC(numeric_add_safe(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)), + numeric_div_safe(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec), + int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)), + NULL), + NULL)); else PG_RETURN_FLOAT8(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + ((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + @@ -5685,11 +5685,11 @@ timestamp_part_common(PG_FUNCTION_ARGS, bool retnumeric) result = int64_div_fast_to_numeric(timestamp - epoch, 6); else { - result = numeric_div_opt_error(numeric_sub_opt_error(int64_to_numeric(timestamp), - int64_to_numeric(epoch), - NULL), - int64_to_numeric(1000000), - NULL); + result = numeric_div_safe(numeric_sub_safe(int64_to_numeric(timestamp), + int64_to_numeric(epoch), + NULL), + int64_to_numeric(1000000), + NULL); result = DatumGetNumeric(DirectFunctionCall2(numeric_round, NumericGetDatum(result), Int32GetDatum(6))); @@ -5903,11 +5903,11 @@ timestamptz_part_common(PG_FUNCTION_ARGS, bool retnumeric) case DTK_JULIAN: if (retnumeric) - PG_RETURN_NUMERIC(numeric_add_opt_error(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)), - numeric_div_opt_error(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec), - int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)), - NULL), - NULL)); + PG_RETURN_NUMERIC(numeric_add_safe(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)), + numeric_div_safe(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec), + int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)), + NULL), + NULL)); else PG_RETURN_FLOAT8(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + ((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + @@ -5956,11 +5956,11 @@ timestamptz_part_common(PG_FUNCTION_ARGS, bool retnumeric) result = int64_div_fast_to_numeric(timestamp - epoch, 6); else { - result = numeric_div_opt_error(numeric_sub_opt_error(int64_to_numeric(timestamp), - int64_to_numeric(epoch), - NULL), - int64_to_numeric(1000000), - NULL); + result = numeric_div_safe(numeric_sub_safe(int64_to_numeric(timestamp), + int64_to_numeric(epoch), + NULL), + int64_to_numeric(1000000), + NULL); result = DatumGetNumeric(DirectFunctionCall2(numeric_round, NumericGetDatum(result), Int32GetDatum(6))); @@ -6247,9 +6247,9 @@ interval_part_common(PG_FUNCTION_ARGS, bool retnumeric) result = int64_div_fast_to_numeric(val, 6); else result = - numeric_add_opt_error(int64_div_fast_to_numeric(interval->time, 6), - int64_to_numeric(secs_from_day_month), - NULL); + 
numeric_add_safe(int64_div_fast_to_numeric(interval->time, 6),
+								 int64_to_numeric(secs_from_day_month),
+								 NULL);

 			PG_RETURN_NUMERIC(result);
 		}

diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h
index 9e79fc376cbea..215f1ea4f53b4 100644
--- a/src/include/utils/numeric.h
+++ b/src/include/utils/numeric.h
@@ -17,6 +17,9 @@
 #include "common/pg_prng.h"
 #include "fmgr.h"

+/* forward declaration to avoid node.h include */
+typedef struct Node Node;
+
 /*
  * Limits on the precision and scale specifiable in a NUMERIC typmod.  The
  * precision is strictly positive, but the scale may be positive or negative.
@@ -91,18 +94,13 @@ extern char *numeric_normalize(Numeric num);

 extern Numeric int64_to_numeric(int64 val);
 extern Numeric int64_div_fast_to_numeric(int64 val1, int log10val2);

-extern Numeric numeric_add_opt_error(Numeric num1, Numeric num2,
-									 bool *have_error);
-extern Numeric numeric_sub_opt_error(Numeric num1, Numeric num2,
-									 bool *have_error);
-extern Numeric numeric_mul_opt_error(Numeric num1, Numeric num2,
-									 bool *have_error);
-extern Numeric numeric_div_opt_error(Numeric num1, Numeric num2,
-									 bool *have_error);
-extern Numeric numeric_mod_opt_error(Numeric num1, Numeric num2,
-									 bool *have_error);
-extern int32 numeric_int4_opt_error(Numeric num, bool *have_error);
-extern int64 numeric_int8_opt_error(Numeric num, bool *have_error);
+extern Numeric numeric_add_safe(Numeric num1, Numeric num2, Node *escontext);
+extern Numeric numeric_sub_safe(Numeric num1, Numeric num2, Node *escontext);
+extern Numeric numeric_mul_safe(Numeric num1, Numeric num2, Node *escontext);
+extern Numeric numeric_div_safe(Numeric num1, Numeric num2, Node *escontext);
+extern Numeric numeric_mod_safe(Numeric num1, Numeric num2, Node *escontext);
+extern int32 numeric_int4_safe(Numeric num, Node *escontext);
+extern int64 numeric_int8_safe(Numeric num, Node *escontext);

 extern Numeric random_numeric(pg_prng_state *state, Numeric rmin,
 							  Numeric rmax);

From 567d27e8e2b752743626eb259ba75ecdc936eaf3 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 5 Sep 2025 14:10:08 +0900
Subject: [PATCH 11/73] Fix outdated comments in slru.c

SlruRecentlyUsed() has been an inline function since 53c2a97a9266, not
a macro.  The description of long_segment_names was missing from the
comment at the top of SimpleLruInit(), an omission in 4ed8f0913bfd.

Author: Julien Rouhaud
Discussion: https://postgr.es/m/aLpBLMOYwEQkaleF@jrouhaud
Backpatch-through: 17
---
 src/backend/access/transam/slru.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index d7ebd889aea71..5d3fcd62c9443 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -246,6 +246,7 @@ SimpleLruAutotuneBuffers(int divisor, int max)
 * buffer_tranche_id: tranche ID to use for the SLRU's per-buffer LWLocks.
 * bank_tranche_id: tranche ID to use for the bank LWLocks.
* sync_handler: which set of functions to use to handle sync requests + * long_segment_names: use short or long segment names */ void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, @@ -644,7 +645,7 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid) shared->page_number[slotno] == pageno && shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS) { - /* See comments for SlruRecentlyUsed macro */ + /* See comments for SlruRecentlyUsed() */ SlruRecentlyUsed(shared, slotno); /* update the stats counter of pages found in the SLRU */ From 6ede13d1b5f515df0a199a7a830e448dab1511c0 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Fri, 5 Sep 2025 08:18:18 +0100 Subject: [PATCH 12/73] Fix concurrent update issue with MERGE. When executing a MERGE UPDATE action, if there is more than one concurrent update of the target row, the lock-and-retry code would sometimes incorrectly identify the latest version of the target tuple, leading to incorrect results. This was caused by using the ctid field from the TM_FailureData returned by table_tuple_lock() in a case where the result was TM_Ok, which is unsafe because the TM_FailureData struct is not guaranteed to be fully populated in that case. Instead, it should use the tupleid passed to (and updated by) table_tuple_lock(). To reduce the chances of similar errors in the future, improve the commentary for table_tuple_lock() and TM_FailureData to make it clearer that table_tuple_lock() updates the tid passed to it, and most fields of TM_FailureData should not be relied on in non-failure cases. An exception to this is the "traversed" field, which is set in both success and failure cases. Reported-by: Dmitry Author: Yugo Nagata Reviewed-by: Dean Rasheed Reviewed-by: Chao Li Discussion: https://postgr.es/m/1570d30e-2b95-4239-b9c3-f7bf2f2f8556@yandex.ru Backpatch-through: 15 --- src/backend/executor/nodeModifyTable.c | 9 +- src/include/access/tableam.h | 15 +- .../expected/merge-match-recheck.out | 145 ++++++++++++++++++ .../isolation/specs/merge-match-recheck.spec | 18 +++ 4 files changed, 179 insertions(+), 8 deletions(-) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index b0c4e2c0d32a4..4c5647ac38a1c 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3402,7 +3402,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * the tuple moved, and setting our current * resultRelInfo to that. 
*/ - if (ItemPointerIndicatesMovedPartitions(&context->tmfd.ctid)) + if (ItemPointerIndicatesMovedPartitions(tupleid)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be merged was already moved to another partition due to concurrent update"))); @@ -3450,12 +3450,13 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, if (ItemPointerIsValid(&lockedtid)) UnlockTuple(resultRelInfo->ri_RelationDesc, &lockedtid, InplaceUpdateTupleLock); - LockTuple(resultRelInfo->ri_RelationDesc, &context->tmfd.ctid, + LockTuple(resultRelInfo->ri_RelationDesc, tupleid, InplaceUpdateTupleLock); - lockedtid = context->tmfd.ctid; + lockedtid = *tupleid; } + if (!table_tuple_fetch_row_version(resultRelationDesc, - &context->tmfd.ctid, + tupleid, SnapshotAny, resultRelInfo->ri_oldTupleSlot)) elog(ERROR, "failed to fetch the target tuple"); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 1c9e802a6b128..b2ce35e2a3407 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -121,7 +121,9 @@ typedef enum TU_UpdateIndexes /* * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail * because the target tuple is already outdated, they fill in this struct to - * provide information to the caller about what happened. + * provide information to the caller about what happened. When those functions + * succeed, the contents of this struct should not be relied upon, except for + * `traversed`, which may be set in both success and failure cases. * * ctid is the target's ctid link: it is the same as the target's TID if the * target was deleted, or the location of the replacement tuple if the target @@ -137,6 +139,9 @@ typedef enum TU_UpdateIndexes * tuple); otherwise cmax is zero. (We make this restriction because * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other * transactions.) + * + * traversed indicates if an update chain was followed in order to try to lock + * the target tuple. (This may be set in both success and failure cases.) */ typedef struct TM_FailureData { @@ -1508,7 +1513,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * * Input parameters: * relation: relation containing tuple (caller must hold suitable lock) - * tid: TID of tuple to lock + * tid: TID of tuple to lock (updated if an update chain was followed) * snapshot: snapshot to use for visibility determinations * cid: current command ID (used for visibility test, and stored into * tuple's cmax if lock is successful) @@ -1533,8 +1538,10 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * TM_WouldBlock: lock couldn't be acquired and wait_policy is skip * * In the failure cases other than TM_Invisible and TM_Deleted, the routine - * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See - * comments for struct TM_FailureData for additional info. + * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. + * Additionally, in both success and failure cases, tmfd->traversed is set if + * an update chain was followed. See comments for struct TM_FailureData for + * additional info. 
*/ static inline TM_Result table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, diff --git a/src/test/isolation/expected/merge-match-recheck.out b/src/test/isolation/expected/merge-match-recheck.out index 90300f1db5ab3..4250b85af2d3c 100644 --- a/src/test/isolation/expected/merge-match-recheck.out +++ b/src/test/isolation/expected/merge-match-recheck.out @@ -271,6 +271,151 @@ key|balance|status|val step c1: COMMIT; +starting permutation: update1 update6 merge_bal c2 select1 c1 +step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; +step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; +step merge_bal: + MERGE INTO target t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal: <... completed> +step select1: SELECT * FROM target; +key|balance|status|val +---+-------+------+------------------------------------------------- + 1| 140|s1 |setup updated by update1 updated by update6 when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_pa update6_pa merge_bal_pa c2 select1_pa c1 +step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; +step update6_pa: UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal_pa: <... 
completed> +step select1_pa: SELECT * FROM target_pa; +key|balance|status|val +---+-------+------+------------------------------------------------------- + 1| 140|s1 |setup updated by update1_pa updated by update6_pa when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_tg update6_tg merge_bal_tg c2 select1_tg c1 +s2: NOTICE: Update: (1,160,s1,setup) -> (1,170,s1,"setup updated by update1_tg") +step update1_tg: UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; +s2: NOTICE: Update: (1,170,s1,"setup updated by update1_tg") -> (1,70,s1,"setup updated by update1_tg updated by update6_tg") +step update6_tg: UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; +step merge_bal_tg: + WITH t AS ( + MERGE INTO target_tg t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3' + RETURNING t.* + ) + SELECT * FROM t; + +step c2: COMMIT; +s1: NOTICE: Update: (1,70,s1,"setup updated by update1_tg updated by update6_tg") -> (1,140,s1,"setup updated by update1_tg updated by update6_tg when1") +step merge_bal_tg: <... completed> +key|balance|status|val +---+-------+------+------------------------------------------------------- + 1| 140|s1 |setup updated by update1_tg updated by update6_tg when1 +(1 row) + +step select1_tg: SELECT * FROM target_tg; +key|balance|status|val +---+-------+------+------------------------------------------------------- + 1| 140|s1 |setup updated by update1_tg updated by update6_tg when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update7 update6 merge_bal c2 select1 c1 +step update7: UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; +step update6: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; +step merge_bal: + MERGE INTO target t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal: <... completed> +step select1: SELECT * FROM target; +key|balance|status|val +---+-------+------+------------------------------------------------- + 1| 2000|s1 |setup updated by update7 updated by update6 when3 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_pa_move merge_bal_pa c2 c1 +step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal_pa: <... 
completed> +ERROR: tuple to be locked was already moved to another partition due to concurrent update +step c1: COMMIT; + +starting permutation: update1_pa update1_pa_move merge_bal_pa c2 c1 +step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; +step update1_pa_move: UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + +step c2: COMMIT; +step merge_bal_pa: <... completed> +ERROR: tuple to be locked was already moved to another partition due to concurrent update +step c1: COMMIT; + starting permutation: update1 merge_delete c2 select1 c1 step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; step merge_delete: diff --git a/src/test/isolation/specs/merge-match-recheck.spec b/src/test/isolation/specs/merge-match-recheck.spec index 15226e40c9efc..6e7a776d17e5a 100644 --- a/src/test/isolation/specs/merge-match-recheck.spec +++ b/src/test/isolation/specs/merge-match-recheck.spec @@ -146,6 +146,8 @@ setup BEGIN ISOLATION LEVEL READ COMMITTED; } step "update1" { UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; } +step "update1_pa" { UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; } +step "update1_pa_move" { UPDATE target_pa t SET balance = 210, val = t.val || ' updated by update1_pa_move' WHERE t.key = 1; } step "update1_tg" { UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; } step "update2" { UPDATE target t SET status = 's2', val = t.val || ' updated by update2' WHERE t.key = 1; } step "update2_tg" { UPDATE target_tg t SET status = 's2', val = t.val || ' updated by update2_tg' WHERE t.key = 1; } @@ -153,6 +155,10 @@ step "update3" { UPDATE target t SET status = 's3', val = t.val || ' updated by step "update3_tg" { UPDATE target_tg t SET status = 's3', val = t.val || ' updated by update3_tg' WHERE t.key = 1; } step "update5" { UPDATE target t SET status = 's5', val = t.val || ' updated by update5' WHERE t.key = 1; } step "update5_tg" { UPDATE target_tg t SET status = 's5', val = t.val || ' updated by update5_tg' WHERE t.key = 1; } +step "update6" { UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update6' WHERE t.key = 1; } +step "update6_pa" { UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update6_pa' WHERE t.key = 1; } +step "update6_tg" { UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update6_tg' WHERE t.key = 1; } +step "update7" { UPDATE target t SET balance = 350, val = t.val || ' updated by update7' WHERE t.key = 1; } step "update_bal1" { UPDATE target t SET balance = 50, val = t.val || ' updated by update_bal1' WHERE t.key = 1; } step "update_bal1_pa" { UPDATE target_pa t SET balance = 50, val = t.val || ' updated by update_bal1_pa' WHERE t.key = 1; } step "update_bal1_tg" { UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1; } @@ -179,6 +185,18 @@ 
permutation "update_bal1" "merge_bal" "c2" "select1" "c1" permutation "update_bal1_pa" "merge_bal_pa" "c2" "select1_pa" "c1" permutation "update_bal1_tg" "merge_bal_tg" "c2" "select1_tg" "c1" +# merge_bal sees row concurrently updated twice and rechecks WHEN conditions, different check passes, so final balance = 140 +permutation "update1" "update6" "merge_bal" "c2" "select1" "c1" +permutation "update1_pa" "update6_pa" "merge_bal_pa" "c2" "select1_pa" "c1" +permutation "update1_tg" "update6_tg" "merge_bal_tg" "c2" "select1_tg" "c1" + +# merge_bal sees row concurrently updated twice, first update would cause all checks to fail, second update causes different check to pass, so final balance = 2000 +permutation "update7" "update6" "merge_bal" "c2" "select1" "c1" + +# merge_bal sees concurrently updated row moved to new partition, so fails +permutation "update1_pa_move" "merge_bal_pa" "c2" "c1" +permutation "update1_pa" "update1_pa_move" "merge_bal_pa" "c2" "c1" + # merge_delete sees concurrently updated row and rechecks WHEN conditions, but recheck passes and row is deleted permutation "update1" "merge_delete" "c2" "select1" "c1" permutation "update1_tg" "merge_delete_tg" "c2" "select1_tg" "c1" From e3d5ddb7ca91e5982e9d4cff9eef210d97e4f47e Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Fri, 5 Sep 2025 09:33:36 -0400 Subject: [PATCH 13/73] Add assert and log message to visibilitymap_set Add an assert to visibilitymap_set() that the provided heap buffer is exclusively locked, which is expected. Also, enhance the debug logging message to specify which VM flags were set. Based on a related suggestion by Kirill Reshke on an in-progress patchset. Author: Melanie Plageman Reviewed-by: Kirill Reshke Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CALdSSPhAU56g1gGVT0%2BwG8RrSWE6qW8TOfNJS1HNAWX6wPgbFA%40mail.gmail.com --- src/backend/access/heap/visibilitymap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 953ad4a484399..7306c16f05cd3 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -255,7 +255,8 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, uint8 status; #ifdef TRACE_VISIBILITYMAP - elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk); + elog(DEBUG1, "vm_set flags 0x%02X for %s %d", + flags, RelationGetRelationName(rel), heapBlk); #endif Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); @@ -269,6 +270,8 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk) elog(ERROR, "wrong heap buffer passed to visibilitymap_set"); + Assert(!BufferIsValid(heapBuf) || BufferIsExclusiveLocked(heapBuf)); + /* Check that we have the right VM page pinned */ if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock) elog(ERROR, "wrong VM buffer passed to visibilitymap_set"); From 50e4c6ace5e69fbe69868c270a1c76acd4cb12ec Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Fri, 5 Sep 2025 12:25:59 -0400 Subject: [PATCH 14/73] bufmgr: Use consistent naming of the clock-sweep algorithm Minor edits to comments only. 
Author: Greg Burd Reviewed-by: Tomas Vondra Reviewed-by: Andres Freund Discussion: https://postgr.es/m/70C6A5B5-2A20-4D0B-BC73-EB09DD62D61C@getmailspring.com --- src/backend/storage/buffer/README | 4 ++-- src/backend/storage/buffer/bufmgr.c | 8 ++++---- src/backend/storage/buffer/freelist.c | 10 +++++----- src/backend/storage/buffer/localbuf.c | 2 +- src/include/storage/buf_internals.h | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/backend/storage/buffer/README b/src/backend/storage/buffer/README index a182fcd660ccb..4b13da5d7add8 100644 --- a/src/backend/storage/buffer/README +++ b/src/backend/storage/buffer/README @@ -211,9 +211,9 @@ Buffer Ring Replacement Strategy When running a query that needs to access a large number of pages just once, such as VACUUM or a large sequential scan, a different strategy is used. A page that has been touched only by such a scan is unlikely to be needed -again soon, so instead of running the normal clock sweep algorithm and +again soon, so instead of running the normal clock-sweep algorithm and blowing out the entire buffer cache, a small ring of buffers is allocated -using the normal clock sweep algorithm and those buffers are reused for the +using the normal clock-sweep algorithm and those buffers are reused for the whole scan. This also implies that much of the write traffic caused by such a statement will be done by the backend itself and not pushed off onto other processes. diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 350cc0402aa8f..9fc906a4a4082 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3608,7 +3608,7 @@ BufferSync(int flags) * This is called periodically by the background writer process. * * Returns true if it's appropriate for the bgwriter process to go into - * low-power hibernation mode. (This happens if the strategy clock sweep + * low-power hibernation mode. (This happens if the strategy clock-sweep * has been "lapped" and no buffer allocations have occurred recently, * or if the bgwriter has been effectively disabled by setting * bgwriter_lru_maxpages to 0.) @@ -3658,7 +3658,7 @@ BgBufferSync(WritebackContext *wb_context) uint32 new_recent_alloc; /* - * Find out where the freelist clock sweep currently is, and how many + * Find out where the freelist clock-sweep currently is, and how many * buffer allocations have happened since our last call. */ strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc); @@ -3679,8 +3679,8 @@ BgBufferSync(WritebackContext *wb_context) /* * Compute strategy_delta = how many buffers have been scanned by the - * clock sweep since last time. If first time through, assume none. Then - * see if we are still ahead of the clock sweep, and if so, how many + * clock-sweep since last time. If first time through, assume none. Then + * see if we are still ahead of the clock-sweep, and if so, how many * buffers we could scan before we'd catch up with it and "lap" it. Note: * weird-looking coding of xxx_passes comparisons are to avoid bogus * behavior when the passes counts wrap around. diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 01909be027258..cd94a7d8a7b39 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -33,7 +33,7 @@ typedef struct slock_t buffer_strategy_lock; /* - * Clock sweep hand: index of next buffer to consider grabbing. 
Note that + * clock-sweep hand: index of next buffer to consider grabbing. Note that * this isn't a concrete buffer - we only ever increase the value. So, to * get an actual buffer, it needs to be used modulo NBuffers. */ @@ -51,7 +51,7 @@ typedef struct * Statistics. These counters should be wide enough that they can't * overflow during a single bgwriter cycle. */ - uint32 completePasses; /* Complete cycles of the clock sweep */ + uint32 completePasses; /* Complete cycles of the clock-sweep */ pg_atomic_uint32 numBufferAllocs; /* Buffers allocated since last reset */ /* @@ -311,7 +311,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r } } - /* Nothing on the freelist, so run the "clock sweep" algorithm */ + /* Nothing on the freelist, so run the "clock-sweep" algorithm */ trycounter = NBuffers; for (;;) { @@ -511,7 +511,7 @@ StrategyInitialize(bool init) StrategyControl->firstFreeBuffer = 0; StrategyControl->lastFreeBuffer = NBuffers - 1; - /* Initialize the clock sweep pointer */ + /* Initialize the clock-sweep pointer */ pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0); /* Clear statistics */ @@ -759,7 +759,7 @@ GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state) * * If usage_count is 0 or 1 then the buffer is fair game (we expect 1, * since our own previous usage of the ring element would have left it - * there, but it might've been decremented by clock sweep since then). A + * there, but it might've been decremented by clock-sweep since then). A * higher usage_count indicates someone else has touched the buffer, so we * shouldn't re-use it. */ diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 3c0d20f4659d2..04fef13409b02 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -229,7 +229,7 @@ GetLocalVictimBuffer(void) ResourceOwnerEnlarge(CurrentResourceOwner); /* - * Need to get a new buffer. We use a clock sweep algorithm (essentially + * Need to get a new buffer. We use a clock-sweep algorithm (essentially * the same as what freelist.c does now...) */ trycounter = NLocBuffer; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 52a71b138f736..3a210c710f633 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -80,8 +80,8 @@ StaticAssertDecl(BUF_REFCOUNT_BITS + BUF_USAGECOUNT_BITS + BUF_FLAG_BITS == 32, * The maximum allowed value of usage_count represents a tradeoff between * accuracy and speed of the clock-sweep buffer management algorithm. A * large value (comparable to NBuffers) would approximate LRU semantics. - * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of - * clock sweeps to find a free buffer, so in practice we don't want the + * But it can take as many as BM_MAX_USAGE_COUNT+1 complete cycles of the + * clock-sweep hand to find a free buffer, so in practice we don't want the * value to be very large. */ #define BM_MAX_USAGE_COUNT 5 From 2c789405275928ce0d2ceb7c4add91d27df92502 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Fri, 5 Sep 2025 12:25:59 -0400 Subject: [PATCH 15/73] bufmgr: Remove freelist, always use clock-sweep This set of changes removes the list of available buffers and instead simply uses the clock-sweep algorithm to find and return an available buffer. This also removes the have_free_buffer() function and simply caps the pg_autoprewarm process to at most NBuffers. 
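To make the control flow that remains concrete, here is a self-contained toy
version of clock-sweep victim selection (pin and usage counts only; the
buffer mapping table, header spinlocks, and access-strategy rings are all
omitted, and every name is invented for the sketch rather than taken from the
patch):

    #include <stdio.h>

    #define NBUF 8

    struct toybuf { int pins; int usage; };

    static struct toybuf pool[NBUF];
    static unsigned hand;            /* the clock-sweep hand */

    /* Return the index of a victim buffer, or -1 if everything is pinned. */
    static int
    get_victim(void)
    {
        int spins = NBUF;

        for (;;)
        {
            unsigned idx = hand++ % NBUF;
            struct toybuf *b = &pool[idx];

            if (b->pins == 0)
            {
                if (b->usage == 0)
                    return (int) idx;   /* found a victim; caller would pin it */
                b->usage--;             /* recently used: age it and keep sweeping */
                spins = NBUF;           /* progress was made; reset the give-up counter */
            }
            else if (--spins == 0)
                return -1;              /* a full lap of pinned buffers: give up */
        }
    }

    int
    main(void)
    {
        pool[0].pins = 1;               /* pinned buffers are skipped */
        pool[1].usage = 2;              /* aged on each pass of the hand */
        printf("victim = %d\n", get_victim());   /* prints "victim = 2" */
        return 0;
    }

The spins counter plays the role of the patch's trycounter: a complete lap in
which every buffer is pinned means no victim can be produced.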
While on the surface this appears to be removing an optimization, it is
in fact eliminating code that induces overhead in the form of
synchronization that is problematic for multi-core systems. The main reason
for removing the freelist, however, is not the moderate improvement in
scalability, but that having the freelist would require dedicated complexity
in several upcoming patches. As we have not been able to find a case
benefiting from the freelist...

Author: Greg Burd
Reviewed-by: Tomas Vondra
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/70C6A5B5-2A20-4D0B-BC73-EB09DD62D61C@getmailspring.com
---
 contrib/pg_prewarm/autoprewarm.c      |  30 +++----
 src/backend/storage/buffer/README     |  40 +++------
 src/backend/storage/buffer/buf_init.c |   9 --
 src/backend/storage/buffer/bufmgr.c   |  29 +------
 src/backend/storage/buffer/freelist.c | 119 +-------------------------
 src/include/storage/buf_internals.h   |  13 +--
 6 files changed, 31 insertions(+), 209 deletions(-)

diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index 880e897796a1e..8b68dafc2611c 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -370,6 +370,15 @@ apw_load_buffers(void)
 	apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
 	apw_state->prewarmed_blocks = 0;
 
+	/* Don't prewarm more than we can fit. */
+	if (num_elements > NBuffers)
+	{
+		num_elements = NBuffers;
+		ereport(LOG,
+				(errmsg("autoprewarm capping prewarmed blocks to %d (shared_buffers size)",
+						NBuffers)));
+	}
+
 	/* Get the info position of the first block of the next database. */
 	while (apw_state->prewarm_start_idx < num_elements)
 	{
@@ -410,10 +419,6 @@ apw_load_buffers(void)
 		apw_state->database = current_db;
 		Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
 
-		/* If we've run out of free buffers, don't launch another worker. */
-		if (!have_free_buffer())
-			break;
-
 		/*
 		 * Likewise, don't launch if we've already been told to shut down.
 		 * (The launch would fail anyway, but we might as well skip it.)
@@ -462,12 +467,6 @@ apw_read_stream_next_block(ReadStream *stream,
 	{
 		BlockInfoRecord blk = p->block_info[p->pos];
 
-		if (!have_free_buffer())
-		{
-			p->pos = apw_state->prewarm_stop_idx;
-			return InvalidBlockNumber;
-		}
-
 		if (blk.tablespace != p->tablespace)
 			return InvalidBlockNumber;
 
@@ -523,10 +522,10 @@ autoprewarm_database_main(Datum main_arg)
 	blk = block_info[i];
 
 	/*
-	 * Loop until we run out of blocks to prewarm or until we run out of free
+	 * Loop until we run out of blocks to prewarm or until we run out of
 	 * buffers.
 	 */
-	while (i < apw_state->prewarm_stop_idx && have_free_buffer())
+	while (i < apw_state->prewarm_stop_idx)
 	{
 		Oid			tablespace = blk.tablespace;
 		RelFileNumber filenumber = blk.filenumber;
@@ -568,14 +567,13 @@ autoprewarm_database_main(Datum main_arg)
 
 		/*
 		 * We have a relation; now let's loop until we find a valid fork of
-		 * the relation or we run out of free buffers. Once we've read from
-		 * all valid forks or run out of options, we'll close the relation and
+		 * the relation or we run out of buffers. Once we've read from all
+		 * valid forks or run out of options, we'll close the relation and
		 * move on.
*/ while (i < apw_state->prewarm_stop_idx && blk.tablespace == tablespace && - blk.filenumber == filenumber && - have_free_buffer()) + blk.filenumber == filenumber) { ForkNumber forknum = blk.forknum; BlockNumber nblocks; diff --git a/src/backend/storage/buffer/README b/src/backend/storage/buffer/README index 4b13da5d7add8..119f31b5d6584 100644 --- a/src/backend/storage/buffer/README +++ b/src/backend/storage/buffer/README @@ -128,11 +128,11 @@ independently. If it is necessary to lock more than one partition at a time, they must be locked in partition-number order to avoid risk of deadlock. * A separate system-wide spinlock, buffer_strategy_lock, provides mutual -exclusion for operations that access the buffer free list or select -buffers for replacement. A spinlock is used here rather than a lightweight -lock for efficiency; no other locks of any sort should be acquired while -buffer_strategy_lock is held. This is essential to allow buffer replacement -to happen in multiple backends with reasonable concurrency. +exclusion for operations that select buffers for replacement. A spinlock is +used here rather than a lightweight lock for efficiency; no other locks of any +sort should be acquired while buffer_strategy_lock is held. This is essential +to allow buffer replacement to happen in multiple backends with reasonable +concurrency. * Each buffer header contains a spinlock that must be taken when examining or changing fields of that buffer header. This allows operations such as @@ -158,18 +158,8 @@ unset by sleeping on the buffer's condition variable. Normal Buffer Replacement Strategy ---------------------------------- -There is a "free list" of buffers that are prime candidates for replacement. -In particular, buffers that are completely free (contain no valid page) are -always in this list. We could also throw buffers into this list if we -consider their pages unlikely to be needed soon; however, the current -algorithm never does that. The list is singly-linked using fields in the -buffer headers; we maintain head and tail pointers in global variables. -(Note: although the list links are in the buffer headers, they are -considered to be protected by the buffer_strategy_lock, not the buffer-header -spinlocks.) To choose a victim buffer to recycle when there are no free -buffers available, we use a simple clock-sweep algorithm, which avoids the -need to take system-wide locks during common operations. It works like -this: +To choose a victim buffer to recycle we use a simple clock-sweep algorithm. It +works like this: Each buffer header contains a usage counter, which is incremented (up to a small limit value) whenever the buffer is pinned. (This requires only the @@ -184,20 +174,14 @@ The algorithm for a process that needs to obtain a victim buffer is: 1. Obtain buffer_strategy_lock. -2. If buffer free list is nonempty, remove its head buffer. Release -buffer_strategy_lock. If the buffer is pinned or has a nonzero usage count, -it cannot be used; ignore it go back to step 1. Otherwise, pin the buffer, -and return it. +2. Select the buffer pointed to by nextVictimBuffer, and circularly advance +nextVictimBuffer for next time. Release buffer_strategy_lock. -3. Otherwise, the buffer free list is empty. Select the buffer pointed to by -nextVictimBuffer, and circularly advance nextVictimBuffer for next time. -Release buffer_strategy_lock. - -4. If the selected buffer is pinned or has a nonzero usage count, it cannot +3. 
If the selected buffer is pinned or has a nonzero usage count, it cannot be used. Decrement its usage count (if nonzero), reacquire buffer_strategy_lock, and return to step 3 to examine the next buffer. -5. Pin the selected buffer, and return. +4. Pin the selected buffer, and return. (Note that if the selected buffer is dirty, we will have to write it out before we can recycle it; if someone else pins the buffer meanwhile we will @@ -234,7 +218,7 @@ the ring strategy effectively degrades to the normal strategy. VACUUM uses a ring like sequential scans, however, the size of this ring is controlled by the vacuum_buffer_usage_limit GUC. Dirty pages are not removed -from the ring. Instead, WAL is flushed if needed to allow reuse of the +from the ring. Instead, the WAL is flushed if needed to allow reuse of the buffers. Before introducing the buffer ring strategy in 8.3, VACUUM's buffers were sent to the freelist, which was effectively a buffer ring of 1 buffer, resulting in excessive WAL flushing. diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index ed1dc488a42b4..6fd3a6bbac5ea 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -128,20 +128,11 @@ BufferManagerShmemInit(void) pgaio_wref_clear(&buf->io_wref); - /* - * Initially link all the buffers together as unused. Subsequent - * management of this list is done by freelist.c. - */ - buf->freeNext = i + 1; - LWLockInitialize(BufferDescriptorGetContentLock(buf), LWTRANCHE_BUFFER_CONTENT); ConditionVariableInit(BufferDescriptorGetIOCV(buf)); } - - /* Correct last entry of linked list */ - GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST; } /* Init other shared buffer-management stuff */ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 9fc906a4a4082..fe470de63f20c 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2094,12 +2094,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ UnpinBuffer(victim_buf_hdr); - /* - * The victim buffer we acquired previously is clean and unused, let - * it be found again quickly - */ - StrategyFreeBuffer(victim_buf_hdr); - /* remaining code should match code at top of routine */ existing_buf_hdr = GetBufferDescriptor(existing_buf_id); @@ -2158,8 +2152,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } /* - * InvalidateBuffer -- mark a shared buffer invalid and return it to the - * freelist. + * InvalidateBuffer -- mark a shared buffer invalid. * * The buffer header spinlock must be held at entry. We drop it before * returning. (This is sane because the caller must have locked the @@ -2257,11 +2250,6 @@ InvalidateBuffer(BufferDesc *buf) * Done with mapping lock. */ LWLockRelease(oldPartitionLock); - - /* - * Insert the buffer at the head of the list of free buffers. 
- */ - StrategyFreeBuffer(buf); } /* @@ -2679,11 +2667,6 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, { BufferDesc *buf_hdr = GetBufferDescriptor(buffers[i] - 1); - /* - * The victim buffer we acquired previously is clean and unused, - * let it be found again quickly - */ - StrategyFreeBuffer(buf_hdr); UnpinBuffer(buf_hdr); } @@ -2756,12 +2739,6 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, valid = PinBuffer(existing_hdr, strategy); LWLockRelease(partition_lock); - - /* - * The victim buffer we acquired previously is clean and unused, - * let it be found again quickly - */ - StrategyFreeBuffer(victim_buf_hdr); UnpinBuffer(victim_buf_hdr); buffers[i] = BufferDescriptorGetBuffer(existing_hdr); @@ -3658,8 +3635,8 @@ BgBufferSync(WritebackContext *wb_context) uint32 new_recent_alloc; /* - * Find out where the freelist clock-sweep currently is, and how many - * buffer allocations have happened since our last call. + * Find out where the clock-sweep currently is, and how many buffer + * allocations have happened since our last call. */ strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc); diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index cd94a7d8a7b39..7d59a92bd1a88 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -39,14 +39,6 @@ typedef struct */ pg_atomic_uint32 nextVictimBuffer; - int firstFreeBuffer; /* Head of list of unused buffers */ - int lastFreeBuffer; /* Tail of list of unused buffers */ - - /* - * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is, - * when the list is empty) - */ - /* * Statistics. These counters should be wide enough that they can't * overflow during a single bgwriter cycle. @@ -163,23 +155,6 @@ ClockSweepTick(void) return victim; } -/* - * have_free_buffer -- a lockless check to see if there is a free buffer in - * buffer pool. - * - * If the result is true that will become stale once free buffers are moved out - * by other operations, so the caller who strictly want to use a free buffer - * should not call this. - */ -bool -have_free_buffer(void) -{ - if (StrategyControl->firstFreeBuffer >= 0) - return true; - else - return false; -} - /* * StrategyGetBuffer * @@ -249,69 +224,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r */ pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1); - /* - * First check, without acquiring the lock, whether there's buffers in the - * freelist. Since we otherwise don't require the spinlock in every - * StrategyGetBuffer() invocation, it'd be sad to acquire it here - - * uselessly in most cases. That obviously leaves a race where a buffer is - * put on the freelist but we don't see the store yet - but that's pretty - * harmless, it'll just get used during the next buffer acquisition. - * - * If there's buffers on the freelist, acquire the spinlock to pop one - * buffer of the freelist. Then check whether that buffer is usable and - * repeat if not. - * - * Note that the freeNext fields are considered to be protected by the - * buffer_strategy_lock not the individual buffer spinlocks, so it's OK to - * manipulate them without holding the spinlock. 
- */ - if (StrategyControl->firstFreeBuffer >= 0) - { - while (true) - { - /* Acquire the spinlock to remove element from the freelist */ - SpinLockAcquire(&StrategyControl->buffer_strategy_lock); - - if (StrategyControl->firstFreeBuffer < 0) - { - SpinLockRelease(&StrategyControl->buffer_strategy_lock); - break; - } - - buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer); - Assert(buf->freeNext != FREENEXT_NOT_IN_LIST); - - /* Unconditionally remove buffer from freelist */ - StrategyControl->firstFreeBuffer = buf->freeNext; - buf->freeNext = FREENEXT_NOT_IN_LIST; - - /* - * Release the lock so someone else can access the freelist while - * we check out this buffer. - */ - SpinLockRelease(&StrategyControl->buffer_strategy_lock); - - /* - * If the buffer is pinned or has a nonzero usage_count, we cannot - * use it; discard it and retry. (This can only happen if VACUUM - * put a valid buffer in the freelist and then someone else used - * it before we got to it. It's probably impossible altogether as - * of 8.3, but we'd better check anyway.) - */ - local_buf_state = LockBufHdr(buf); - if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0 - && BUF_STATE_GET_USAGECOUNT(local_buf_state) == 0) - { - if (strategy != NULL) - AddBufferToRing(strategy, buf); - *buf_state = local_buf_state; - return buf; - } - UnlockBufHdr(buf, local_buf_state); - } - } - - /* Nothing on the freelist, so run the "clock-sweep" algorithm */ + /* Use the "clock sweep" algorithm to find a free buffer */ trycounter = NBuffers; for (;;) { @@ -356,29 +269,6 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_r } } -/* - * StrategyFreeBuffer: put a buffer on the freelist - */ -void -StrategyFreeBuffer(BufferDesc *buf) -{ - SpinLockAcquire(&StrategyControl->buffer_strategy_lock); - - /* - * It is possible that we are told to put something in the freelist that - * is already in it; don't screw up the list if so. - */ - if (buf->freeNext == FREENEXT_NOT_IN_LIST) - { - buf->freeNext = StrategyControl->firstFreeBuffer; - if (buf->freeNext < 0) - StrategyControl->lastFreeBuffer = buf->buf_id; - StrategyControl->firstFreeBuffer = buf->buf_id; - } - - SpinLockRelease(&StrategyControl->buffer_strategy_lock); -} - /* * StrategySyncStart -- tell BgBufferSync where to start syncing * @@ -504,13 +394,6 @@ StrategyInitialize(bool init) SpinLockInit(&StrategyControl->buffer_strategy_lock); - /* - * Grab the whole linked list of free buffers for our strategy. We - * assume it was previously set up by BufferManagerShmemInit(). - */ - StrategyControl->firstFreeBuffer = 0; - StrategyControl->lastFreeBuffer = NBuffers - 1; - /* Initialize the clock-sweep pointer */ pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 3a210c710f633..dfd614f7ca449 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -217,8 +217,7 @@ BufMappingPartitionLockByIndex(uint32 index) * single atomic variable. This layout allow us to do some operations in a * single atomic operation, without actually acquiring and releasing spinlock; * for instance, increase or decrease refcount. buf_id field never changes - * after initialization, so does not need locking. freeNext is protected by - * the buffer_strategy_lock not buffer header lock. The LWLock can take care + * after initialization, so does not need locking. The LWLock can take care * of itself. 
The buffer header lock is *not* used to control access to the * data in the buffer! * @@ -264,7 +263,6 @@ typedef struct BufferDesc pg_atomic_uint32 state; int wait_backend_pgprocno; /* backend of pin-count waiter */ - int freeNext; /* link in freelist chain */ PgAioWaitRef io_wref; /* set iff AIO is in progress */ LWLock content_lock; /* to lock access to buffer contents */ @@ -360,13 +358,6 @@ BufferDescriptorGetContentLock(const BufferDesc *bdesc) return (LWLock *) (&bdesc->content_lock); } -/* - * The freeNext field is either the index of the next freelist entry, - * or one of these special values: - */ -#define FREENEXT_END_OF_LIST (-1) -#define FREENEXT_NOT_IN_LIST (-2) - /* * Functions for acquiring/releasing a shared buffer header's spinlock. Do * not apply these to local buffers! @@ -444,7 +435,6 @@ extern void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag extern IOContext IOContextForStrategy(BufferAccessStrategy strategy); extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring); -extern void StrategyFreeBuffer(BufferDesc *buf); extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring); @@ -453,7 +443,6 @@ extern void StrategyNotifyBgWriter(int bgwprocno); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); -extern bool have_free_buffer(void); /* buf_table.c */ extern Size BufTableShmemSize(int size); From 06473f5a344df8c9594ead90a609b86f6724cff8 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 6 Sep 2025 07:49:51 +0900 Subject: [PATCH 16/73] Allow to log raw parse tree. This commit allows to log the raw parse tree in the same way we currently log the parse tree, rewritten tree, and plan tree. To avoid unnecessary log noise for users not interested in this detail, a new GUC option, "debug_print_raw_parse", has been added. When starting the PostgreSQL process with "-d N", and N is 3 or higher, debug_print_raw_parse is enabled automatically, alongside debug_print_parse. Author: Chao Li Reviewed-by: Tender Wang Reviewed-by: Tatsuo Ishii Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAEoWx2mcO0Gpo4vd8kPMAFWeJLSp0MeUUnaLdE1x0tSVd-VzUw%40mail.gmail.com --- doc/src/sgml/config.sgml | 12 +++++++++--- doc/src/sgml/rules.sgml | 1 + src/backend/tcop/postgres.c | 7 +++++++ src/backend/utils/misc/guc_parameters.dat | 6 ++++++ src/backend/utils/misc/guc_tables.c | 1 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/utils/guc.h | 1 + 7 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0a4b3e55ba5ed..2a3685f474a96 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7383,6 +7383,11 @@ local0.* /var/log/postgresql + debug_print_raw_parse (boolean) + + debug_print_raw_parse configuration parameter + + debug_print_parse (boolean) debug_print_parse configuration parameter @@ -7401,8 +7406,8 @@ local0.* /var/log/postgresql These parameters enable various debugging output to be emitted. - When set, they print the resulting parse tree, the query rewriter - output, or the execution plan for each executed query. + When set, they print the resulting raw parse tree, the parse tree, the query + rewriter output, or the execution plan for each executed query. These messages are emitted at LOG message level, so by default they will appear in the server log but will not be sent to the client. 
You can change that by adjusting @@ -7422,7 +7427,8 @@ local0.* /var/log/postgresql When set, debug_pretty_print indents the messages - produced by debug_print_parse, + produced by debug_print_raw_parse, + debug_print_parse, debug_print_rewritten, or debug_print_plan. This results in more readable but much longer output than the compact format used when diff --git a/doc/src/sgml/rules.sgml b/doc/src/sgml/rules.sgml index 8467d961fd0a0..282dcd722d495 100644 --- a/doc/src/sgml/rules.sgml +++ b/doc/src/sgml/rules.sgml @@ -60,6 +60,7 @@ SQL statement where the single parts that it is built from are stored separately. These query trees can be shown in the server log if you set the configuration parameters + debug_print_raw_parse, debug_print_parse, debug_print_rewritten, or debug_print_plan. The rule actions are also diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 0cecd4649020f..d356830f756be 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -649,6 +649,10 @@ pg_parse_query(const char *query_string) TRACE_POSTGRESQL_QUERY_PARSE_DONE(query_string); + if (Debug_print_raw_parse) + elog_node_display(LOG, "raw parse tree", raw_parsetree_list, + Debug_pretty_print); + return raw_parsetree_list; } @@ -3697,7 +3701,10 @@ set_debug_options(int debug_flag, GucContext context, GucSource source) if (debug_flag >= 2) SetConfigOption("log_statement", "all", context, source); if (debug_flag >= 3) + { + SetConfigOption("debug_print_raw_parse", "true", context, source); SetConfigOption("debug_print_parse", "true", context, source); + } if (debug_flag >= 4) SetConfigOption("debug_print_plan", "true", context, source); if (debug_flag >= 5) diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index a157cec3c4d00..0da01627cfec1 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -414,6 +414,12 @@ ifdef => 'DEBUG_NODE_TESTS_ENABLED', }, +{ name => 'debug_print_raw_parse', type => 'bool', context => 'PGC_USERSET', group => 'LOGGING_WHAT', + short_desc => 'Logs each query\'s raw parse tree.', + variable => 'Debug_print_raw_parse', + boot_val => 'false', +}, + { name => 'debug_print_parse', type => 'bool', context => 'PGC_USERSET', group => 'LOGGING_WHAT', short_desc => 'Logs each query\'s parse tree.', variable => 'Debug_print_parse', diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 787933a9e5acd..00c8376cf4ded 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -507,6 +507,7 @@ bool AllowAlterSystem = true; bool log_duration = false; bool Debug_print_plan = false; bool Debug_print_parse = false; +bool Debug_print_raw_parse = false; bool Debug_print_rewritten = false; bool Debug_pretty_print = true; diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index a9d8293474af5..26c0869356485 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -581,6 +581,7 @@ # - What to Log - +#debug_print_raw_parse = off #debug_print_parse = off #debug_print_rewritten = off #debug_print_plan = off diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 72981053e610f..756e80a2c2fcc 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -247,6 +247,7 @@ typedef enum /* GUC vars that are actually defined in guc_tables.c, rather than elsewhere */ extern 
PGDLLIMPORT bool Debug_print_plan; extern PGDLLIMPORT bool Debug_print_parse; +extern PGDLLIMPORT bool Debug_print_raw_parse; extern PGDLLIMPORT bool Debug_print_rewritten; extern PGDLLIMPORT bool Debug_pretty_print; From 43eb2c541941479714c11de9cfb7c67b54f1810d Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 8 Sep 2025 10:07:14 +0900 Subject: [PATCH 17/73] Update parser README to include parse_jsontable.c The README was missing parse_jsontable.c which handles JSON_TABLE. Oversight in de3600452b61. Author: Karthik S Discussion: https://postgr.es/m/CAK4gQD9gdcj+vq_FZGp=Rv-W+41v8_C7cmCUmDeu=cfrOdfXEw@mail.gmail.com Backpatch-through: 17 --- src/backend/parser/README | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/parser/README b/src/backend/parser/README index e0c986a41efea..e26eb437a9f35 100644 --- a/src/backend/parser/README +++ b/src/backend/parser/README @@ -20,6 +20,7 @@ parse_cte.c handle Common Table Expressions (WITH clauses) parse_expr.c handle expressions like col, col + 3, x = 3 or x = 4 parse_enr.c handle ephemeral named rels (trigger transition tables, ...) parse_func.c handle functions, table.column and column identifiers +parse_jsontable.c handle JSON_TABLE parse_merge.c handle MERGE parse_node.c create nodes for various structures parse_oper.c handle operators in expressions From 1f7e9ba3ac4eff13041abcc4c9c517ad835fa449 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Mon, 8 Sep 2025 06:10:15 +0000 Subject: [PATCH 18/73] Post-commit review fixes for 228c370868. This commit fixes three issues: 1) When a disabled subscription is created with retain_dead_tuples set to true, the launcher is not woken up immediately, which may lead to delays in creating the conflict detection slot. Creating the conflict detection slot is essential even when the subscription is not enabled. This ensures that dead tuples are retained, which is necessary for accurately identifying the type of conflict during replication. 2) Conflict-related data was unnecessarily retained when the subscription does not have a table. 3) Conflict-relevant data could be prematurely removed before applying prepared transactions on the publisher that are in the commit critical section. This issue occurred because the backend executing COMMIT PREPARED was not accounted for during the computation of oldestXid in the commit phase on the publisher. As a result, the subscriber could advance the conflict slot's xmin without waiting for such COMMIT PREPARED transactions to complete. We fixed this issue by identifying prepared transactions that are in the commit critical section during computation of oldestXid in commit phase. 
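On the publisher side, the shape of fix 3) is a wraparound-aware minimum of
two candidate transaction IDs. A standalone paraphrase follows; the committed
code (in the walsender hunk below) uses TransactionIdPrecedes(), which also
special-cases permanent XIDs, and all names here are invented for the sketch:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t Xid;
    #define INVALID_XID 0

    /* Circular, modulo-2^32 comparison, as normal XIDs require. */
    static int
    xid_precedes(Xid a, Xid b)
    {
        return (int32_t) (a - b) < 0;
    }

    static Xid
    fold_oldest_xid(Xid active, Xid prepared_in_commit)
    {
        if (prepared_in_commit != INVALID_XID &&
            xid_precedes(prepared_in_commit, active))
            return prepared_in_commit;
        return active;
    }

    int
    main(void)
    {
        /* A prepared transaction in its commit phase holds back the horizon. */
        printf("%u\n", (unsigned) fold_oldest_xid(700, 650));          /* 650 */
        printf("%u\n", (unsigned) fold_oldest_xid(700, INVALID_XID));  /* 700 */
        return 0;
    }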
Author: Zhijie Hou Reviewed-by: shveta malik Reviewed-by: Dilip Kumar Reviewed-by: Nisha Moond Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/OS9PR01MB16913DACB64E5721872AA5C02943BA@OS9PR01MB16913.jpnprd01.prod.outlook.com Discussion: https://postgr.es/m/OS9PR01MB16913F67856B0DA2A909788129400A@OS9PR01MB16913.jpnprd01.prod.outlook.com --- src/backend/access/transam/twophase.c | 55 +++++++++++++++++++++ src/backend/commands/subscriptioncmds.c | 12 ++++- src/backend/replication/logical/tablesync.c | 26 ++++++++++ src/backend/replication/logical/worker.c | 25 ++++++++-- src/backend/replication/walsender.c | 12 +++++ src/include/access/twophase.h | 2 + src/include/replication/worker_internal.h | 1 + src/test/subscription/t/035_conflicts.pl | 29 +++++++++++ 8 files changed, 157 insertions(+), 5 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 7918176fc588e..3e20f4487872e 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -2809,3 +2809,58 @@ LookupGXactBySubid(Oid subid) return found; } + +/* + * TwoPhaseGetXidByLockingProc + * Return the oldest transaction ID from prepared transactions that are + * currently in the commit critical section. + * + * This function only considers transactions in the currently connected + * database. If no matching transactions are found, it returns + * InvalidTransactionId. + */ +TransactionId +TwoPhaseGetOldestXidInCommit(void) +{ + TransactionId oldestRunningXid = InvalidTransactionId; + + LWLockAcquire(TwoPhaseStateLock, LW_SHARED); + + for (int i = 0; i < TwoPhaseState->numPrepXacts; i++) + { + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGPROC *commitproc; + TransactionId xid; + + if (!gxact->valid) + continue; + + if (gxact->locking_backend == INVALID_PROC_NUMBER) + continue; + + /* + * Get the backend that is handling the transaction. It's safe to + * access this backend while holding TwoPhaseStateLock, as the backend + * can only be destroyed after either removing or unlocking the + * current global transaction, both of which require an exclusive + * TwoPhaseStateLock. + */ + commitproc = GetPGProcByNumber(gxact->locking_backend); + + if (MyDatabaseId != commitproc->databaseId) + continue; + + if ((commitproc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0) + continue; + + xid = XidFromFullTransactionId(gxact->fxid); + + if (!TransactionIdIsValid(oldestRunningXid) || + TransactionIdPrecedes(xid, oldestRunningXid)) + oldestRunningXid = xid; + } + + LWLockRelease(TwoPhaseStateLock); + + return oldestRunningXid; +} diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 82cf65fae737a..750d262fccade 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -854,7 +854,17 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, pgstat_create_subscription(subid); - if (opts.enabled) + /* + * Notify the launcher to start the apply worker if the subscription is + * enabled, or to create the conflict detection slot if retain_dead_tuples + * is enabled. + * + * Creating the conflict detection slot is essential even when the + * subscription is not enabled. This ensures that dead tuples are + * retained, which is necessary for accurately identifying the type of + * conflict during replication. 
+ */ + if (opts.enabled || opts.retaindeadtuples) ApplyLauncherWakeupAtCommit(); ObjectAddressSet(myself, SubscriptionRelationId, subid); diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index d3356bc84ee0c..e6da4028d392e 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -1788,6 +1788,32 @@ AllTablesyncsReady(void) return has_subrels && (table_states_not_ready == NIL); } +/* + * Return whether the subscription currently has any relations. + * + * Note: Unlike HasSubscriptionRelations(), this function relies on cached + * information for subscription relations. Additionally, it should not be + * invoked outside of apply or tablesync workers, as MySubscription must be + * initialized first. + */ +bool +HasSubscriptionRelationsCached(void) +{ + bool started_tx; + bool has_subrels; + + /* We need up-to-date subscription tables info here */ + has_subrels = FetchTableStates(&started_tx); + + if (started_tx) + { + CommitTransactionCommand(); + pgstat_report_stat(true); + } + + return has_subrels; +} + /* * Update the two_phase state of the specified subscription in pg_subscription. */ diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index f1ebd63e792ee..c0f6bef5c282c 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -4595,11 +4595,28 @@ wait_for_local_flush(RetainDeadTuplesData *rdt_data) * workers is complex and not worth the effort, so we simply return if not * all tables are in the READY state. * - * It is safe to add new tables with initial states to the subscription - * after this check because any changes applied to these tables should - * have a WAL position greater than the rdt_data->remote_lsn. + * Advancing the transaction ID is necessary even when no tables are + * currently subscribed, to avoid retaining dead tuples unnecessarily. + * While it might seem safe to skip all phases and directly assign + * candidate_xid to oldest_nonremovable_xid during the + * RDT_GET_CANDIDATE_XID phase in such cases, this is unsafe. If users + * concurrently add tables to the subscription, the apply worker may not + * process invalidations in time. Consequently, + * HasSubscriptionRelationsCached() might miss the new tables, leading to + * premature advancement of oldest_nonremovable_xid. + * + * Performing the check during RDT_WAIT_FOR_LOCAL_FLUSH is safe, as + * invalidations are guaranteed to be processed before applying changes + * from newly added tables while waiting for the local flush to reach + * remote_lsn. + * + * Additionally, even if we check for subscription tables during + * RDT_GET_CANDIDATE_XID, they might be dropped before reaching + * RDT_WAIT_FOR_LOCAL_FLUSH. Therefore, it's still necessary to verify + * subscription tables at this stage to prevent unnecessary tuple + * retention. 
*/ - if (!AllTablesyncsReady()) + if (HasSubscriptionRelationsCached() && !AllTablesyncsReady()) { TimestampTz now; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index e3dce9dc68d04..59822f22b8d06 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -51,6 +51,7 @@ #include "access/timeline.h" #include "access/transam.h" +#include "access/twophase.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogreader.h" @@ -2719,6 +2720,7 @@ ProcessStandbyPSRequestMessage(void) { XLogRecPtr lsn = InvalidXLogRecPtr; TransactionId oldestXidInCommit; + TransactionId oldestGXidInCommit; FullTransactionId nextFullXid; FullTransactionId fullOldestXidInCommit; WalSnd *walsnd = MyWalSnd; @@ -2746,6 +2748,16 @@ ProcessStandbyPSRequestMessage(void) * ones replicated. */ oldestXidInCommit = GetOldestActiveTransactionId(true, false); + oldestGXidInCommit = TwoPhaseGetOldestXidInCommit(); + + /* + * Update the oldest xid for standby transmission if an older prepared + * transaction exists and is currently in commit phase. + */ + if (TransactionIdIsValid(oldestGXidInCommit) && + TransactionIdPrecedes(oldestGXidInCommit, oldestXidInCommit)) + oldestXidInCommit = oldestGXidInCommit; + nextFullXid = ReadNextFullTransactionId(); fullOldestXidInCommit = FullTransactionIdFromAllowableAt(nextFullXid, oldestXidInCommit); diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 509bdad9a5d55..64463e9f4afb4 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -68,4 +68,6 @@ extern void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid_res, int szgid); extern bool LookupGXactBySubid(Oid subid); +extern TransactionId TwoPhaseGetOldestXidInCommit(void); + #endif /* TWOPHASE_H */ diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 62ea1a0058081..de00380261279 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -272,6 +272,7 @@ extern void ReplicationOriginNameForLogicalRep(Oid suboid, Oid relid, char *originname, Size szoriginname); extern bool AllTablesyncsReady(void); +extern bool HasSubscriptionRelationsCached(void); extern void UpdateTwoPhaseState(Oid suboid, char new_state); extern void process_syncing_tables(XLogRecPtr current_lsn); diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 51b23a39fa935..e06429c288fe6 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -386,6 +386,35 @@ .*Remote row \(2, 4\); replica identity full \(2, 2\)/, 'update target row was deleted in tab'); +############################################################################### +# Check that the xmin value of the conflict detection slot can be advanced when +# the subscription has no tables. +############################################################################### + +# Remove the table from the publication +$node_B->safe_psql('postgres', "ALTER PUBLICATION tap_pub_B DROP TABLE tab"); + +$node_A->safe_psql('postgres', + "ALTER SUBSCRIPTION $subname_AB REFRESH PUBLICATION"); + +# Remember the next transaction ID to be assigned +$next_xid = $node_A->safe_psql('postgres', "SELECT txid_current() + 1;"); + +# Confirm that the xmin value is advanced to the latest nextXid. 
If no
+# transactions are running, the apply worker selects nextXid as the candidate
+# for the non-removable xid. See GetOldestActiveTransactionId().
+ok( $node_A->poll_query_until(
+	'postgres',
+	"SELECT xmin = $next_xid from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'"
+	),
+	"the xmin value of slot 'pg_conflict_detection' is updated on Node A");
+
+# Re-add the table to the publication for further tests
+$node_B->safe_psql('postgres', "ALTER PUBLICATION tap_pub_B ADD TABLE tab");
+
+$node_A->safe_psql('postgres',
+	"ALTER SUBSCRIPTION $subname_AB REFRESH PUBLICATION WITH (copy_data = false)");
+
 ###############################################################################
 # Check that dead tuple retention stops due to the wait time surpassing
 # max_retention_duration.

From 8191e0c16a0373f851a9f5a8112e3aec105b5276 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Mon, 8 Sep 2025 15:52:23 +0900
Subject: [PATCH 19/73] Fix corruption of pgstats shared hashtable due to OOM
 failures

A new pgstats entry is created as a two-step process:
- The entry is looked up in the shared hashtable of pgstats, and is
  inserted if not found.
- When not found and inserted, its fields are then initialized. This
  part includes a DSA chunk allocation for the stats data of the new
  entry.

As currently coded, if the DSA chunk allocation fails due to an
out-of-memory failure, an ERROR is generated, leaving an inconsistent
entry in the pgstats shared hashtable, as the entry has already been
inserted by the first step. These broken entries can then be found by
other backends, crashing them.

There are only two callers of pgstat_init_entry(), when loading the
pgstats file at startup and when creating a new pgstats entry. This
commit changes pgstat_init_entry() so that it uses
dsa_allocate_extended() with DSA_ALLOC_NO_OOM, making it return NULL on
allocation failure instead of failing. This way, a backend failing an
entry creation can take appropriate cleanup actions in the shared
hashtable before throwing an error. Currently, this means removing the
entry from the shared hashtable before throwing the error for the
allocation failure.

Out-of-memory errors rarely happen in the wild, and we usually do not
bother back-patching fixes for them. However, the problem dealt with
here is a degree worse, as it breaks the shared memory state of
pgstats, impacting other processes that may look at an inconsistent
entry that a different process has failed to create.

Author: Mikhail Kot
Discussion: https://postgr.es/m/CAAi9E7jELo5_-sBENftnc2E8XhW2PKZJWfTC3i2y-GMQd2bcqQ@mail.gmail.com
Backpatch-through: 15
---
 src/backend/utils/activity/pgstat.c       | 11 +++++++++
 src/backend/utils/activity/pgstat_shmem.c | 28 ++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index ffb5b8cce3441..f8e91484e36be 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -1975,6 +1975,17 @@ pgstat_read_statsfile(void)
 
 					header = pgstat_init_entry(key.kind, p);
 					dshash_release_lock(pgStatLocal.shared_hash, p);
 
+					if (header == NULL)
+					{
+						/*
+						 * It would be tempting to switch this ERROR to a
+						 * WARNING, but it would mean that all the statistics
+						 * are discarded when the environment fails on OOM.
+				 */
+				elog(ERROR, "could not allocate entry %u/%u/%" PRIu64 " of type %c",
+					 key.kind, key.dboid,
+					 key.objid, t);
+			}
 
 			if (!read_chunk(fpin,
 							pgstat_get_entry_data(key.kind, header),
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 62de347445365..9dc3212f7dd01 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -289,6 +289,13 @@ pgstat_detach_shmem(void)
 * ------------------------------------------------------------
 */

+/*
+ * Initialize a newly-created entry.
+ *
+ * Returns NULL on allocation failure, so that callers can take cleanup
+ * actions, since the entry being initialized has already been inserted
+ * into the shared hashtable.
+ */
 PgStatShared_Common *
 pgstat_init_entry(PgStat_Kind kind,
 				  PgStatShared_HashEntry *shhashent)
@@ -311,7 +318,12 @@
 	pg_atomic_init_u32(&shhashent->generation, 0);
 	shhashent->dropped = false;
 
-	chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
+	chunk = dsa_allocate_extended(pgStatLocal.dsa,
+								  pgstat_get_kind_info(kind)->shared_size,
+								  DSA_ALLOC_ZERO | DSA_ALLOC_NO_OOM);
+	if (chunk == InvalidDsaPointer)
+		return NULL;
+
 	shheader = dsa_get_address(pgStatLocal.dsa, chunk);
 	shheader->magic = 0xdeadbeef;
@@ -509,6 +521,20 @@ pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64 objid, bool create,
 	if (!shfound)
 	{
 		shheader = pgstat_init_entry(kind, shhashent);
+		if (shheader == NULL)
+		{
+			/*
+			 * The allocation of a new entry failed, so clean up the
+			 * shared hashtable before giving up.
+			 */
+			dshash_delete_entry(pgStatLocal.shared_hash, shhashent);
+
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory"),
+					 errdetail("Failed while allocating entry %u/%u/%" PRIu64 ".",
+							   key.kind, key.dboid, key.objid)));
+		}
 		pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
 
 		if (created_entry != NULL)

From 6456c6e2c4ad1cf9752e09cce37bfcfe2190c5e0 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Mon, 8 Sep 2025 11:38:02 +0000
Subject: [PATCH 20/73] Add test to prevent premature removal of
 conflict-relevant data.

A test has been added to ensure that conflict-relevant data is not
prematurely removed when a concurrent prepared transaction is being
committed on the publisher.

This test introduces an injection point that simulates the presence of
a prepared transaction in the commit phase, validating that the system
correctly delays conflict slot advancement until the transaction is
fully committed.

Additionally, the test serves as a safeguard for developers, ensuring
that the acquisition of the commit timestamp does not occur before
marking DELAY_CHKPT_IN_COMMIT in RecordTransactionCommitPrepared.
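In outline, the publisher-side sequence the test constructs looks like
this (a condensed sketch of the TAP test below; the real test drives it
from a background psql session so that the blocked COMMIT PREPARED can
be observed from the outside):

    SELECT injection_points_attach('commit-after-delay-checkpoint', 'wait');
    BEGIN;
    UPDATE tab SET b = 2 WHERE a = 1;
    PREPARE TRANSACTION 'txn_with_later_commit_ts';
    COMMIT PREPARED 'txn_with_later_commit_ts';
    -- blocks at the injection point after DELAY_CHKPT_IN_COMMIT is set;
    -- until then the subscriber must keep retaining conflict-relevant data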
Reported-by: Robert Haas Author: Zhijie Hou Reviewed-by: shveta malik Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/OS9PR01MB16913F67856B0DA2A909788129400A@OS9PR01MB16913.jpnprd01.prod.outlook.com --- src/backend/access/transam/twophase.c | 6 + src/test/subscription/Makefile | 4 +- src/test/subscription/meson.build | 5 +- src/test/subscription/t/035_conflicts.pl | 160 +++++++++++++++++++++++ 4 files changed, 173 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 3e20f4487872e..d8e2fce2c99b7 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -103,6 +103,7 @@ #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/timestamp.h" @@ -2332,12 +2333,17 @@ RecordTransactionCommitPrepared(TransactionId xid, replorigin = (replorigin_session_origin != InvalidRepOriginId && replorigin_session_origin != DoNotReplicateId); + /* Load the injection point before entering the critical section */ + INJECTION_POINT_LOAD("commit-after-delay-checkpoint"); + START_CRIT_SECTION(); /* See notes in RecordTransactionCommit */ Assert((MyProc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0); MyProc->delayChkptFlags |= DELAY_CHKPT_IN_COMMIT; + INJECTION_POINT_CACHED("commit-after-delay-checkpoint", NULL); + /* * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before * commit time is written. diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile index 50b65d8f6ea21..9d97e7d5c0d6d 100644 --- a/src/test/subscription/Makefile +++ b/src/test/subscription/Makefile @@ -13,9 +13,11 @@ subdir = src/test/subscription top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -EXTRA_INSTALL = contrib/hstore +EXTRA_INSTALL = contrib/hstore \ + src/test/modules/injection_points export with_icu +export enable_injection_points check: $(prove_check) diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build index 586ffba434e11..20b4e523d9307 100644 --- a/src/test/subscription/meson.build +++ b/src/test/subscription/meson.build @@ -5,7 +5,10 @@ tests += { 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), 'tap': { - 'env': {'with_icu': icu.found() ? 'yes' : 'no'}, + 'env': { + 'with_icu': icu.found() ? 'yes' : 'no', + 'enable_injection_points': get_option('injection_points') ? 'yes' : 'no', + }, 'tests': [ 't/001_rep_changes.pl', 't/002_types.pl', diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index e06429c288fe6..db0d5b464e825 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -415,6 +415,166 @@ $node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB REFRESH PUBLICATION WITH (copy_data = false)"); +############################################################################### +# Test that publisher's transactions marked with DELAY_CHKPT_IN_COMMIT prevent +# concurrently deleted tuples on the subscriber from being removed. This test +# also acts as a safeguard to prevent developers from moving the commit +# timestamp acquisition before marking DELAY_CHKPT_IN_COMMIT in +# RecordTransactionCommitPrepared. 
+###############################################################################
+
+my $injection_points_supported = $node_B->check_extension('injection_points');
+
+# This test depends on an injection point to block the prepared transaction
+# commit after marking the DELAY_CHKPT_IN_COMMIT flag.
+if ($injection_points_supported != 0)
+{
+	$node_B->append_conf('postgresql.conf',
+		"shared_preload_libraries = 'injection_points'
+		max_prepared_transactions = 1");
+	$node_B->restart;
+
+	# Disable the subscription on Node B for testing only one-way
+	# replication.
+	$node_B->psql('postgres', "ALTER SUBSCRIPTION $subname_BA DISABLE;");
+
+	# Wait for the apply worker to stop
+	$node_B->poll_query_until('postgres',
+		"SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'"
+	);
+
+	# Truncate the table to clean up existing dead rows in the table. Then
+	# insert a new row.
+	$node_B->safe_psql(
+		'postgres', qq(
+		TRUNCATE tab;
+		INSERT INTO tab VALUES(1, 1);
+	));
+
+	$node_B->wait_for_catchup($subname_AB);
+
+	# Create the injection_points extension on the publisher node and attach to the
+	# commit-after-delay-checkpoint injection point.
+	$node_B->safe_psql(
+		'postgres',
+		"CREATE EXTENSION injection_points;
+		SELECT injection_points_attach('commit-after-delay-checkpoint', 'wait');"
+	);
+
+	# Start a background session on the publisher node to perform an update and
+	# pause at the injection point.
+	my $pub_session = $node_B->background_psql('postgres');
+	$pub_session->query_until(
+		qr/starting_bg_psql/,
+		q{
+		\echo starting_bg_psql
+		BEGIN;
+		UPDATE tab SET b = 2 WHERE a = 1;
+		PREPARE TRANSACTION 'txn_with_later_commit_ts';
+		COMMIT PREPARED 'txn_with_later_commit_ts';
+		}
+	);
+
+	# Confirm the update is suspended
+	$result =
+	  $node_B->safe_psql('postgres', 'SELECT * FROM tab WHERE a = 1');
+	is($result, qq(1|1), 'publisher sees the old row');
+
+	# Delete the row on the subscriber. The deleted row should be retained due to a
+	# transaction on the publisher, which is currently marked with the
+	# DELAY_CHKPT_IN_COMMIT flag.
+	$node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 1;");
+
+	# Get the commit timestamp for the delete
+	my $sub_ts = $node_A->safe_psql('postgres',
+		"SELECT timestamp FROM pg_last_committed_xact();");
+
+	$log_location = -s $node_A->logfile;
+
+	# Confirm that the apply worker keeps requesting publisher status while
+	# waiting for the prepared transaction to commit. Thus, the request log
+	# should appear more than once.
+	$node_A->wait_for_log(
+		qr/sending publisher status request message/,
+		$log_location);
+
+	$log_location = -s $node_A->logfile;
+
+	$node_A->wait_for_log(
+		qr/sending publisher status request message/,
+		$log_location);
+
+	# Confirm that the dead tuple cannot be removed
+	($cmdret, $stdout, $stderr) =
+	  $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;));
+
+	ok($stderr =~ qr/1 are dead but not yet removable/,
+		'the deleted row is non-removable');
+
+	$log_location = -s $node_A->logfile;
+
+	# Wake up and detach the injection point on the publisher node. The prepared
+	# transaction should now commit.
+	$node_B->safe_psql(
+		'postgres',
+		"SELECT injection_points_wakeup('commit-after-delay-checkpoint');
+		SELECT injection_points_detach('commit-after-delay-checkpoint');"
+	);
+
+	# Close the background session on the publisher node
+	ok($pub_session->quit, "close publisher session");
+
+	# Confirm that the transaction committed
+	$result =
+	  $node_B->safe_psql('postgres', 'SELECT * FROM tab WHERE a = 1');
+	is($result, qq(1|2), 'publisher sees the new row');
+
+	# Ensure the UPDATE is replayed on the subscriber
+	$node_B->wait_for_catchup($subname_AB);
+
+	$logfile = slurp_file($node_A->logfile(), $log_location);
+	ok( $logfile =~
+		  qr/conflict detected on relation "public.tab": conflict=update_deleted.*
+.*DETAIL:.* The row to be updated was deleted locally in transaction [0-9]+ at .*
+.*Remote row \(1, 2\); replica identity full \(1, 1\)/,
+		'update target row was deleted in tab');
+
+	# Remember the next transaction ID to be assigned
+	$next_xid =
+	  $node_A->safe_psql('postgres', "SELECT txid_current() + 1;");
+
+	# Confirm that the xmin value is advanced to the latest nextXid after the
+	# prepared transaction on the publisher has been committed.
+	ok( $node_A->poll_query_until(
+			'postgres',
+			"SELECT xmin = $next_xid from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'"
+		),
+		"the xmin value of slot 'pg_conflict_detection' is updated on subscriber"
+	);
+
+	# Confirm that the dead tuple can be removed now
+	($cmdret, $stdout, $stderr) =
+	  $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;));
+
+	ok($stderr =~ qr/1 removed, 0 remain, 0 are dead but not yet removable/,
+		'the deleted row is removed');
+
+	# Get the commit timestamp for the publisher's update
+	my $pub_ts = $node_B->safe_psql('postgres',
+		"SELECT pg_xact_commit_timestamp(xmin) from tab where a=1;");
+
+	# Check that the commit timestamp for the update on the publisher is later than
+	# or equal to the timestamp of the local deletion, as the commit timestamp
+	# should be assigned after marking the DELAY_CHKPT_IN_COMMIT flag.
+	$result = $node_B->safe_psql('postgres',
+		"SELECT '$pub_ts'::timestamp >= '$sub_ts'::timestamp");
+	is($result, qq(t),
+		"pub UPDATE's timestamp is later than that of sub's DELETE");
+
+	# Re-enable the subscription for further tests
+	$node_B->psql('postgres', "ALTER SUBSCRIPTION $subname_BA ENABLE;");
+}
+
 ###############################################################################
 # Check that dead tuple retention stops due to the wait time surpassing
 # max_retention_duration.

From 3399c265543ec3cdbeff2fa2900e03b326705f63 Mon Sep 17 00:00:00 2001
From: Melanie Plageman
Date: Mon, 8 Sep 2025 10:22:42 -0400
Subject: [PATCH 21/73] Remove unneeded VM pin from VM replay

Previously, heap_xlog_visible() called visibilitymap_pin() even after
getting a buffer from XLogReadBufferForRedoExtended() -- which returns
a pinned buffer containing the specified block of the visibility map.
This would just have resulted in visibilitymap_pin() returning early
since the specified page was already present and pinned, but it was
confusing, extraneous code, so remove it. It appears to be an
oversight in 2c03216, though it doesn't seem worth backporting.

While we are at it, remove two VM-related redundant asserts in the
COPY FREEZE code path. visibilitymap_set() already asserts that
PD_ALL_VISIBLE is set on the heap page and checks that the vmbuffer
contains the bits corresponding to the specified heap block, so
callers do not also need to check this.
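For context, the COPY FREEZE code path referred to here is the one
taken by sessions like the following (an illustrative sketch; FREEZE
requires that the table was created or truncated in the same
transaction, so that the tuples can be inserted already frozen):

    BEGIN;
    CREATE TABLE t (a int);           -- created in this transaction
    COPY t FROM STDIN WITH (FREEZE);  -- exercises HEAP_INSERT_FROZEN
    COMMIT;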
Author: Melanie Plageman Reported-by: Melanie Plageman Reported-by: Kirill Reshke Reviewed-by: Kirill Reshke Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CALdSSPhu7WZd%2BEfQDha1nz%3DDC93OtY1%3DUFEdWwSZsASka_2eRQ%40mail.gmail.com --- src/backend/access/heap/heapam.c | 3 --- src/backend/access/heap/heapam_xlog.c | 1 - 2 files changed, 4 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e3e7307ef5f79..4c5ae205a7a60 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2647,9 +2647,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, */ if (all_frozen_set) { - Assert(PageIsAllVisible(page)); - Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer)); - /* * It's fine to use InvalidTransactionId here - this is only used * when HEAP_INSERT_FROZEN is specified, which intentionally diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 5d48f071f53a7..cf843277938de 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -295,7 +295,6 @@ heap_xlog_visible(XLogReaderState *record) LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK); reln = CreateFakeRelcacheEntry(rlocator); - visibilitymap_pin(reln, blkno, &vmbuffer); visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer, xlrec->snapshotConflictHorizon, vmbits); From 585e31fcb6dfcb1d88cfee2371f565574db24869 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 8 Sep 2025 11:50:33 -0400 Subject: [PATCH 22/73] Don't generate fake "*SELECT*" or "*SELECT* %d" subquery aliases. rte->alias should point only to a user-written alias, but in these cases that principle was violated. Fixing this causes some regression test output changes: wherever rte->alias previously had a value and is now NULL, rte->eref is now set to a generated name rather than to rte->alias; and the scheme used to generate eref names differs from what we were doing for aliases. The upshot is that instead of "*SELECT*" or "*SELECT* %d", EXPLAIN will now emit "unnamed_subquery" or "unnamed_subquery_%d". But that's a reasonable descriptor, and we were already producing that in yet other cases, so this seems not too objectionable. 
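As a condensed illustration (a hypothetical session, consistent with
the regression output changes below), a set operation whose branches
were previously labeled "*SELECT* 1" and "*SELECT* 2" now produces
plans along these lines:

    EXPLAIN (COSTS OFF)
    SELECT q1 FROM int8_tbl UNION SELECT q2 FROM int8_tbl;
    -- ... -> Subquery Scan on unnamed_subquery
    -- ... -> Subquery Scan on unnamed_subquery_1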
Author: Tom Lane Co-authored-by: Robert Haas Discussion: https://postgr.es/m/CA+TgmoYSYmDA2GvanzPMci084n+mVucv0bJ0HPbs6uhmMN6HMg@mail.gmail.com --- contrib/postgres_fdw/expected/postgres_fdw.out | 8 ++++---- src/backend/executor/functions.c | 2 +- src/backend/parser/analyze.c | 7 ++----- src/test/regress/expected/partition_prune.out | 4 ++-- src/test/regress/expected/rangefuncs.out | 8 ++++---- src/test/regress/expected/union.out | 14 +++++++------- 6 files changed, 20 insertions(+), 23 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 78b8367d28935..18d727d77907a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -5086,13 +5086,13 @@ SELECT ft1.c1 FROM ft1 JOIN ft2 on ft1.c1 = ft2.c1 WHERE -- =================================================================== EXPLAIN (verbose, costs off) INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Insert on public.ft2 Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8) Batch Size: 1 - -> Subquery Scan on "*SELECT*" - Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1", NULL::integer, "*SELECT*"."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying(10), 'ft2 '::character(10), NULL::user_enum + -> Subquery Scan on unnamed_subquery + Output: unnamed_subquery."?column?", unnamed_subquery."?column?_1", NULL::integer, unnamed_subquery."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying(10), 'ft2 '::character(10), NULL::user_enum -> Foreign Scan on public.ft2 ft2_1 Output: (ft2_1.c1 + 1000), (ft2_1.c2 + 100), (ft2_1.c3 || ft2_1.c3) Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" LIMIT 20::bigint diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 97455b1ed4a5b..630d708d2a3f0 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -2483,7 +2483,7 @@ check_sql_stmt_retval(List *queryTreeList, rte = makeNode(RangeTblEntry); rte->rtekind = RTE_SUBQUERY; rte->subquery = parse; - rte->eref = rte->alias = makeAlias("*SELECT*", colnames); + rte->eref = makeAlias("unnamed_subquery", colnames); rte->lateral = false; rte->inh = false; rte->inFromCl = true; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 34f7c17f576ef..b9763ea17144c 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -777,7 +777,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) */ nsitem = addRangeTableEntryForSubquery(pstate, selectQuery, - makeAlias("*SELECT*", NIL), + NULL, false, false); addNSItemToQuery(pstate, nsitem, true, false, false); @@ -2100,7 +2100,6 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, { /* Process leaf SELECT */ Query *selectQuery; - char selectName[32]; 
ParseNamespaceItem *nsitem; RangeTblRef *rtr; ListCell *tl; @@ -2156,11 +2155,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, /* * Make the leaf query be a subquery in the top-level rangetable. */ - snprintf(selectName, sizeof(selectName), "*SELECT* %d", - list_length(pstate->p_rtable) + 1); nsitem = addRangeTableEntryForSubquery(pstate, selectQuery, - makeAlias(selectName, NIL), + NULL, false, false); diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index d1966cd7d829f..68ecd95180920 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -4763,7 +4763,7 @@ select min(a) over (partition by a order by a) from part_abc where a >= stable_o QUERY PLAN ---------------------------------------------------------------------------------------------- Append - -> Subquery Scan on "*SELECT* 1_1" + -> Subquery Scan on unnamed_subquery_2 -> WindowAgg Window: w1 AS (PARTITION BY part_abc.a ORDER BY part_abc.a) -> Append @@ -4780,7 +4780,7 @@ select min(a) over (partition by a order by a) from part_abc where a >= stable_o -> Index Scan using part_abc_3_2_a_idx on part_abc_3_2 part_abc_4 Index Cond: (a >= (stable_one() + 1)) Filter: (d <= stable_one()) - -> Subquery Scan on "*SELECT* 2" + -> Subquery Scan on unnamed_subquery_1 -> WindowAgg Window: w1 AS (PARTITION BY part_abc_5.a ORDER BY part_abc_5.a) -> Append diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index c21be83aa4aaf..30241e22da270 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -2130,10 +2130,10 @@ select testrngfunc(); explain (verbose, costs off) select * from testrngfunc(); - QUERY PLAN ----------------------------------------------------------- - Subquery Scan on "*SELECT*" - Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1" + QUERY PLAN +---------------------------------------------------------------------- + Subquery Scan on unnamed_subquery + Output: unnamed_subquery."?column?", unnamed_subquery."?column?_1" -> Unique Output: (1), (2) -> Sort diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 96962817ed45a..d3ea433db1577 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -942,7 +942,7 @@ SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1; ERROR: column "q2" does not exist LINE 1: ... int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1... ^ -DETAIL: There is a column named "q2" in table "*SELECT* 2", but it cannot be referenced from this part of the query. +DETAIL: There is a column named "q2" in table "unnamed_subquery", but it cannot be referenced from this part of the query. 
-- But this should work: SELECT q1 FROM int8_tbl EXCEPT (((SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1))) ORDER BY 1; q1 @@ -1338,14 +1338,14 @@ where q2 = q2; ---------------------------------------------------------- Unique -> Merge Append - Sort Key: "*SELECT* 1".q1 - -> Subquery Scan on "*SELECT* 1" + Sort Key: unnamed_subquery.q1 + -> Subquery Scan on unnamed_subquery -> Unique -> Sort Sort Key: i81.q1, i81.q2 -> Seq Scan on int8_tbl i81 Filter: (q2 IS NOT NULL) - -> Subquery Scan on "*SELECT* 2" + -> Subquery Scan on unnamed_subquery_1 -> Unique -> Sort Sort Key: i82.q1, i82.q2 @@ -1374,14 +1374,14 @@ where -q1 = q2; -------------------------------------------------------- Unique -> Merge Append - Sort Key: "*SELECT* 1".q1 - -> Subquery Scan on "*SELECT* 1" + Sort Key: unnamed_subquery.q1 + -> Subquery Scan on unnamed_subquery -> Unique -> Sort Sort Key: i81.q1, i81.q2 -> Seq Scan on int8_tbl i81 Filter: ((- q1) = q2) - -> Subquery Scan on "*SELECT* 2" + -> Subquery Scan on unnamed_subquery_1 -> Unique -> Sort Sort Key: i82.q1, i82.q2 From 6f79024df3461f794aace8bbc8706d8e5f7da091 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 8 Sep 2025 12:24:02 -0400 Subject: [PATCH 23/73] Don't generate fake "ANY_subquery" aliases, either. This is just like the previous commit, but for a different invented alias name. Author: Robert Haas Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CA+TgmoYSYmDA2GvanzPMci084n+mVucv0bJ0HPbs6uhmMN6HMg@mail.gmail.com --- src/backend/optimizer/plan/subselect.c | 2 +- src/test/regress/expected/memoize.out | 8 ++++---- src/test/regress/expected/subselect.out | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index d71ed958e31b3..fae18548e074e 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1397,7 +1397,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, */ nsitem = addRangeTableEntryForSubquery(pstate, subselect, - makeAlias("ANY_subquery", NIL), + NULL, use_lateral, false); rte = nsitem->p_rte; diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out index 150dc1b44cf62..fbcaf113266c5 100644 --- a/src/test/regress/expected/memoize.out +++ b/src/test/regress/expected/memoize.out @@ -545,15 +545,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM tab_anti t1 WHERE t1.a IN (SELECT a FROM tab_anti t2 WHERE t2.b IN (SELECT t1.b FROM tab_anti t3 WHERE t2.a > 1 OFFSET 0)); - QUERY PLAN -------------------------------------------------- + QUERY PLAN +--------------------------------------------------- Nested Loop Semi Join -> Seq Scan on tab_anti t1 -> Nested Loop Semi Join Join Filter: (t1.a = t2.a) -> Seq Scan on tab_anti t2 - -> Subquery Scan on "ANY_subquery" - Filter: (t2.b = "ANY_subquery".b) + -> Subquery Scan on unnamed_subquery + Filter: (t2.b = unnamed_subquery.b) -> Result One-Time Filter: (t2.a > 1) -> Seq Scan on tab_anti t3 diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index c16dff05bc12e..7a1c216a0b1b7 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1692,14 +1692,14 @@ select * from int4_tbl o where (f1, f1) in ------------------------------------------------------------------- Nested Loop Semi Join Output: o.f1 - Join Filter: (o.f1 = "ANY_subquery".f1) + Join Filter: (o.f1 = unnamed_subquery.f1) -> Seq Scan on public.int4_tbl o Output: o.f1 
-> Materialize - Output: "ANY_subquery".f1, "ANY_subquery".g - -> Subquery Scan on "ANY_subquery" - Output: "ANY_subquery".f1, "ANY_subquery".g - Filter: ("ANY_subquery".f1 = "ANY_subquery".g) + Output: unnamed_subquery.f1, unnamed_subquery.g + -> Subquery Scan on unnamed_subquery + Output: unnamed_subquery.f1, unnamed_subquery.g + Filter: (unnamed_subquery.f1 = unnamed_subquery.g) -> Result Output: i.f1, ((generate_series(1, 50)) / 10) -> ProjectSet @@ -2867,8 +2867,8 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd); -> Memoize Cache Key: b.hundred, b.odd Cache Mode: binary - -> Subquery Scan on "ANY_subquery" - Filter: (b.hundred = "ANY_subquery".min) + -> Subquery Scan on unnamed_subquery + Filter: (b.hundred = unnamed_subquery.min) -> Result InitPlan 1 -> Limit From 5a170e992a4d402ef0e1b8ce7284cd78879ece73 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 8 Sep 2025 12:58:07 -0400 Subject: [PATCH 24/73] Don't generate fake "*TLOCRN*" or "*TROCRN*" aliases, either. This is just like the previous two commits, except that this fix actually doesn't change any regression test outputs. Author: Robert Haas Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CA+TgmoYSYmDA2GvanzPMci084n+mVucv0bJ0HPbs6uhmMN6HMg@mail.gmail.com --- src/backend/rewrite/rewriteSearchCycle.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c index 9f95d4dc1b0e8..5202ef43d1068 100644 --- a/src/backend/rewrite/rewriteSearchCycle.c +++ b/src/backend/rewrite/rewriteSearchCycle.c @@ -282,8 +282,8 @@ rewriteSearchAndCycle(CommonTableExpr *cte) newrte = makeNode(RangeTblEntry); newrte->rtekind = RTE_SUBQUERY; - newrte->alias = makeAlias("*TLOCRN*", cte->ctecolnames); - newrte->eref = newrte->alias; + newrte->alias = NULL; + newrte->eref = makeAlias("*TLOCRN*", cte->ctecolnames); newsubquery = copyObject(rte1->subquery); IncrementVarSublevelsUp((Node *) newsubquery, 1, 1); newrte->subquery = newsubquery; @@ -379,8 +379,8 @@ rewriteSearchAndCycle(CommonTableExpr *cte) ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_mark_column)); ewcl = lappend(ewcl, makeString(cte->cycle_clause->cycle_path_column)); } - newrte->alias = makeAlias("*TROCRN*", ewcl); - newrte->eref = newrte->alias; + newrte->alias = NULL; + newrte->eref = makeAlias("*TROCRN*", ewcl); /* * Find the reference to the recursive CTE in the right UNION subquery's From 4b5f206de2bb9152a99a5c218caf2580cc5a0e9e Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 8 Sep 2025 14:25:10 -0400 Subject: [PATCH 25/73] Remove unused xl_heap_prune member, reason f83d709760d8 refactored xl_heap_prune and added an unused member, reason. While PruneReason is used when constructing this WAL record to set the WAL record definition, it doesn't need to be stored in a separate field in the record. Remove it. We won't backport this, since modifying an exposed struct definition to remove an unused field would do more harm than good. 
Author: Melanie Plageman
Reported-by: Andres Freund
Reviewed-by: Robert Haas
Discussion: https://postgr.es/m/tvvtfoxz5ykpsctxjbzxg3nldnzfc7geplrt2z2s54pmgto27y%40hbijsndifu45
---
 src/include/access/heapam_xlog.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 277df6b3cf0b3..d4c0625b63228 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -284,7 +284,6 @@ typedef struct xl_heap_update
 */
 typedef struct xl_heap_prune
 {
-	uint8		reason;
 	uint8		flags;

 	/*

From 3bcfcd815e1a2d51772ba27e0d034467f0344f15 Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Mon, 8 Sep 2025 14:19:48 -0500
Subject: [PATCH 26/73] pg_upgrade: Transfer pg_largeobject_metadata's files
 when possible.

Commit 161a3e8b68 taught pg_upgrade to use COPY for large object
metadata for upgrades from v12 and newer, which is much faster to
restore than using the regular large object commands. For upgrades
from v16 and newer, we can take this a step further and transfer the
large object metadata files as if they were user tables. We can't
transfer the files from older versions because the aclitem data type
(needed by pg_largeobject_metadata.lomacl) changed its storage format
in v16 (see commit 7b378237aa). Note that this commit is essentially
a revert of commit 12a53c732c.

There are a couple of caveats. First, we still need to COPY the
corresponding pg_shdepend rows for large objects. Second, we need to
COPY anything in pg_largeobject_metadata with a comment or security
label, or else restoring those will fail. This means that an upgrade
in which every large object has a comment or security label won't gain
anything from this commit, but it should at least avoid making those
unusual use-cases any worse.

pg_upgrade must also take care to transfer the relfilenodes of
pg_largeobject_metadata and its index, as was done for pg_largeobject
in commits d498e052b4 and bbe08b8869.

Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/aJ3_Gih_XW1_O2HF%40nathan
---
 src/backend/commands/tablecmds.c           | 12 ++--
 src/bin/pg_dump/pg_dump.c                  | 80 ++++++++++++++++++----
 src/bin/pg_upgrade/Makefile                |  3 +-
 src/bin/pg_upgrade/info.c                  | 11 ++-
 src/bin/pg_upgrade/pg_upgrade.c            |  6 +-
 src/bin/pg_upgrade/t/006_transfer_modes.pl | 67 ++++++++++++++++++
 6 files changed, 154 insertions(+), 25 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 082a3575d621e..3be2e051d32fb 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -42,6 +42,7 @@
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_largeobject.h"
+#include "catalog/pg_largeobject_metadata.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_policy.h"
@@ -2389,12 +2390,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
 	/*
 	 * Most system catalogs can't be truncated at all, or at least not unless
 	 * allow_system_table_mods=on. As an exception, however, we allow
-	 * pg_largeobject to be truncated as part of pg_upgrade, because we need
-	 * to change its relfilenode to match the old cluster, and allowing a
-	 * TRUNCATE command to be executed is the easiest way of doing that.
+	 * pg_largeobject and pg_largeobject_metadata to be truncated as part of
+	 * pg_upgrade, because we need to change their relfilenodes to match the
+	 * old cluster, and allowing a TRUNCATE command to be executed is the
+	 * easiest way of doing that.
*/ if (!allowSystemTableMods && IsSystemClass(relid, reltuple) - && (!IsBinaryUpgrade || relid != LargeObjectRelationId)) + && (!IsBinaryUpgrade || + (relid != LargeObjectRelationId && + relid != LargeObjectMetadataRelationId))) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied: \"%s\" is a system catalog", diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index bea793456f969..b4c45ad803e94 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -1131,6 +1131,23 @@ main(int argc, char **argv) shdepend->dataObj->filtercond = "WHERE classid = 'pg_largeobject'::regclass " "AND dbid = (SELECT oid FROM pg_database " " WHERE datname = current_database())"; + + /* + * If upgrading from v16 or newer, only dump large objects with + * comments/seclabels. For these upgrades, pg_upgrade can copy/link + * pg_largeobject_metadata's files (which is usually faster) but we + * still need to dump LOs with comments/seclabels here so that the + * subsequent COMMENT and SECURITY LABEL commands work. pg_upgrade + * can't copy/link the files from older versions because aclitem + * (needed by pg_largeobject_metadata.lomacl) changed its storage + * format in v16. + */ + if (fout->remoteVersion >= 160000) + lo_metadata->dataObj->filtercond = "WHERE oid IN " + "(SELECT objoid FROM pg_description " + "WHERE classoid = " CppAsString2(LargeObjectRelationId) " " + "UNION SELECT objoid FROM pg_seclabel " + "WHERE classoid = " CppAsString2(LargeObjectRelationId) ")"; } /* @@ -3629,26 +3646,32 @@ dumpDatabase(Archive *fout) /* * pg_largeobject comes from the old system intact, so set its * relfrozenxids, relminmxids and relfilenode. + * + * pg_largeobject_metadata also comes from the old system intact for + * upgrades from v16 and newer, so set its relfrozenxids, relminmxids, and + * relfilenode, too. pg_upgrade can't copy/link the files from older + * versions because aclitem (needed by pg_largeobject_metadata.lomacl) + * changed its storage format in v16. 
*/ if (dopt->binary_upgrade) { PGresult *lo_res; PQExpBuffer loFrozenQry = createPQExpBuffer(); PQExpBuffer loOutQry = createPQExpBuffer(); + PQExpBuffer lomOutQry = createPQExpBuffer(); PQExpBuffer loHorizonQry = createPQExpBuffer(); + PQExpBuffer lomHorizonQry = createPQExpBuffer(); int ii_relfrozenxid, ii_relfilenode, ii_oid, ii_relminmxid; - /* - * pg_largeobject - */ if (fout->remoteVersion >= 90300) appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, relminmxid, relfilenode, oid\n" "FROM pg_catalog.pg_class\n" - "WHERE oid IN (%u, %u);\n", - LargeObjectRelationId, LargeObjectLOidPNIndexId); + "WHERE oid IN (%u, %u, %u, %u);\n", + LargeObjectRelationId, LargeObjectLOidPNIndexId, + LargeObjectMetadataRelationId, LargeObjectMetadataOidIndexId); else appendPQExpBuffer(loFrozenQry, "SELECT relfrozenxid, 0 AS relminmxid, relfilenode, oid\n" "FROM pg_catalog.pg_class\n" @@ -3663,35 +3686,57 @@ dumpDatabase(Archive *fout) ii_oid = PQfnumber(lo_res, "oid"); appendPQExpBufferStr(loHorizonQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n"); + appendPQExpBufferStr(lomHorizonQry, "\n-- For binary upgrade, set pg_largeobject_metadata relfrozenxid and relminmxid\n"); appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve pg_largeobject and index relfilenodes\n"); + appendPQExpBufferStr(lomOutQry, "\n-- For binary upgrade, preserve pg_largeobject_metadata and index relfilenodes\n"); for (int i = 0; i < PQntuples(lo_res); ++i) { Oid oid; RelFileNumber relfilenumber; + PQExpBuffer horizonQry; + PQExpBuffer outQry; + + oid = atooid(PQgetvalue(lo_res, i, ii_oid)); + relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode)); - appendPQExpBuffer(loHorizonQry, "UPDATE pg_catalog.pg_class\n" + if (oid == LargeObjectRelationId || + oid == LargeObjectLOidPNIndexId) + { + horizonQry = loHorizonQry; + outQry = loOutQry; + } + else + { + horizonQry = lomHorizonQry; + outQry = lomOutQry; + } + + appendPQExpBuffer(horizonQry, "UPDATE pg_catalog.pg_class\n" "SET relfrozenxid = '%u', relminmxid = '%u'\n" "WHERE oid = %u;\n", atooid(PQgetvalue(lo_res, i, ii_relfrozenxid)), atooid(PQgetvalue(lo_res, i, ii_relminmxid)), atooid(PQgetvalue(lo_res, i, ii_oid))); - oid = atooid(PQgetvalue(lo_res, i, ii_oid)); - relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode)); - - if (oid == LargeObjectRelationId) - appendPQExpBuffer(loOutQry, + if (oid == LargeObjectRelationId || + oid == LargeObjectMetadataRelationId) + appendPQExpBuffer(outQry, "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", relfilenumber); - else if (oid == LargeObjectLOidPNIndexId) - appendPQExpBuffer(loOutQry, + else if (oid == LargeObjectLOidPNIndexId || + oid == LargeObjectMetadataOidIndexId) + appendPQExpBuffer(outQry, "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", relfilenumber); } appendPQExpBufferStr(loOutQry, "TRUNCATE pg_catalog.pg_largeobject;\n"); + appendPQExpBufferStr(lomOutQry, + "TRUNCATE pg_catalog.pg_largeobject_metadata;\n"); + appendPQExpBufferStr(loOutQry, loHorizonQry->data); + appendPQExpBufferStr(lomOutQry, lomHorizonQry->data); ArchiveEntry(fout, nilCatalogId, createDumpId(), ARCHIVE_OPTS(.tag = "pg_largeobject", @@ -3699,11 +3744,20 @@ dumpDatabase(Archive *fout) .section = SECTION_PRE_DATA, .createStmt = loOutQry->data)); + if (fout->remoteVersion >= 160000) + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = "pg_largeobject_metadata", + .description = "pg_largeobject_metadata", + 
.section = SECTION_PRE_DATA, + .createStmt = lomOutQry->data)); + PQclear(lo_res); destroyPQExpBuffer(loFrozenQry); destroyPQExpBuffer(loHorizonQry); + destroyPQExpBuffer(lomHorizonQry); destroyPQExpBuffer(loOutQry); + destroyPQExpBuffer(lomOutQry); } PQclear(res); diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index f83d2b5d30955..69fcf593caec9 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -3,8 +3,7 @@ PGFILEDESC = "pg_upgrade - an in-place binary upgrade utility" PGAPPICON = win32 -# required for 003_upgrade_logical_replication_slots.pl -EXTRA_INSTALL=contrib/test_decoding +EXTRA_INSTALL=contrib/test_decoding src/test/modules/dummy_seclabel subdir = src/bin/pg_upgrade top_builddir = ../../.. diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index c39eb077c2fae..7ce0827016803 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -498,7 +498,10 @@ get_rel_infos_query(void) * * pg_largeobject contains user data that does not appear in pg_dump * output, so we have to copy that system table. It's easiest to do that - * by treating it as a user table. + * by treating it as a user table. We can do the same for + * pg_largeobject_metadata for upgrades from v16 and newer. pg_upgrade + * can't copy/link the files from older versions because aclitem (needed + * by pg_largeobject_metadata.lomacl) changed its storage format in v16. */ appendPQExpBuffer(&query, "WITH regular_heap (reloid, indtable, toastheap) AS ( " @@ -514,10 +517,12 @@ get_rel_infos_query(void) " 'binary_upgrade', 'pg_toast') AND " " c.oid >= %u::pg_catalog.oid) OR " " (n.nspname = 'pg_catalog' AND " - " relname IN ('pg_largeobject') ))), ", + " relname IN ('pg_largeobject'%s) ))), ", (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? ", " CppAsString2(RELKIND_SEQUENCE) : "", - FirstNormalObjectId); + FirstNormalObjectId, + (GET_MAJOR_VERSION(old_cluster.major_version) >= 1600) ? + ", 'pg_largeobject_metadata'" : ""); /* * Add a CTE that collects OIDs of toast tables belonging to the tables diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index d5cd5bf0b3a6b..490e98fa26f2a 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -29,9 +29,9 @@ * We control all assignments of pg_enum.oid because these oids are stored * in user tables as enum values. * - * We control all assignments of pg_authid.oid for historical reasons (the - * oids used to be stored in pg_largeobject_metadata, which is now copied via - * SQL commands), that might change at some point in the future. + * We control all assignments of pg_authid.oid because the oids are stored in + * pg_largeobject_metadata, which is copied via file transfer for upgrades + * from v16 and newer. * * We control all assignments of pg_database.oid because we want the directory * names to match between the old and new cluster. diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl index 348f402146234..2f68f0b56aa61 100644 --- a/src/bin/pg_upgrade/t/006_transfer_modes.pl +++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl @@ -45,6 +45,22 @@ sub test_mode $old->append_conf('postgresql.conf', "allow_in_place_tablespaces = true"); } + # We can only test security labels if both the old and new installations + # have dummy_seclabel. 
+ my $test_seclabel = 1; + $old->start; + if (!$old->check_extension('dummy_seclabel')) + { + $test_seclabel = 0; + } + $old->stop; + $new->start; + if (!$new->check_extension('dummy_seclabel')) + { + $test_seclabel = 0; + } + $new->stop; + # Create a small variety of simple test objects on the old cluster. We'll # check that these reach the new version after upgrading. $old->start; @@ -83,6 +99,29 @@ sub test_mode $old->safe_psql('testdb3', "CREATE TABLE test6 AS SELECT generate_series(607, 711)"); } + + # While we are here, test handling of large objects. + $old->safe_psql('postgres', q| + CREATE ROLE regress_lo_1; + CREATE ROLE regress_lo_2; + + SELECT lo_from_bytea(4532, '\xffffff00'); + COMMENT ON LARGE OBJECT 4532 IS 'test'; + + SELECT lo_from_bytea(4533, '\x0f0f0f0f'); + ALTER LARGE OBJECT 4533 OWNER TO regress_lo_1; + GRANT SELECT ON LARGE OBJECT 4533 TO regress_lo_2; + |); + + if ($test_seclabel) + { + $old->safe_psql('postgres', q| + CREATE EXTENSION dummy_seclabel; + + SELECT lo_from_bytea(4534, '\x00ffffff'); + SECURITY LABEL ON LARGE OBJECT 4534 IS 'classified'; + |); + } $old->stop; my $result = command_ok_or_fails_like( @@ -132,6 +171,34 @@ sub test_mode $result = $new->safe_psql('testdb3', "SELECT COUNT(*) FROM test6"); is($result, '105', "test6 data after pg_upgrade $mode"); } + + # Tests for large objects + $result = $new->safe_psql('postgres', "SELECT lo_get(4532)"); + is($result, '\xffffff00', "LO contents after upgrade"); + $result = $new->safe_psql('postgres', + "SELECT obj_description(4532, 'pg_largeobject')"); + is($result, 'test', "comment on LO after pg_upgrade"); + + $result = $new->safe_psql('postgres', "SELECT lo_get(4533)"); + is($result, '\x0f0f0f0f', "LO contents after upgrade"); + $result = $new->safe_psql('postgres', + "SELECT lomowner::regrole FROM pg_largeobject_metadata WHERE oid = 4533"); + is($result, 'regress_lo_1', "LO owner after upgrade"); + $result = $new->safe_psql('postgres', + "SELECT lomacl FROM pg_largeobject_metadata WHERE oid = 4533"); + is($result, '{regress_lo_1=rw/regress_lo_1,regress_lo_2=r/regress_lo_1}', + "LO ACL after upgrade"); + + if ($test_seclabel) + { + $result = $new->safe_psql('postgres', "SELECT lo_get(4534)"); + is($result, '\x00ffffff', "LO contents after upgrade"); + $result = $new->safe_psql('postgres', q| + SELECT label FROM pg_seclabel WHERE objoid = 4534 + AND classoid = 'pg_largeobject'::regclass + |); + is($result, 'classified', "seclabel on LO after pg_upgrade"); + } $new->stop; } From 9af672bcb245950e58198119ba6eb17043fd3a6d Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 8 Sep 2025 12:29:42 -0700 Subject: [PATCH 27/73] meson: build checksums with extra optimization flags. Use -funroll-loops and -ftree-vectorize when building checksum.c to match what autoconf does. 
Discussion: https://postgr.es/m/a81f2f7ef34afc24a89c613671ea017e3651329c.camel@j-davis.com Reviewed-by: Andres Freund --- src/backend/storage/page/meson.build | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/backend/storage/page/meson.build b/src/backend/storage/page/meson.build index c3e4a805862a9..112f00ff36552 100644 --- a/src/backend/storage/page/meson.build +++ b/src/backend/storage/page/meson.build @@ -1,7 +1,15 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group +checksum_backend_lib = static_library('checksum_backend_lib', + 'checksum.c', + dependencies: backend_build_deps, + kwargs: internal_lib_args, + c_args: vectorize_cflags + unroll_loops_cflags, +) + +backend_link_with += checksum_backend_lib + backend_sources += files( 'bufpage.c', - 'checksum.c', 'itemptr.c', ) From 8ec97e78a7713a1ebf4976b55c19f6c9bc2716d9 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 8 Sep 2025 17:13:31 -0400 Subject: [PATCH 28/73] Add error codes when vacuum discovers VM corruption Commit fd6ec93bf890314a and other previous work established the principle that when an error is potentially reachable in case of on-disk corruption but is not expected to be reached otherwise, ERRCODE_DATA_CORRUPTED should be used. This allows log monitoring software to search for evidence of corruption by filtering on the error code. Enhance the existing log messages emitted when the heap page is found to be inconsistent with the VM by adding this error code. Suggested-by: Andrey Borodin Author: Melanie Plageman Reviewed-by: Robert Haas Discussion: https://postgr.es/m/87DD95AA-274F-4F4F-BAD9-7738E5B1F905%40yandex-team.ru --- src/backend/access/heap/vacuumlazy.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 932701d8420dc..981d9380a925c 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2121,8 +2121,11 @@ lazy_scan_prune(LVRelState *vacrel, else if (all_visible_according_to_vm && !PageIsAllVisible(page) && visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0) { - elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - vacrel->relname, blkno); + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", + vacrel->relname, blkno))); + visibilitymap_clear(vacrel->rel, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); } @@ -2143,8 +2146,11 @@ lazy_scan_prune(LVRelState *vacrel, */ else if (presult.lpdead_items > 0 && PageIsAllVisible(page)) { - elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", - vacrel->relname, blkno); + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u", + vacrel->relname, blkno))); + PageClearAllVisible(page); MarkBufferDirty(buf); visibilitymap_clear(vacrel->rel, blkno, vmbuffer, From 5ac3c1ac22cb325844d0bee37f79f2c11931b32e Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 9 Sep 2025 03:18:22 +0000 Subject: [PATCH 29/73] Fix Coverity issue reported in commit a850be2fe. Address a potential SIGSEGV that may occur when the tablesync worker attempts to locate a deleted row while applying changes. This situation arises during conflict detection for update-deleted scenarios. 
To prevent this crash, ensure that the operation is errored out early
if the leader apply worker is unavailable. Since the leader worker
maintains the necessary conflict detection metadata, proceeding without
it serves no purpose and risks reporting an incorrect conflict type.

In passing, improve a nearby comment.

Reported by Tom Lane as per Coverity
Author: shveta malik
Reviewed-by: Amit Kapila
Discussion: https://postgr.es/m/334468.1757280992@sss.pgh.pa.us
---
 src/backend/replication/logical/worker.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index c0f6bef5c282c..b3cac1023731a 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -3266,12 +3266,18 @@ FindDeletedTupleInLocalRel(Relation localrel, Oid localidxoid,

 	/*
 	 * Obtain the information from the leader apply worker as only the
-	 * leader manages conflict retention (see
+	 * leader manages oldest_nonremovable_xid (see
 	 * maybe_advance_nonremovable_xid() for details).
 	 */
 	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
 	leader = logicalrep_worker_find(MyLogicalRepWorker->subid, InvalidOid,
 									false);
+	if (!leader)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("could not detect conflict as the leader apply worker has exited")));
+	}

 	SpinLockAcquire(&leader->relmutex);
 	oldestxmin = leader->oldest_nonremovable_xid;

From faf071b553830d39fc583beabcaf56ed65259acc Mon Sep 17 00:00:00 2001
From: Dean Rasheed
Date: Tue, 9 Sep 2025 10:39:30 +0100
Subject: [PATCH 30/73] Add date and timestamp variants of random(min, max).

This adds 3 new variants of the random() function:

    random(min date, max date) returns date
    random(min timestamp, max timestamp) returns timestamp
    random(min timestamptz, max timestamptz) returns timestamptz

Each returns a random value x in the range min <= x <= max.

Author: Damien Clochard
Reviewed-by: Greg Sabino Mullane
Reviewed-by: Dean Rasheed
Reviewed-by: Vik Fearing
Reviewed-by: Chao Li
Discussion: https://postgr.es/m/f524d8cab5914613d9e624d9ce177d3d@dalibo.info
---
 doc/src/sgml/func/func-datetime.sgml      |  30 +++++++
 doc/src/sgml/func/func-math.sgml          |   3 +-
 src/backend/utils/adt/pseudorandomfuncs.c | 104 ++++++++++++++++++++--
 src/include/catalog/catversion.h          |   2 +-
 src/include/catalog/pg_proc.dat           |  12 +++
 src/test/regress/expected/random.out      |  87 ++++++++++++++++++
 src/test/regress/sql/random.sql           |  26 ++++++
 7 files changed, 254 insertions(+), 10 deletions(-)

diff --git a/doc/src/sgml/func/func-datetime.sgml b/doc/src/sgml/func/func-datetime.sgml
index 482fe45f42ebc..98dd60aa9a7ec 100644
--- a/doc/src/sgml/func/func-datetime.sgml
+++ b/doc/src/sgml/func/func-datetime.sgml
@@ -928,6 +928,36 @@
+
+
+
+ random
+
+ random ( min date, max date )
+ date
+
+
+ random ( min timestamp, max timestamp )
+ timestamp
+
+
+ random ( min timestamptz, max timestamptz )
+ timestamptz
+
+
+ Returns a random value in the range
+ min <= x <= max.
+
+
+ random('1979-02-08'::date,'2025-07-03'::date)
+ 1983-04-21
+
+
+ random('2000-01-01'::timestamptz, now())
+ 2015-09-27 09:11:33.732707+00
+
+
+
diff --git a/doc/src/sgml/func/func-math.sgml b/doc/src/sgml/func/func-math.sgml
index 7528dc4cea4b9..fd821c0e70677 100644
--- a/doc/src/sgml/func/func-math.sgml
+++ b/doc/src/sgml/func/func-math.sgml
@@ -1151,7 +1151,8 @@
 The random() and random_normal()
- functions listed in use a
+ functions listed in and
+ use a
It is fast but not suitable for cryptographic applications; see the module for a more diff --git a/src/backend/utils/adt/pseudorandomfuncs.c b/src/backend/utils/adt/pseudorandomfuncs.c index e7b8045f92508..1d2a981491bf5 100644 --- a/src/backend/utils/adt/pseudorandomfuncs.c +++ b/src/backend/utils/adt/pseudorandomfuncs.c @@ -17,6 +17,7 @@ #include "common/pg_prng.h" #include "miscadmin.h" +#include "utils/date.h" #include "utils/fmgrprotos.h" #include "utils/numeric.h" #include "utils/timestamp.h" @@ -25,6 +26,18 @@ static pg_prng_state prng_state; static bool prng_seed_set = false; +/* + * Macro for checking the range bounds of random(min, max) functions. Throws + * an error if they're the wrong way round. + */ +#define CHECK_RANGE_BOUNDS(rmin, rmax) \ + do { \ + if ((rmin) > (rmax)) \ + ereport(ERROR, \ + errcode(ERRCODE_INVALID_PARAMETER_VALUE), \ + errmsg("lower bound must be less than or equal to upper bound")); \ + } while (0) + /* * initialize_prng() - * @@ -129,10 +142,7 @@ int4random(PG_FUNCTION_ARGS) int32 rmax = PG_GETARG_INT32(1); int32 result; - if (rmin > rmax) - ereport(ERROR, - errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("lower bound must be less than or equal to upper bound")); + CHECK_RANGE_BOUNDS(rmin, rmax); initialize_prng(); @@ -153,10 +163,7 @@ int8random(PG_FUNCTION_ARGS) int64 rmax = PG_GETARG_INT64(1); int64 result; - if (rmin > rmax) - ereport(ERROR, - errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("lower bound must be less than or equal to upper bound")); + CHECK_RANGE_BOUNDS(rmin, rmax); initialize_prng(); @@ -177,9 +184,90 @@ numeric_random(PG_FUNCTION_ARGS) Numeric rmax = PG_GETARG_NUMERIC(1); Numeric result; + /* Leave range bound checking to random_numeric() */ + initialize_prng(); result = random_numeric(&prng_state, rmin, rmax); PG_RETURN_NUMERIC(result); } + + +/* + * date_random() - + * + * Returns a random date chosen uniformly in the specified range. + */ +Datum +date_random(PG_FUNCTION_ARGS) +{ + int32 rmin = (int32) PG_GETARG_DATEADT(0); + int32 rmax = (int32) PG_GETARG_DATEADT(1); + DateADT result; + + CHECK_RANGE_BOUNDS(rmin, rmax); + + if (DATE_IS_NOBEGIN(rmin) || DATE_IS_NOEND(rmax)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower and upper bounds must be finite")); + + initialize_prng(); + + result = (DateADT) pg_prng_int64_range(&prng_state, rmin, rmax); + + PG_RETURN_DATEADT(result); +} + +/* + * timestamp_random() - + * + * Returns a random timestamp chosen uniformly in the specified range. + */ +Datum +timestamp_random(PG_FUNCTION_ARGS) +{ + int64 rmin = (int64) PG_GETARG_TIMESTAMP(0); + int64 rmax = (int64) PG_GETARG_TIMESTAMP(1); + Timestamp result; + + CHECK_RANGE_BOUNDS(rmin, rmax); + + if (TIMESTAMP_IS_NOBEGIN(rmin) || TIMESTAMP_IS_NOEND(rmax)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower and upper bounds must be finite")); + + initialize_prng(); + + result = (Timestamp) pg_prng_int64_range(&prng_state, rmin, rmax); + + PG_RETURN_TIMESTAMP(result); +} + +/* + * timestamptz_random() - + * + * Returns a random timestamptz chosen uniformly in the specified range. 
+ */ +Datum +timestamptz_random(PG_FUNCTION_ARGS) +{ + int64 rmin = (int64) PG_GETARG_TIMESTAMPTZ(0); + int64 rmax = (int64) PG_GETARG_TIMESTAMPTZ(1); + TimestampTz result; + + CHECK_RANGE_BOUNDS(rmin, rmax); + + if (TIMESTAMP_IS_NOBEGIN(rmin) || TIMESTAMP_IS_NOEND(rmax)) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("lower and upper bounds must be finite")); + + initialize_prng(); + + result = (TimestampTz) pg_prng_int64_range(&prng_state, rmin, rmax); + + PG_RETURN_TIMESTAMPTZ(result); +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 836369f163ef5..ef0d0f92165eb 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202509021 +#define CATALOG_VERSION_NO 202509091 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 118d6da1ace0e..03e82d28c8767 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -3503,6 +3503,18 @@ proname => 'random', provolatile => 'v', proparallel => 'r', prorettype => 'numeric', proargtypes => 'numeric numeric', proargnames => '{min,max}', prosrc => 'numeric_random' }, +{ oid => '6431', descr => 'random date in range', + proname => 'random', provolatile => 'v', proparallel => 'r', + prorettype => 'date', proargtypes => 'date date', + proargnames => '{min,max}', prosrc => 'date_random' }, +{ oid => '6432', descr => 'random timestamp in range', + proname => 'random', provolatile => 'v', proparallel => 'r', + prorettype => 'timestamp', proargtypes => 'timestamp timestamp', + proargnames => '{min,max}', prosrc => 'timestamp_random' }, +{ oid => '6433', descr => 'random timestamptz in range', + proname => 'random', provolatile => 'v', proparallel => 'r', + prorettype => 'timestamptz', proargtypes => 'timestamptz timestamptz', + proargnames => '{min,max}', prosrc => 'timestamptz_random' }, { oid => '1599', descr => 'set random seed', proname => 'setseed', provolatile => 'v', proparallel => 'r', prorettype => 'void', proargtypes => 'float8', prosrc => 'setseed' }, diff --git a/src/test/regress/expected/random.out b/src/test/regress/expected/random.out index 43cf88a36341b..7f17b2a1b12f8 100644 --- a/src/test/regress/expected/random.out +++ b/src/test/regress/expected/random.out @@ -536,3 +536,90 @@ SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20 20 | 0.60795101234744211935 (41 rows) +-- random dates +SELECT random('1979-02-08'::date,'2025-07-03'::date) AS random_date_multiple_years; + random_date_multiple_years +---------------------------- + 04-09-1986 +(1 row) + +SELECT random('4714-11-24 BC'::date,'5874897-12-31 AD'::date) AS random_date_maximum_range; + random_date_maximum_range +--------------------------- + 10-02-2898131 +(1 row) + +SELECT random('1979-02-08'::date,'1979-02-08'::date) AS random_date_empty_range; + random_date_empty_range +------------------------- + 02-08-1979 +(1 row) + +SELECT random('2024-12-31'::date, '2024-01-01'::date); -- fail +ERROR: lower bound must be less than or equal to upper bound +SELECT random('-infinity'::date, '2024-01-01'::date); -- fail +ERROR: lower and upper bounds must be finite +SELECT random('2024-12-31'::date, 'infinity'::date); -- fail +ERROR: lower and upper bounds must be finite +-- random timestamps +SELECT random('1979-02-08'::timestamp,'2025-07-03'::timestamp) AS random_timestamp_multiple_years; + random_timestamp_multiple_years 
+--------------------------------- + Fri Jan 27 18:52:05.366009 2017 +(1 row) + +SELECT random('4714-11-24 BC'::timestamp,'294276-12-31 23:59:59.999999'::timestamp) AS random_timestamp_maximum_range; + random_timestamp_maximum_range +----------------------------------- + Wed Mar 28 00:45:36.180395 226694 +(1 row) + +SELECT random('2024-07-01 12:00:00.000001'::timestamp, '2024-07-01 12:00:00.999999'::timestamp) AS random_narrow_range; + random_narrow_range +--------------------------------- + Mon Jul 01 12:00:00.999286 2024 +(1 row) + +SELECT random('1979-02-08'::timestamp,'1979-02-08'::timestamp) AS random_timestamp_empty_range; + random_timestamp_empty_range +------------------------------ + Thu Feb 08 00:00:00 1979 +(1 row) + +SELECT random('2024-12-31'::timestamp, '2024-01-01'::timestamp); -- fail +ERROR: lower bound must be less than or equal to upper bound +SELECT random('-infinity'::timestamp, '2024-01-01'::timestamp); -- fail +ERROR: lower and upper bounds must be finite +SELECT random('2024-12-31'::timestamp, 'infinity'::timestamp); -- fail +ERROR: lower and upper bounds must be finite +-- random timestamps with timezone +SELECT random('1979-02-08 +01'::timestamptz,'2025-07-03 +02'::timestamptz) AS random_timestamptz_multiple_years; + random_timestamptz_multiple_years +------------------------------------- + Tue Jun 14 04:41:16.652896 2016 PDT +(1 row) + +SELECT random('4714-11-24 BC +00'::timestamptz,'294276-12-31 23:59:59.999999 +00'::timestamptz) AS random_timestamptz_maximum_range; + random_timestamptz_maximum_range +-------------------------------------- + Wed Mar 26 14:07:16.980265 31603 PDT +(1 row) + +SELECT random('2024-07-01 12:00:00.000001 +04'::timestamptz, '2024-07-01 12:00:00.999999 +04'::timestamptz) AS random_timestamptz_narrow_range; + random_timestamptz_narrow_range +------------------------------------- + Mon Jul 01 01:00:00.835808 2024 PDT +(1 row) + +SELECT random('1979-02-08 +05'::timestamptz,'1979-02-08 +05'::timestamptz) AS random_timestamptz_empty_range; + random_timestamptz_empty_range +-------------------------------- + Wed Feb 07 11:00:00 1979 PST +(1 row) + +SELECT random('2024-01-01 +06'::timestamptz, '2024-01-01 +07'::timestamptz); -- fail +ERROR: lower bound must be less than or equal to upper bound +SELECT random('-infinity'::timestamptz, '2024-01-01 +07'::timestamptz); -- fail +ERROR: lower and upper bounds must be finite +SELECT random('2024-01-01 +06'::timestamptz, 'infinity'::timestamptz); -- fail +ERROR: lower and upper bounds must be finite diff --git a/src/test/regress/sql/random.sql b/src/test/regress/sql/random.sql index ebfa7539ede25..890f14687ef98 100644 --- a/src/test/regress/sql/random.sql +++ b/src/test/regress/sql/random.sql @@ -277,3 +277,29 @@ SELECT random(-1e30, 1e30) FROM generate_series(1, 10); SELECT random(-0.4, 0.4) FROM generate_series(1, 10); SELECT random(0, 1 - 1e-30) FROM generate_series(1, 10); SELECT n, random(0, trim_scale(abs(1 - 10.0^(-n)))) FROM generate_series(-20, 20) n; + +-- random dates +SELECT random('1979-02-08'::date,'2025-07-03'::date) AS random_date_multiple_years; +SELECT random('4714-11-24 BC'::date,'5874897-12-31 AD'::date) AS random_date_maximum_range; +SELECT random('1979-02-08'::date,'1979-02-08'::date) AS random_date_empty_range; +SELECT random('2024-12-31'::date, '2024-01-01'::date); -- fail +SELECT random('-infinity'::date, '2024-01-01'::date); -- fail +SELECT random('2024-12-31'::date, 'infinity'::date); -- fail + +-- random timestamps +SELECT 
random('1979-02-08'::timestamp,'2025-07-03'::timestamp) AS random_timestamp_multiple_years; +SELECT random('4714-11-24 BC'::timestamp,'294276-12-31 23:59:59.999999'::timestamp) AS random_timestamp_maximum_range; +SELECT random('2024-07-01 12:00:00.000001'::timestamp, '2024-07-01 12:00:00.999999'::timestamp) AS random_narrow_range; +SELECT random('1979-02-08'::timestamp,'1979-02-08'::timestamp) AS random_timestamp_empty_range; +SELECT random('2024-12-31'::timestamp, '2024-01-01'::timestamp); -- fail +SELECT random('-infinity'::timestamp, '2024-01-01'::timestamp); -- fail +SELECT random('2024-12-31'::timestamp, 'infinity'::timestamp); -- fail + +-- random timestamps with timezone +SELECT random('1979-02-08 +01'::timestamptz,'2025-07-03 +02'::timestamptz) AS random_timestamptz_multiple_years; +SELECT random('4714-11-24 BC +00'::timestamptz,'294276-12-31 23:59:59.999999 +00'::timestamptz) AS random_timestamptz_maximum_range; +SELECT random('2024-07-01 12:00:00.000001 +04'::timestamptz, '2024-07-01 12:00:00.999999 +04'::timestamptz) AS random_timestamptz_narrow_range; +SELECT random('1979-02-08 +05'::timestamptz,'1979-02-08 +05'::timestamptz) AS random_timestamptz_empty_range; +SELECT random('2024-01-01 +06'::timestamptz, '2024-01-01 +07'::timestamptz); -- fail +SELECT random('-infinity'::timestamptz, '2024-01-01 +07'::timestamptz); -- fail +SELECT random('2024-01-01 +06'::timestamptz, 'infinity'::timestamptz); -- fail From 81a61fde84ffc74f7b3c7854ed4193cc4d31f78b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 9 Sep 2025 15:33:46 +0200 Subject: [PATCH 31/73] Fix typo in comment Author: Alexandra Wang Discussion: https://www.postgresql.org/message-id/CAK98qZ0whQ%3Dc%2BJGXbGSEBxCtLgy6sf-YGYqsKTAGsS-wt0wj%2BA%40mail.gmail.com --- src/backend/utils/adt/jsonbsubs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/adt/jsonbsubs.c b/src/backend/utils/adt/jsonbsubs.c index de64d49851251..e8626d3b4fc6e 100644 --- a/src/backend/utils/adt/jsonbsubs.c +++ b/src/backend/utils/adt/jsonbsubs.c @@ -51,7 +51,7 @@ jsonb_subscript_transform(SubscriptingRef *sbsref, /* * Transform and convert the subscript expressions. Jsonb subscripting - * does not support slices, look only and the upper index. + * does not support slices, look only at the upper index. */ foreach(idx, indirection) { From 530cfa8eb50ca5a2151dfc50a6a5999ec8aff148 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 9 Sep 2025 14:09:36 -0500 Subject: [PATCH 32/73] test_slru: Fix LWLock tranche allocation in EXEC_BACKEND builds. Currently, test_slru's shmem_startup_hook unconditionally generates new LWLock tranche IDs. This is fine on non-EXEC_BACKEND builds, where only the postmaster executes this hook, but on EXEC_BACKEND builds, every backend executes it, too. To fix, only generate the tranche IDs in the postmaster process by checking the IsUnderPostmaster variable. This is arguably a bug fix and could be back-patched, but since the damage is limited to some extra unused tranche IDs in a test module, I'm not going to bother. 
Reported-by: Sami Imseih Reviewed-by: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0vaAuonaf12CeDddQJu5xKL%2B6xVyS%2B_q1%2BcH%3D33JXV82w%40mail.gmail.com --- src/test/modules/test_slru/test_slru.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/test/modules/test_slru/test_slru.c b/src/test/modules/test_slru/test_slru.c index 8c0367eeee424..e963466aef1cd 100644 --- a/src/test/modules/test_slru/test_slru.c +++ b/src/test/modules/test_slru/test_slru.c @@ -219,8 +219,8 @@ test_slru_shmem_startup(void) */ const bool long_segment_names = true; const char slru_dir_name[] = "pg_test_slru"; - int test_tranche_id; - int test_buffer_tranche_id; + int test_tranche_id = -1; + int test_buffer_tranche_id = -1; if (prev_shmem_startup_hook) prev_shmem_startup_hook(); @@ -231,10 +231,18 @@ test_slru_shmem_startup(void) */ (void) MakePGDirectory(slru_dir_name); - /* initialize the SLRU facility */ - test_tranche_id = LWLockNewTrancheId("test_slru_tranche"); - - test_buffer_tranche_id = LWLockNewTrancheId("test_buffer_tranche"); + /* + * Initialize the SLRU facility. In EXEC_BACKEND builds, the + * shmem_startup_hook is called in the postmaster and in each backend, but + * we only need to generate the LWLock tranches once. Note that these + * tranche ID variables are not used by SimpleLruInit() when + * IsUnderPostmaster is true. + */ + if (!IsUnderPostmaster) + { + test_tranche_id = LWLockNewTrancheId("test_slru_tranche"); + test_buffer_tranche_id = LWLockNewTrancheId("test_buffer_tranche"); + } TestSlruCtl->PagePrecedes = test_slru_page_precedes_logically; SimpleLruInit(TestSlruCtl, "TestSLRU", From d96c854dfc634212193007ca58f8978bc272d457 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 9 Sep 2025 14:35:30 -0500 Subject: [PATCH 33/73] Fix documentation for shmem_startup_hook. This section claims that each backend executes the shmem_startup_hook shortly after attaching to shared memory, which is true for EXEC_BACKEND builds, but not for others. This commit adds this important detail. Oversight in commit 964152c476. Reported-by: Sami Imseih Reviewed-by: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0vEGT1eigGbVt604LkXP6mUPMwPMxQoRCbFny44w%2B9EUQ%40mail.gmail.com Backpatch-through: 17 --- doc/src/sgml/xfunc.sgml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index da21ef5689184..04bf919b34384 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -3668,11 +3668,14 @@ LWLockRelease(AddinShmemInitLock); shmem_startup_hook provides a convenient place for the initialization code, but it is not strictly required that all such code - be placed in this hook. Each backend will execute the registered - shmem_startup_hook shortly after it attaches to shared - memory. Note that add-ins should still acquire + be placed in this hook. On Windows (and anywhere else where + EXEC_BACKEND is defined), each backend executes the + registered shmem_startup_hook shortly after it + attaches to shared memory, so add-ins should still acquire AddinShmemInitLock within this hook, as shown in the - example above. + example above. On other platforms, only the postmaster process executes + the shmem_startup_hook, and each backend automatically + inherits the pointers to shared memory. 
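The two fixes above point at one pattern for extension authors. Below is a minimal, hedged sketch of a shmem_startup_hook that stays correct whether the hook runs only in the postmaster (non-EXEC_BACKEND) or in every backend (EXEC_BACKEND); MyExtState, my_ext_shmem_startup, and the "my_ext" names are hypothetical, not taken from these patches:

/*
 * Illustrative sketch, not part of the patch series: an extension's
 * shmem_startup_hook whose one-time initialization is guarded, so that
 * repeated executions in EXEC_BACKEND backends do not burn extra
 * LWLock tranche IDs.
 */
#include "postgres.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"

typedef struct MyExtState
{
	LWLock		lock;			/* protects counter */
	int64		counter;
} MyExtState;

static MyExtState *my_ext_state;
static shmem_startup_hook_type prev_shmem_startup_hook;

static void
my_ext_shmem_startup(void)
{
	bool		found;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
	my_ext_state = (MyExtState *)
		ShmemInitStruct("my_ext", sizeof(MyExtState), &found);
	if (!found)
	{
		/*
		 * Only the first process to attach (the postmaster, even on
		 * EXEC_BACKEND builds) initializes the state, so the tranche
		 * is allocated exactly once.
		 */
		LWLockInitialize(&my_ext_state->lock, LWLockNewTrancheId("my_ext"));
		my_ext_state->counter = 0;
	}
	LWLockRelease(AddinShmemInitLock);
}

In _PG_init(), the extension would save the existing shmem_startup_hook into prev_shmem_startup_hook and install my_ext_shmem_startup in its place.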
From 8c8f7b199d9095dbc2e101a4614043b5ae13bde3 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Wed, 10 Sep 2025 07:23:05 +0900
Subject: [PATCH 34/73] Fix leak with SMgrRelations in startup process

The startup process does not process shared invalidation messages (it
only sends them), and it never calls AtEOXact_SMgr(), which cleans up
any unpinned SMgrRelations. Hence, it is never able to free
SMgrRelations on a periodic basis, bloating its hashtable over time.

This commit takes a conservative approach similar to the checkpointer
and the bgwriter: the startup process now frees SMgrRelations when
replaying a checkpoint record, either online or shutdown, giving it a
way to perform this cleanup periodically.

Issue caused by 21d9c3ee4ef7, so backpatch down to v17.

Author: Jingtang Zhang
Reviewed-by: Yuhang Qiu
Discussion: https://postgr.es/m/28C687D4-F335-417E-B06C-6612A0BD5A10@gmail.com
Backpatch-through: 17
---
 src/backend/access/transam/xlog.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7ffb217915190..0baf0ac6160af 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8385,6 +8385,14 @@ xlog_redo(XLogReaderState *record)
 						checkPoint.ThisTimeLineID, replayTLI)));

 		RecoveryRestartPoint(&checkPoint, record);
+
+		/*
+		 * After replaying a checkpoint record, free all smgr objects.
+		 * Otherwise we would never do so for dropped relations, as the
+		 * startup does not process shared invalidation messages or call
+		 * AtEOXact_SMgr().
+		 */
+		smgrdestroyall();
 	}
 	else if (info == XLOG_CHECKPOINT_ONLINE)
 	{
@@ -8438,6 +8446,14 @@ xlog_redo(XLogReaderState *record)
 						checkPoint.ThisTimeLineID, replayTLI)));

 		RecoveryRestartPoint(&checkPoint, record);
+
+		/*
+		 * After replaying a checkpoint record, free all smgr objects.
+		 * Otherwise we would never do so for dropped relations, as the
+		 * startup does not process shared invalidation messages or call
+		 * AtEOXact_SMgr().
+		 */
+		smgrdestroyall();
 	}
 	else if (info == XLOG_OVERWRITE_CONTRECORD)
 	{

From b1187266e077265cb061cbedd502e94179dc7b21 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Wed, 10 Sep 2025 11:20:46 +0900
Subject: [PATCH 35/73] Replace callers of dynahash.h's my_log2() with
 equivalents in pg_bitutils.h

All the calls replaced by this commit use 4-byte integers for the
variables passed as input to my_log2(). Hence, the limit against
too-large inputs does not really apply. Thresholds are also applied,
as follows:
- In nodeAgg.c, the number of partitions is limited by
  HASHAGG_MAX_PARTITIONS.
- In nodeHash.c, ExecChooseHashTableSize() caps its maximum number of
  buckets based on HashJoinTuple and the palloc() allocation limit.
- In worker.c, the number of subxacts tracked by ApplySubXactData uses
  uint32, making pg_ceil_log2_64() safe to use directly.

Several approaches were discussed, like an integration with thresholds
in pg_bitutils.h, but that was found confusing. This uses Dean's idea,
which gives a simpler result than what I had come up with to be able
to remove dynahash.h. dynahash.h will be removed in a follow-up
commit, removing some duplication with the ceil log2 routines.
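To make the replacement concrete, here is a standalone sketch (plain C, not PostgreSQL code; __builtin_clz is a GCC/Clang assumption) checking that the loop-based ceil(log2(n)) that my_log2() implements agrees with a count-leading-zeros formulation of the kind pg_ceil_log2_32() uses, along with the next-power-of-2 idiom the callers rely on:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* dynahash.c-style loop: smallest i such that (1 << i) >= num */
static int
ceil_log2_loop(int64_t num)
{
	int			i;
	int64_t		limit;

	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
		;
	return i;
}

/* count-leading-zeros version, valid for uint32 inputs >= 1 */
static int
ceil_log2_clz(uint32_t num)
{
	if (num < 2)
		return 0;
	return 32 - __builtin_clz(num - 1);
}

int
main(void)
{
	for (uint32_t n = 1; n <= 100000; n++)
		assert(ceil_log2_loop(n) == ceil_log2_clz(n));

	/*
	 * 1 << ceil_log2(n) rounds n up to the next power of two, which is
	 * how subxact_info_read() keeps nsubxacts_max a power of 2.
	 */
	printf("%d\n", 1 << ceil_log2_clz(1000));	/* prints 1024 */
	return 0;
}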
Reviewed-by: Peter Eisentraut Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAEZATCUJPQD_7sC-wErak2CQGNa6bj2hY-mr8wsBki=kX7f2_A@mail.gmail.com --- src/backend/executor/nodeAgg.c | 3 +-- src/backend/executor/nodeHash.c | 7 +++---- src/backend/replication/logical/worker.c | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 377e016d73225..a4f3d30f307cc 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -267,7 +267,6 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/datum.h" -#include "utils/dynahash.h" #include "utils/expandeddatum.h" #include "utils/injection_point.h" #include "utils/logtape.h" @@ -2115,7 +2114,7 @@ hash_choose_num_partitions(double input_groups, double hashentrysize, npartitions = (int) dpartitions; /* ceil(log2(npartitions)) */ - partition_bits = my_log2(npartitions); + partition_bits = pg_ceil_log2_32(npartitions); /* make sure that we don't exhaust the hash bits */ if (partition_bits + used_bits >= 32) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8d2201ab67fa5..a3415db4e20f5 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -36,7 +36,6 @@ #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "port/pg_bitutils.h" -#include "utils/dynahash.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" @@ -340,7 +339,7 @@ MultiExecParallelHash(HashState *node) */ hashtable->curbatch = -1; hashtable->nbuckets = pstate->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_32(hashtable->nbuckets); hashtable->totalTuples = pstate->total_tuples; /* @@ -480,7 +479,7 @@ ExecHashTableCreate(HashState *state) &nbuckets, &nbatch, &num_skew_mcvs); /* nbuckets must be a power of 2 */ - log2_nbuckets = my_log2(nbuckets); + log2_nbuckets = pg_ceil_log2_32(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* @@ -3499,7 +3498,7 @@ ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno) dsa_get_address(hashtable->area, hashtable->batches[batchno].shared->buckets); hashtable->nbuckets = hashtable->parallel_state->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_32(hashtable->nbuckets); hashtable->current_chunk = NULL; hashtable->current_chunk_shared = InvalidDsaPointer; hashtable->batches[batchno].at_least_one_chunk = false; diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index b3cac1023731a..ee6ac22329fdc 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -276,7 +276,6 @@ #include "storage/procarray.h" #include "tcop/tcopprot.h" #include "utils/acl.h" -#include "utils/dynahash.h" #include "utils/guc.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -5115,7 +5114,7 @@ subxact_info_read(Oid subid, TransactionId xid) len = sizeof(SubXactInfo) * subxact_data.nsubxacts; /* we keep the maximum as a power of 2 */ - subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts); + subxact_data.nsubxacts_max = 1 << pg_ceil_log2_32(subxact_data.nsubxacts); /* * Allocate subxact information in the logical streaming context. 
We need From e6da68a6e1d60a037b63a9c9ed36e5ef0a996769 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 10 Sep 2025 14:11:50 +0900 Subject: [PATCH 36/73] Remove dynahash.h All the callers of my_log2() are now limited inside dynahash.c, so let's remove this header. The same capability is provided by pg_bitutils.h already. Discussion: https://postgr.es/m/CAEZATCUJPQD_7sC-wErak2CQGNa6bj2hY-mr8wsBki=kX7f2_A@mail.gmail.com --- src/backend/utils/hash/dynahash.c | 4 ++-- src/include/utils/dynahash.h | 20 -------------------- 2 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 src/include/utils/dynahash.h diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1aeee5be42acd..ac94b9e93c6e3 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -102,7 +102,6 @@ #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" -#include "utils/dynahash.h" #include "utils/memutils.h" @@ -281,6 +280,7 @@ static bool init_htab(HTAB *hashp, int64 nelem); pg_noreturn static void hash_corrupted(HTAB *hashp); static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr); +static int my_log2(int64 num); static int64 next_pow2_int64(int64 num); static int next_pow2_int(int64 num); static void register_seq_scan(HTAB *hashp); @@ -1813,7 +1813,7 @@ hash_corrupted(HTAB *hashp) } /* calculate ceil(log base 2) of num */ -int +static int my_log2(int64 num) { /* diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h deleted file mode 100644 index a4362d3f65e59..0000000000000 --- a/src/include/utils/dynahash.h +++ /dev/null @@ -1,20 +0,0 @@ -/*------------------------------------------------------------------------- - * - * dynahash.h - * POSTGRES dynahash.h file definitions - * - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * src/include/utils/dynahash.h - * - *------------------------------------------------------------------------- - */ -#ifndef DYNAHASH_H -#define DYNAHASH_H - -extern int my_log2(int64 num); - -#endif /* DYNAHASH_H */ From 33eec809402bfbf3eb0d01ad5b023d3d05fcb3bc Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 10 Sep 2025 11:49:53 +0200 Subject: [PATCH 37/73] Fix CREATE TABLE LIKE with not-valid check constraint In CREATE TABLE ... LIKE, any check constraints copied from the source table should be set to valid if they are ENFORCED (the default). Bug introduced in commit ca87c415e2f. 
Author: jian he Discussion: https://www.postgresql.org/message-id/CACJufxH%3D%2Bod8Wy0P4L3_GpapNwLUP3oAes5UFRJ7yTxrM_M5kg%40mail.gmail.com --- src/backend/parser/parse_utilcmd.c | 3 +-- src/test/regress/expected/create_table_like.out | 8 ++++++++ src/test/regress/sql/create_table_like.sql | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index afcf54169c3b3..e96b38a59d503 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1461,7 +1461,6 @@ expandTableLikeClause(RangeVar *heapRel, TableLikeClause *table_like_clause) char *ccname = constr->check[ccnum].ccname; char *ccbin = constr->check[ccnum].ccbin; bool ccenforced = constr->check[ccnum].ccenforced; - bool ccvalid = constr->check[ccnum].ccvalid; bool ccnoinherit = constr->check[ccnum].ccnoinherit; Node *ccbin_node; bool found_whole_row; @@ -1492,7 +1491,7 @@ expandTableLikeClause(RangeVar *heapRel, TableLikeClause *table_like_clause) n->conname = pstrdup(ccname); n->location = -1; n->is_enforced = ccenforced; - n->initially_valid = ccvalid; + n->initially_valid = ccenforced; /* sic */ n->is_no_inherit = ccnoinherit; n->raw_expr = NULL; n->cooked_expr = nodeToString(ccbin_node); diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out index 29a779c2e9072..d3c35c148475d 100644 --- a/src/test/regress/expected/create_table_like.out +++ b/src/test/regress/expected/create_table_like.out @@ -320,6 +320,7 @@ DROP TABLE inhz; -- including storage and comments CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) ENFORCED PRIMARY KEY, b text CHECK (length(b) > 100) NOT ENFORCED); +ALTER TABLE ctlt1 ADD CONSTRAINT cc CHECK (length(b) > 100) NOT VALID; CREATE INDEX ctlt1_b_key ON ctlt1 (b); CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b)); CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1; @@ -378,6 +379,7 @@ SELECT conname, description FROM pg_description, pg_constraint c WHERE classoid CREATE TABLE ctlt1_inh (LIKE ctlt1 INCLUDING CONSTRAINTS INCLUDING COMMENTS) INHERITS (ctlt1); NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition +NOTICE: merging constraint "cc" with inherited definition NOTICE: merging constraint "ctlt1_a_check" with inherited definition NOTICE: merging constraint "ctlt1_b_check" with inherited definition \d+ ctlt1_inh @@ -387,6 +389,7 @@ NOTICE: merging constraint "ctlt1_b_check" with inherited definition a | text | | not null | | main | | A b | text | | | | extended | | B Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Not-null constraints: @@ -409,6 +412,7 @@ NOTICE: merging multiple inherited definitions of column "a" b | text | | | | extended | | c | text | | | | external | | Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED "ctlt3_a_check" CHECK (length(a) < 5) @@ -430,6 +434,7 @@ NOTICE: merging column "a" with inherited definition Indexes: "ctlt13_like_expr_idx" btree ((a || c)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED "ctlt3_a_check" CHECK (length(a) < 5) @@ -456,6 +461,7 @@ Indexes: "ctlt_all_b_idx" btree (b) "ctlt_all_expr_idx" btree ((a || b)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" 
CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Statistics objects: @@ -499,6 +505,7 @@ Indexes: "pg_attrdef_b_idx" btree (b) "pg_attrdef_expr_idx" btree ((a || b)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Statistics objects: @@ -524,6 +531,7 @@ Indexes: "ctlt1_b_idx" btree (b) "ctlt1_expr_idx" btree ((a || b)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Statistics objects: diff --git a/src/test/regress/sql/create_table_like.sql b/src/test/regress/sql/create_table_like.sql index bf8702116a74b..93389b57dbf95 100644 --- a/src/test/regress/sql/create_table_like.sql +++ b/src/test/regress/sql/create_table_like.sql @@ -130,6 +130,7 @@ DROP TABLE inhz; -- including storage and comments CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) ENFORCED PRIMARY KEY, b text CHECK (length(b) > 100) NOT ENFORCED); +ALTER TABLE ctlt1 ADD CONSTRAINT cc CHECK (length(b) > 100) NOT VALID; CREATE INDEX ctlt1_b_key ON ctlt1 (b); CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b)); CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1; From 9016fa7e3bcde8ae4c3d63c707143af147486a10 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 10 Sep 2025 11:21:12 -0500 Subject: [PATCH 38/73] meson: Build numeric.c with -ftree-vectorize. autoconf builds have compiled this file with -ftree-vectorize since commit 8870917623, but meson builds seem to have missed the memo. Reviewed-by: Jeff Davis Discussion: https://postgr.es/m/aL85CeasM51-0D1h%40nathan Backpatch-through: 16 --- src/backend/utils/adt/meson.build | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index dac372c3bea3b..12fa0c209127c 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -1,5 +1,15 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group +# Some code in numeric.c benefits from auto-vectorization +numeric_backend_lib = static_library('numeric_backend_lib', + 'numeric.c', + dependencies: backend_build_deps, + kwargs: internal_lib_args, + c_args: vectorize_cflags, +) + +backend_link_with += numeric_backend_lib + backend_sources += files( 'acl.c', 'amutils.c', @@ -61,7 +71,6 @@ backend_sources += files( 'network_gist.c', 'network_selfuncs.c', 'network_spgist.c', - 'numeric.c', 'numutils.c', 'oid.c', 'oracle_compat.c', From abdeacdb0920d94dec7500d09f6f29fbb2f6310d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Sep 2025 16:05:03 -0400 Subject: [PATCH 39/73] Fix memory leakage in nodeSubplan.c. If the hash functions used for hashing tuples leaked any memory, we failed to clean that up, resulting in query-lifespan memory leakage in queries using hashed subplans. One way that could happen is if the values being hashed require de-toasting, since most of our hash functions don't trouble to clean up de-toasted inputs. Prior to commit bf6c614a2, this leakage was largely masked because TupleHashTableMatch would reset hashtable->tempcxt (via execTuplesMatch). But it doesn't do that anymore, and that's not really the right place for this anyway: doing it there could reset the tempcxt many times per hash lookup, or not at all. Instead put reset calls into ExecHashSubPlan and buildSubPlanHash. 
Along the way to that, rearrange ExecHashSubPlan so that there's just one place to call MemoryContextReset instead of several. This amounts to accepting the de-facto API spec that the caller of the TupleHashTable routines is responsible for resetting the tempcxt adequately often. Although the other callers seem to get this right, it was not documented anywhere, so add a comment about it. Bug: #19040 Reported-by: Haiyang Li Author: Haiyang Li Reviewed-by: Fei Changhong Reviewed-by: Tom Lane Discussion: https://postgr.es/m/19040-c9b6073ef814f48c@postgresql.org Backpatch-through: 13 --- src/backend/executor/execGrouping.c | 6 +++ src/backend/executor/nodeSubplan.c | 70 +++++++++++------------------ 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index b540074935386..75087204f0c69 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -156,6 +156,12 @@ execTuplesHashPrepare(int numCols, * * Note that the keyColIdx, hashfunctions, and collations arrays must be * allocated in storage that will live as long as the hashtable does. + * + * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak + * memory in the tempcxt. It is caller's responsibility to reset that context + * reasonably often, typically once per tuple. (We do it that way, rather + * than managing an extra context within the hashtable, because in many cases + * the caller can specify a tempcxt that it needs to reset per-tuple anyway.) */ TupleHashTable BuildTupleHashTable(PlanState *parent, diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index f7f6fc2da0b95..8e55dcc159b0b 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -102,6 +102,7 @@ ExecHashSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull) { + bool result = false; SubPlan *subplan = node->subplan; PlanState *planstate = node->planstate; TupleTableSlot *slot; @@ -132,14 +133,6 @@ ExecHashSubPlan(SubPlanState *node, node->projLeft->pi_exprContext = econtext; slot = ExecProject(node->projLeft); - /* - * Note: because we are typically called in a per-tuple context, we have - * to explicitly clear the projected tuple before returning. Otherwise, - * we'll have a double-free situation: the per-tuple context will probably - * be reset before we're called again, and then the tuple slot will think - * it still needs to free the tuple. - */ - /* * If the LHS is all non-null, probe for an exact match in the main hash * table. If we find one, the result is TRUE. Otherwise, scan the @@ -161,19 +154,10 @@ ExecHashSubPlan(SubPlanState *node, slot, node->cur_eq_comp, node->lhs_hash_expr) != NULL) - { - ExecClearTuple(slot); - return BoolGetDatum(true); - } - if (node->havenullrows && - findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + result = true; + else if (node->havenullrows && + findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } - ExecClearTuple(slot); - return BoolGetDatum(false); } /* @@ -186,34 +170,31 @@ ExecHashSubPlan(SubPlanState *node, * aren't provably unequal to the LHS; if so, the result is UNKNOWN. * Otherwise, the result is FALSE. 
*/ - if (node->hashnulls == NULL) - { - ExecClearTuple(slot); - return BoolGetDatum(false); - } - if (slotAllNulls(slot)) - { - ExecClearTuple(slot); + else if (node->hashnulls == NULL) + /* just return FALSE */ ; + else if (slotAllNulls(slot)) *isNull = true; - return BoolGetDatum(false); - } /* Scan partly-null table first, since more likely to get a match */ - if (node->havenullrows && - findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + else if (node->havenullrows && + findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } - if (node->havehashrows && - findPartialMatch(node->hashtable, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + else if (node->havehashrows && + findPartialMatch(node->hashtable, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } + + /* + * Note: because we are typically called in a per-tuple context, we have + * to explicitly clear the projected tuple before returning. Otherwise, + * we'll have a double-free situation: the per-tuple context will probably + * be reset before we're called again, and then the tuple slot will think + * it still needs to free the tuple. + */ ExecClearTuple(slot); - return BoolGetDatum(false); + + /* Also must reset the hashtempcxt after each hashtable lookup. */ + MemoryContextReset(node->hashtempcxt); + + return BoolGetDatum(result); } /* @@ -642,6 +623,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) * during ExecProject. */ ResetExprContext(innerecontext); + + /* Also must reset the hashtempcxt after each hashtable lookup. */ + MemoryContextReset(node->hashtempcxt); } /* From bdc6cfcd12f5c95799328e05aa4bfa75cfe3e79f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Sep 2025 16:15:08 -0400 Subject: [PATCH 40/73] Eliminate duplicative hashtempcxt in nodeSubplan.c. Instead of building a separate memory context that's used just for running hash functions, make the hash functions run in the per-tuple context of the node's innerecontext. This saves a little space at runtime, and it avoids needing to reset two contexts instead of one inside buildSubPlanHash's main loop. This largely reverts commit 133924e13. That's safe to do now because bf6c614a2 decoupled the evaluation context used by TupleHashTableMatch from that used for hash function evaluation, so that there's no longer a risk of resetting the innerecontext too soon. Per discussion of bug #19040, although this is not directly a fix for that. Author: Tom Lane Reviewed-by: Haiyang Li Reviewed-by: Fei Changhong Discussion: https://postgr.es/m/19040-c9b6073ef814f48c@postgresql.org --- src/backend/executor/nodeSubplan.c | 19 +++++-------------- src/include/nodes/execnodes.h | 1 - 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 8e55dcc159b0b..53fb56f7388e8 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -191,8 +191,8 @@ ExecHashSubPlan(SubPlanState *node, */ ExecClearTuple(slot); - /* Also must reset the hashtempcxt after each hashtable lookup. */ - MemoryContextReset(node->hashtempcxt); + /* Also must reset the innerecontext after each hashtable lookup. 
*/ + ResetExprContext(node->innerecontext); return BoolGetDatum(result); } @@ -529,7 +529,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) 0, node->planstate->state->es_query_cxt, node->hashtablecxt, - node->hashtempcxt, + innerecontext->ecxt_per_tuple_memory, false); if (!subplan->unknownEqFalse) @@ -558,7 +558,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) 0, node->planstate->state->es_query_cxt, node->hashtablecxt, - node->hashtempcxt, + innerecontext->ecxt_per_tuple_memory, false); } else @@ -620,12 +620,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) /* * Reset innerecontext after each inner tuple to free any memory used - * during ExecProject. + * during ExecProject and hashtable lookup. */ ResetExprContext(innerecontext); - - /* Also must reset the hashtempcxt after each hashtable lookup. */ - MemoryContextReset(node->hashtempcxt); } /* @@ -842,7 +839,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->hashtable = NULL; sstate->hashnulls = NULL; sstate->hashtablecxt = NULL; - sstate->hashtempcxt = NULL; sstate->innerecontext = NULL; sstate->keyColIdx = NULL; sstate->tab_eq_funcoids = NULL; @@ -898,11 +894,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) AllocSetContextCreate(CurrentMemoryContext, "Subplan HashTable Context", ALLOCSET_DEFAULT_SIZES); - /* and a small one for the hash tables to use as temp storage */ - sstate->hashtempcxt = - AllocSetContextCreate(CurrentMemoryContext, - "Subplan HashTable Temp Context", - ALLOCSET_SMALL_SIZES); /* and a short-lived exprcontext for function evaluation */ sstate->innerecontext = CreateExprContext(estate); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index de782014b2d41..71857feae4823 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1020,7 +1020,6 @@ typedef struct SubPlanState bool havehashrows; /* true if hashtable is not empty */ bool havenullrows; /* true if hashnulls is not empty */ MemoryContext hashtablecxt; /* memory context containing hash tables */ - MemoryContext hashtempcxt; /* temp memory context for hash tables */ ExprContext *innerecontext; /* econtext for computing inner tuples */ int numCols; /* number of columns being hashed */ /* each of the remaining fields is an array of length numCols: */ From 09036dc71c682b0bf7234ed39c1429ed99fbe442 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Sep 2025 17:51:24 -0400 Subject: [PATCH 41/73] Avoid faulty alignment of Datums in build_sorted_items(). If sizeof(Pointer) is 4 then sizeof(SortItem) will be 12, so that if data->numrows is odd then we placed the values array at a location that's not a multiple of 8. That was fine when sizeof(Datum) was also 4, but in the wake of commit 2a600a93c it makes some alignment-picky machines unhappy. (You need a 32-bit machine that nonetheless expects 8-byte alignment of 8-byte quantities, which is an odd-seeming combination but it does exist outside the Intel universe.) To fix, MAXALIGN the space allocated to the SortItem array. In passing, let's make the "len" variable be Size not int, just for paranoia's sake. This code was arguably not too safe even before 2a600a93c, but at present I don't see a strong argument for back-patching. 
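For illustration, the alignment arithmetic can be checked with a standalone sketch (assumed values: 4-byte pointers so that sizeof(SortItem) == 12, and 8-byte maximum alignment; the macros are simplified copies for this sketch, not the PostgreSQL definitions):

#include <stdint.h>
#include <stdio.h>

#define MAXIMUM_ALIGNOF 8		/* assumption: Datum needs 8-byte alignment */
#define TYPEALIGN(a, len) (((uintptr_t) (len) + ((a) - 1)) & ~((uintptr_t) ((a) - 1)))
#define MAXALIGN(len) TYPEALIGN(MAXIMUM_ALIGNOF, len)

int
main(void)
{
	size_t		sort_item_size = 12;	/* sizeof(SortItem) with 4-byte pointers */
	int			numrows = 7;	/* any odd row count misaligns the tail */
	size_t		unaligned = numrows * sort_item_size;	/* 84: not a multiple of 8 */
	size_t		aligned = MAXALIGN(numrows * sort_item_size);	/* 88: safe for Datum */

	printf("values[] offset: %zu unaligned, %zu after MAXALIGN\n",
		   unaligned, aligned);
	return 0;
}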
Reported-by: Tomas Vondra
Author: Tom Lane
Discussion: https://postgr.es/m/87036018-8d70-40ad-a0ac-192b07bd7b04@vondra.me
---
 src/backend/statistics/extended_stats.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index af0b99243c614..3c3d2d315c6f4 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -986,10 +986,9 @@ build_sorted_items(StatsBuildData *data, int *nitems,
 {
 	int			i,
 				j,
-				len,
 				nrows;
 	int			nvalues = data->numrows * numattrs;
-
+	Size		len;
 	SortItem   *items;
 	Datum	   *values;
 	bool	   *isnull;
@@ -997,14 +996,16 @@ build_sorted_items(StatsBuildData *data, int *nitems,
 	int		   *typlen;

 	/* Compute the total amount of memory we need (both items and values). */
-	len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
+	len = MAXALIGN(data->numrows * sizeof(SortItem)) +
+		nvalues * (sizeof(Datum) + sizeof(bool));

 	/* Allocate the memory and split it into the pieces. */
 	ptr = palloc0(len);

 	/* items to sort */
 	items = (SortItem *) ptr;
-	ptr += data->numrows * sizeof(SortItem);
+	/* MAXALIGN ensures that the following Datums are suitably aligned */
+	ptr += MAXALIGN(data->numrows * sizeof(SortItem));

 	/* values and null flags */
 	values = (Datum *) ptr;

From c88ce73eda2e0a818d730c5b72475ef99cc9c4cf Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Thu, 11 Sep 2025 10:15:33 +0900
Subject: [PATCH 42/73] Fix incorrect file reference in guc.h

GucSource_Names was documented as being in guc.c, but since 0a20ff54f5e6
it is located in guc_tables.c. The reference to the location of
GucSource_Names is important, as GucSource needs to be kept in sync with
GucSource_Names.

Author: David G. Johnston
Discussion: https://postgr.es/m/CAKFQuwYPgAHWPYjPzK7iXzhSZ6MKR8w20_Nz7ZXpOvx=kZbs7A@mail.gmail.com
Backpatch-through: 16
---
 src/include/utils/guc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 756e80a2c2fcc..f21ec37da8933 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -106,7 +106,7 @@ typedef enum
 * will show as "default" in pg_settings. If there is a specific reason not
 * to want that, use source == PGC_S_OVERRIDE.
 *
- * NB: see GucSource_Names in guc.c if you change this.
+ * NB: see GucSource_Names in guc_tables.c if you change this.
 */
 typedef enum
 {

From 26eadf4d2b14a8a8d110b214dbb7ef952fbf8e93 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Thu, 11 Sep 2025 17:17:04 +0900
Subject: [PATCH 43/73] Fix description of WAL record blocks in hash_xlog.h

hash_xlog.h included descriptions for the blocks used in WAL records
that were not completely consistent with how the records are generated,
with one block missing for SQUEEZE_PAGE, and inconsistent descriptions
used for block 0 in VACUUM_ONE_PAGE and MOVE_PAGE_CONTENTS. This
information has been incorrect since c11453ce0aea, as verified by
cross-checking the logic for the record generation.
Author: Kirill Reshke Reviewed-by: Andrey Borodin Discussion: https://postgr.es/m/CALdSSPj1j=a1d1hVA3oabRFz0hSU3KKrYtZPijw4UPUM7LY9zw@mail.gmail.com Backpatch-through: 13 --- src/include/access/hash_xlog.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/include/access/hash_xlog.h b/src/include/access/hash_xlog.h index 6fe97de4d66f1..5d4671dc4c128 100644 --- a/src/include/access/hash_xlog.h +++ b/src/include/access/hash_xlog.h @@ -129,7 +129,7 @@ typedef struct xl_hash_split_complete * * This data record is used for XLOG_HASH_MOVE_PAGE_CONTENTS * - * Backup Blk 0: bucket page + * Backup Blk 0: primary bucket page * Backup Blk 1: page containing moved tuples * Backup Blk 2: page from which tuples will be removed */ @@ -149,12 +149,13 @@ typedef struct xl_hash_move_page_contents * * This data record is used for XLOG_HASH_SQUEEZE_PAGE * - * Backup Blk 0: page containing tuples moved from freed overflow page - * Backup Blk 1: freed overflow page - * Backup Blk 2: page previous to the freed overflow page - * Backup Blk 3: page next to the freed overflow page - * Backup Blk 4: bitmap page containing info of freed overflow page - * Backup Blk 5: meta page + * Backup Blk 0: primary bucket page + * Backup Blk 1: page containing tuples moved from freed overflow page + * Backup Blk 2: freed overflow page + * Backup Blk 3: page previous to the freed overflow page + * Backup Blk 4: page next to the freed overflow page + * Backup Blk 5: bitmap page containing info of freed overflow page + * Backup Blk 6: meta page */ typedef struct xl_hash_squeeze_page { @@ -245,7 +246,7 @@ typedef struct xl_hash_init_bitmap_page * * This data record is used for XLOG_HASH_VACUUM_ONE_PAGE * - * Backup Blk 0: bucket page + * Backup Blk 0: primary bucket page * Backup Blk 1: meta page */ typedef struct xl_hash_vacuum_one_page From 9c24111c4dad850bc2625c2113bb3d2dfd592efc Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 11 Sep 2025 09:25:47 +0100 Subject: [PATCH 44/73] doc: Improve description of new random(min, max) functions. Mention that the new variants of random(min, max) are affected by setseed(), like the original functions. Reported-by: Marcos Pegoraro Discussion: https://postgr.es/m/CAB-JLwb1=drA3Le6uZXDBi_tCpeS1qm6XQU7dKwac_x91Z4qDg@mail.gmail.com --- doc/src/sgml/func/func-datetime.sgml | 6 ++++++ doc/src/sgml/func/func-math.sgml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/func/func-datetime.sgml b/doc/src/sgml/func/func-datetime.sgml index 98dd60aa9a7ec..a25da4b5175ca 100644 --- a/doc/src/sgml/func/func-datetime.sgml +++ b/doc/src/sgml/func/func-datetime.sgml @@ -948,6 +948,12 @@ Returns a random value in the range min <= x <= max. + + Note that these functions use the same pseudo-random number generator + as the functions listed in , + and respond in the same way to calling + setseed(). + random('1979-02-08'::date,'2025-07-03'::date) 1983-04-21 diff --git a/doc/src/sgml/func/func-math.sgml b/doc/src/sgml/func/func-math.sgml index fd821c0e70677..9dcf97e7c9e06 100644 --- a/doc/src/sgml/func/func-math.sgml +++ b/doc/src/sgml/func/func-math.sgml @@ -1130,7 +1130,7 @@ - + setseed setseed ( double precision ) From 2bbbb2eca9303df590cc79be74b13cad259124a5 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 11 Sep 2025 09:48:12 +0100 Subject: [PATCH 45/73] doc: Fix indentation in func-datetime.sgml. Incorrect indentation introduced by commit faf071b5538. 
--- doc/src/sgml/func/func-datetime.sgml | 56 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/doc/src/sgml/func/func-datetime.sgml b/doc/src/sgml/func/func-datetime.sgml index a25da4b5175ca..8cd7150b0d313 100644 --- a/doc/src/sgml/func/func-datetime.sgml +++ b/doc/src/sgml/func/func-datetime.sgml @@ -935,34 +935,34 @@ random ( min date, max date ) date - - - random ( min timestamp, max timestamp ) - timestamp - - - random ( min timestamptz, max timestamptz ) - timestamptz - - - Returns a random value in the range - min <= x <= max. - - - Note that these functions use the same pseudo-random number generator - as the functions listed in , - and respond in the same way to calling - setseed(). - - - random('1979-02-08'::date,'2025-07-03'::date) - 1983-04-21 - - - random('2000-01-01'::timestamptz, now()) - 2015-09-27 09:11:33.732707+00 - - + + + random ( min timestamp, max timestamp ) + timestamp + + + random ( min timestamptz, max timestamptz ) + timestamptz + + + Returns a random value in the range + min <= x <= max. + + + Note that these functions use the same pseudo-random number generator + as the functions listed in , + and respond in the same way to calling + setseed(). + + + random('1979-02-08'::date,'2025-07-03'::date) + 1983-04-21 + + + random('2000-01-01'::timestamptz, now()) + 2015-09-27 09:11:33.732707+00 + + From 01d793698f5921547a7b5e1e003722c17f552574 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 11 Sep 2025 09:33:48 +0000 Subject: [PATCH 46/73] Fix intermittent test failure introduced in 6456c6e2c4. The test assumes that a backend will execute COMMIT PREPARED on the publisher and hit the injection point commit-after-delay-checkpoint within the commit critical section. This should cause the apply worker on the subscriber to wait for the transaction to complete. However, the test does not guarantee that the injection point is actually triggered, creating a race condition where the apply worker may proceed prematurely during COMMIT PREPARED. This commit resolves the issue by explicitly waiting for the injection point to be hit before continuing with the test, ensuring consistent and reliable behavior. Author: Zhijie Hou Reviewed-by: shveta malik Discussion: https://postgr.es/m/TY4PR01MB1690751D1CA8C128B0770EC6F9409A@TY4PR01MB16907.jpnprd01.prod.outlook.com --- src/test/subscription/t/035_conflicts.pl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index db0d5b464e825..880551fc69d74 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -475,6 +475,9 @@ } ); + # Wait until the backend enters the injection point + $node_B->wait_for_event('client backend', 'commit-after-delay-checkpoint'); + # Confirm the update is suspended $result = $node_B->safe_psql('postgres', 'SELECT * FROM tab WHERE a = 1'); From 368c38dd47c209fa95d2df855d62bcde386f3037 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 11 Sep 2025 11:55:29 +0200 Subject: [PATCH 47/73] Remove stray semicolon at global scope The Sun Studio compiler complains about an empty declaration here. Note for future historians: This does not mean that this compiler is still of current interest for anyone using PostgreSQL. But we can let this small fix be its parting gift. 
Reviewed-by: Andres Freund Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/a0f817ee-fb86-483a-8a14-b6f7f5991b6e%40eisentraut.org --- src/backend/replication/logical/slotsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 9d0072a49ed6d..8c061d55bdb51 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -1337,7 +1337,7 @@ reset_syncing_flag() SpinLockRelease(&SlotSyncCtx->mutex); syncing_slots = false; -}; +} /* * The main loop of our worker process. From 4fbe0151455fefbef7abc9d507adb04c978beb0d Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 11 Sep 2025 11:55:29 +0200 Subject: [PATCH 48/73] Remove checks for no longer supported GCC versions Since commit f5e0186f865 (Raise C requirement to C11), we effectively require at least GCC version 4.7, so checks for older versions can be removed. Reviewed-by: Andres Freund Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/a0f817ee-fb86-483a-8a14-b6f7f5991b6e%40eisentraut.org --- src/include/c.h | 8 ++++---- src/include/port/atomics/generic-gcc.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/include/c.h b/src/include/c.h index 39022f8a9dd75..b580cfa7d3178 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -259,8 +259,8 @@ * choose not to. But, if possible, don't force inlining in unoptimized * debug builds. */ -#if (defined(__GNUC__) && __GNUC__ > 3 && defined(__OPTIMIZE__)) || defined(__SUNPRO_C) -/* GCC > 3 and Sunpro support always_inline via __attribute__ */ +#if (defined(__GNUC__) && defined(__OPTIMIZE__)) || defined(__SUNPRO_C) +/* GCC and Sunpro support always_inline via __attribute__ */ #define pg_attribute_always_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) /* MSVC has a special keyword for this */ @@ -277,7 +277,7 @@ * above, this should be placed before the function's return type and name. */ /* GCC and Sunpro support noinline via __attribute__ */ -#if (defined(__GNUC__) && __GNUC__ > 2) || defined(__SUNPRO_C) +#if defined(__GNUC__) || defined(__SUNPRO_C) #define pg_noinline __attribute__((noinline)) /* msvc via declspec */ #elif defined(_MSC_VER) @@ -369,7 +369,7 @@ * These should only be used sparingly, in very hot code paths. It's very easy * to mis-estimate likelihoods. */ -#if __GNUC__ >= 3 +#ifdef __GNUC__ #define likely(x) __builtin_expect((x) != 0, 1) #define unlikely(x) __builtin_expect((x) != 0, 0) #else diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h index d8f04c89ccac2..e7dfad4f0d5eb 100644 --- a/src/include/port/atomics/generic-gcc.h +++ b/src/include/port/atomics/generic-gcc.h @@ -30,14 +30,14 @@ #define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory") /* - * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier + * If we're on GCC, we should be able to get a memory barrier * out of this compiler built-in. But we prefer to rely on platform specific * definitions where possible, and use this only as a fallback. 
*/ #if !defined(pg_memory_barrier_impl) # if defined(HAVE_GCC__ATOMIC_INT32_CAS) # define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST) -# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +# elif defined(__GNUC__) # define pg_memory_barrier_impl() __sync_synchronize() # endif #endif /* !defined(pg_memory_barrier_impl) */ From a2b4102a21ad730ce46b059acf49d72151e979f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 11 Sep 2025 18:11:46 +0200 Subject: [PATCH 49/73] Fill testing gap for possible referential integrity violation This commit adds a missing isolation test for (non-PERIOD) foreign keys. With REPEATABLE READ, one transaction can insert a referencing row while another deletes the referenced row, and both see a valid state. But after they have committed, the table violates referential integrity. If the INSERT precedes the DELETE, we use a crosscheck snapshot to see the just-added row, so that the DELETE can raise a foreign key error. You can see the table violate referential integrity if you change ri_restrict to pass false for detectNewRows to ri_PerformCheck. A crosscheck snapshot is not needed when the DELETE comes first, because the INSERT's trigger takes a FOR KEY SHARE lock that sees the row now marked for deletion, waits for that transaction to commit, and raises a serialization error. I (Paul) added a test for that too though. We already have a similar test (in ri-triggers.spec) for SERIALIZABLE snapshot isolation showing that you can implement foreign keys with just pl/pgSQL, but that test does nothing to validate ri_triggers.c. We also have tests (in fk-snapshot.spec) for other concurrency scenarios, but not this one: we test concurrently deleting both the referencing and referenced row, when the constraint activates a cascade/set null action. But those tests don't exercise ri_restrict, and the consequence of omitting a crosscheck comparison is different: a serialization failure, not a referential integrity violation. Author: Paul Jungwirth Reviewed-by: Rustam ALLAKOV Reviewed-by: Dean Rasheed Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CA+renyUp=xja80rBaB6NpY3RRdi750y046x28bo_xg29zKY72Q@mail.gmail.com --- src/test/isolation/expected/fk-snapshot-2.out | 61 +++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/fk-snapshot-2.spec | 50 +++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 src/test/isolation/expected/fk-snapshot-2.out create mode 100644 src/test/isolation/specs/fk-snapshot-2.spec diff --git a/src/test/isolation/expected/fk-snapshot-2.out b/src/test/isolation/expected/fk-snapshot-2.out new file mode 100644 index 0000000000000..0a4c9646fca4e --- /dev/null +++ b/src/test/isolation/expected/fk-snapshot-2.out @@ -0,0 +1,61 @@ +Parsed test spec with 2 sessions + +starting permutation: s1rr s2rr s2ins s1del s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2c: COMMIT; +step s1del: <... 
completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1del s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1del s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1del s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: insert or update on table "child" violates foreign key constraint "child_parent_id_fkey" +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1del s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1del s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 9f1e997d81b00..130525dfd3d68 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -36,6 +36,7 @@ test: fk-deadlock2 test: fk-partitioned-1 test: fk-partitioned-2 test: fk-snapshot +test: fk-snapshot-2 test: subxid-overflow test: eval-plan-qual test: eval-plan-qual-trigger diff --git a/src/test/isolation/specs/fk-snapshot-2.spec b/src/test/isolation/specs/fk-snapshot-2.spec new file mode 100644 index 0000000000000..94cd151aab9d3 --- /dev/null +++ b/src/test/isolation/specs/fk-snapshot-2.spec @@ -0,0 +1,50 @@ +# RI Trigger test +# +# Test C-based referential integrity enforcement. +# Under REPEATABLE READ we need some snapshot trickery in C, +# or we would permit things that violate referential integrity. 
+ +setup +{ + CREATE TABLE parent (parent_id SERIAL NOT NULL PRIMARY KEY); + CREATE TABLE child ( + child_id SERIAL NOT NULL PRIMARY KEY, + parent_id INTEGER REFERENCES parent); + INSERT INTO parent VALUES(1); +} + +teardown { DROP TABLE parent, child; } + +session s1 +step s1rc { BEGIN ISOLATION LEVEL READ COMMITTED; } +step s1rr { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step s1ser { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step s1del { DELETE FROM parent WHERE parent_id = 1; } +step s1c { COMMIT; } + +session s2 +step s2rc { BEGIN ISOLATION LEVEL READ COMMITTED; } +step s2rr { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step s2ser { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step s2ins { INSERT INTO child VALUES (1, 1); } +step s2c { COMMIT; } + +# Violates referential integrity unless we use a crosscheck snapshot, +# which is up-to-date compared with the transaction's snapshot. +permutation s1rr s2rr s2ins s1del s2c s1c + +# Raises a can't-serialize exception +# when the INSERT trigger does SELECT FOR KEY SHARE: +permutation s1rr s2rr s1del s2ins s1c s2c + +# Test the same scenarios in READ COMMITTED: +# A crosscheck snapshot is not required here. +permutation s1rc s2rc s2ins s1del s2c s1c +permutation s1rc s2rc s1del s2ins s1c s2c + +# Test the same scenarios in SERIALIZABLE: +# We should report the FK violation: +permutation s1ser s2ser s2ins s1del s2c s1c +# We raise a concurrent update error +# which is good enough: +permutation s1ser s2ser s1del s2ins s1c s2c From e8cec3d1791c140398454aa561cf51659dd8243d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 11 Sep 2025 18:13:09 +0200 Subject: [PATCH 50/73] Add test for temporal referential integrity This commit adds an isolation test showing that temporal foreign keys do not permit referential integrity violations under concurrency, like fk-snapshot-2. You can show that the test fails by passing false for detectNewRows to ri_PerformCheck in ri_restrict. Author: Paul Jungwirth Reviewed-by: Rustam ALLAKOV Reviewed-by: Dean Rasheed Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CA+renyUp=xja80rBaB6NpY3RRdi750y046x28bo_xg29zKY72Q@mail.gmail.com --- src/test/isolation/expected/fk-snapshot-3.out | 213 ++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/fk-snapshot-3.spec | 82 +++++++ 3 files changed, 296 insertions(+) create mode 100644 src/test/isolation/expected/fk-snapshot-3.out create mode 100644 src/test/isolation/specs/fk-snapshot-3.spec diff --git a/src/test/isolation/expected/fk-snapshot-3.out b/src/test/isolation/expected/fk-snapshot-3.out new file mode 100644 index 0000000000000..f98cb72fdac30 --- /dev/null +++ b/src/test/isolation/expected/fk-snapshot-3.out @@ -0,0 +1,213 @@ +Parsed test spec with 2 sessions + +starting permutation: s1rr s2rr s2ins s1del s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1del: <... 
completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1del s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1del s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1del s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: insert or update on table "child" violates foreign key constraint "child_parent_id_valid_at_fkey" +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1del s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1del s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1upok s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upok: <... completed> +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1upok s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... 
completed> +step s2c: COMMIT; + +starting permutation: s1rr s2rr s2ins s1upok s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upok: <... completed> +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1upok s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1upok s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upok: <... completed> +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1upok s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1upbad s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upbad: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1upbad s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: insert or update on table "child" violates foreign key constraint "child_parent_id_valid_at_fkey" +step s2c: COMMIT; + +starting permutation: s1rr s2rr s2ins s1upbad s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upbad: <... 
completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1upbad s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1upbad s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upbad: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1upbad s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 130525dfd3d68..5afae33d37036 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -37,6 +37,7 @@ test: fk-partitioned-1 test: fk-partitioned-2 test: fk-snapshot test: fk-snapshot-2 +test: fk-snapshot-3 test: subxid-overflow test: eval-plan-qual test: eval-plan-qual-trigger diff --git a/src/test/isolation/specs/fk-snapshot-3.spec b/src/test/isolation/specs/fk-snapshot-3.spec new file mode 100644 index 0000000000000..90075024f5cc0 --- /dev/null +++ b/src/test/isolation/specs/fk-snapshot-3.spec @@ -0,0 +1,82 @@ +# RI Trigger test +# +# Test C-based temporal referential integrity enforcement. +# Under REPEATABLE READ we need some snapshot trickery in C, +# or we would permit things that violate referential integrity. 
+
+setup
+{
+	CREATE TABLE parent (
+		id int4range NOT NULL,
+		valid_at daterange NOT NULL,
+		PRIMARY KEY (id, valid_at WITHOUT OVERLAPS));
+	CREATE TABLE child (
+		id int4range NOT NULL,
+		valid_at daterange NOT NULL,
+		parent_id int4range,
+		FOREIGN KEY (parent_id, PERIOD valid_at) REFERENCES parent);
+	INSERT INTO parent VALUES ('[1,2)', '[2020-01-01,2030-01-01)');
+}
+
+teardown { DROP TABLE parent, child; }
+
+session s1
+step s1rc	{ BEGIN ISOLATION LEVEL READ COMMITTED; }
+step s1rr	{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step s1ser	{ BEGIN ISOLATION LEVEL SERIALIZABLE; }
+step s1del	{ DELETE FROM parent WHERE id = '[1,2)'; }
+step s1upok	{ UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; }
+step s1upbad	{ UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; }
+step s1c	{ COMMIT; }
+
+session s2
+step s2rc	{ BEGIN ISOLATION LEVEL READ COMMITTED; }
+step s2rr	{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step s2ser	{ BEGIN ISOLATION LEVEL SERIALIZABLE; }
+step s2ins	{
+	INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)');
+}
+step s2c	{ COMMIT; }
+
+# Violates referential integrity unless we use an up-to-date crosscheck snapshot:
+permutation s1rr s2rr s2ins s1del s2c s1c
+
+# Raises a can't-serialize exception
+# when the INSERT trigger does SELECT FOR KEY SHARE:
+permutation s1rr s2rr s1del s2ins s1c s2c
+
+# Test the same scenarios in READ COMMITTED:
+# A crosscheck snapshot is not required here.
+permutation s1rc s2rc s2ins s1del s2c s1c
+permutation s1rc s2rc s1del s2ins s1c s2c
+
+# Test the same scenarios in SERIALIZABLE:
+# We should report the FK violation:
+permutation s1ser s2ser s2ins s1del s2c s1c
+# We raise a concurrent update error
+# which is good enough:
+permutation s1ser s2ser s1del s2ins s1c s2c
+
+# Also check updating the valid time (without violating RI):
+
+# ...with READ COMMITTED:
+permutation s1rc s2rc s2ins s1upok s2c s1c
+permutation s1rc s2rc s1upok s2ins s1c s2c
+# ...with REPEATABLE READ:
+permutation s1rr s2rr s2ins s1upok s2c s1c
+permutation s1rr s2rr s1upok s2ins s1c s2c
+# ...with SERIALIZABLE:
+permutation s1ser s2ser s2ins s1upok s2c s1c
+permutation s1ser s2ser s1upok s2ins s1c s2c
+
+# Also check updating the valid time (while violating RI):
+
+# ...with READ COMMITTED:
+permutation s1rc s2rc s2ins s1upbad s2c s1c
+permutation s1rc s2rc s1upbad s2ins s1c s2c
+# ...with REPEATABLE READ:
+permutation s1rr s2rr s2ins s1upbad s2c s1c
+permutation s1rr s2rr s1upbad s2ins s1c s2c
+# ...with SERIALIZABLE:
+permutation s1ser s2ser s2ins s1upbad s2c s1c
+permutation s1ser s2ser s1upbad s2ins s1c s2c
From 1d5800019f68d81139021b8bab159b8578fcaa2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?=
Date: Thu, 11 Sep 2025 19:49:57 +0200
Subject: [PATCH 51/73] Improve comment about snapshot macros

The comment mistakenly had "the others" for "the other", but this
commit also reorders the comment so it matches the macros below. Now we
describe the levels in increasing strictness. In addition, it seems
easier to follow if we introduce one level at a time, rather than
describing two, followed by "the other" (and then jumping back to one
of the first two). Finally, reword the sentence about the purpose of
the macros, which was slightly off-point.
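
For illustration, a minimal sketch of the usage pattern the macros are
meant for (the error path shown is how a REPEATABLE READ transaction
reports a concurrently updated row, as in the tests above; the
surrounding code is hypothetical, not part of this patch):

    if (IsolationUsesXactSnapshot())
    {
        /*
         * REPEATABLE READ or SERIALIZABLE: the whole transaction runs on
         * one snapshot, so a concurrently updated row cannot simply be
         * re-fetched; report a serialization failure instead.
         */
        ereport(ERROR,
                (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                 errmsg("could not serialize access due to concurrent update")));
    }

    if (IsolationIsSerializable())
    {
        /* SERIALIZABLE additionally takes predicate locks (SSI). */
    }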
Author: Paul Jungwirth Reviewed-by: Rustam ALLAKOV Reviewed-by: Dean Rasheed Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CA+renyUp=xja80rBaB6NpY3RRdi750y046x28bo_xg29zKY72Q@mail.gmail.com --- src/include/access/xact.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/include/access/xact.h b/src/include/access/xact.h index b2bc10ee04196..4528e51829e61 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -43,10 +43,11 @@ extern PGDLLIMPORT int XactIsoLevel; /* * We implement three isolation levels internally. - * The two stronger ones use one snapshot per database transaction; - * the others use one snapshot per statement. - * Serializable uses predicate locks in addition to snapshots. - * These macros should be used to check which isolation level is selected. + * The weakest uses one snapshot per statement; + * the two stronger levels use one snapshot per database transaction. + * Serializable uses predicate locks in addition to the snapshot. + * These macros can be used to determine which implementation to use + * depending on the prevailing serialization level. */ #define IsolationUsesXactSnapshot() (XactIsoLevel >= XACT_REPEATABLE_READ) #define IsolationIsSerializable() (XactIsoLevel == XACT_SERIALIZABLE) From a0b99fc12203fa179d5b4218a21de30e0e91a7b8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 11 Sep 2025 17:11:54 -0400 Subject: [PATCH 52/73] Report the correct is_temporary flag for column defaults. pg_event_trigger_dropped_objects() would report a column default object with is_temporary = false, even if it belongs to a temporary table. This seems clearly wrong, so adjust it to report the table's temp-ness. While here, refactor EventTriggerSQLDropAddObject to make its handling of namespace objects less messy and avoid duplication of the schema-lookup code. And add some explicit test coverage of dropped-object reports for dependencies of temp tables. Back-patch to v15. The bug exists further back, but the GetAttrDefaultColumnAddress function this patch depends on does not, and it doesn't seem worth adjusting it to cope with the older code. 
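
For illustration, the new rule reduces to roughly the following sketch
(simplified: the real code below routes this through a shared helper
that also skips temp objects belonging to other sessions):

    ObjectAddress colobject = GetAttrDefaultColumnAddress(object->objectId);

    if (OidIsValid(colobject.objectId))
    {
        colobject.objectSubId = 0;  /* column reference -> table reference */

        /* a column default is "temporary" exactly when its table is */
        obj->istemp = isAnyTempNamespace(get_rel_namespace(colobject.objectId));
    }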
Author: Antoine Violin Co-authored-by: Tom Lane Discussion: https://postgr.es/m/CAFjUV9x3-hv0gihf+CtUc-1it0hh7Skp9iYFhMS7FJjtAeAptA@mail.gmail.com Backpatch-through: 15 --- src/backend/commands/event_trigger.c | 111 ++++++++++++++------ src/test/regress/expected/event_trigger.out | 37 +++++++ src/test/regress/sql/event_trigger.sql | 20 ++++ 3 files changed, 136 insertions(+), 32 deletions(-) diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 631fb0525f1e7..fcdcba009d4e3 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -21,6 +21,7 @@ #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_authid.h" #include "catalog/pg_auth_members.h" #include "catalog/pg_database.h" @@ -109,6 +110,8 @@ static Oid insert_event_trigger_tuple(const char *trigname, const char *eventnam static void validate_ddl_tags(const char *filtervar, List *taglist); static void validate_table_rewrite_tags(const char *filtervar, List *taglist); static void EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata); +static bool obtain_object_name_namespace(const ObjectAddress *object, + SQLDropObject *obj); static const char *stringify_grant_objtype(ObjectType objtype); static const char *stringify_adefprivs_objtype(ObjectType objtype); static void SetDatabaseHasLoginEventTriggers(void); @@ -1280,12 +1283,6 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no Assert(EventTriggerSupportsObject(object)); - /* don't report temp schemas except my own */ - if (object->classId == NamespaceRelationId && - (isAnyTempNamespace(object->objectId) && - !isTempNamespace(object->objectId))) - return; - oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt); obj = palloc0(sizeof(SQLDropObject)); @@ -1293,21 +1290,88 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no obj->original = original; obj->normal = normal; + if (object->classId == NamespaceRelationId) + { + /* Special handling is needed for temp namespaces */ + if (isTempNamespace(object->objectId)) + obj->istemp = true; + else if (isAnyTempNamespace(object->objectId)) + { + /* don't report temp schemas except my own */ + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + else if (object->classId == AttrDefaultRelationId) + { + /* We treat a column default as temp if its table is temp */ + ObjectAddress colobject; + + colobject = GetAttrDefaultColumnAddress(object->objectId); + if (OidIsValid(colobject.objectId)) + { + colobject.objectSubId = 0; /* convert to table reference */ + if (!obtain_object_name_namespace(&colobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } + else + { + /* Generic handling for all other object classes */ + if (!obtain_object_name_namespace(object, obj)) + { + /* don't report temp objects except my own */ + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + + /* object identity, objname and objargs */ + obj->objidentity = + getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs, + false); + + /* object type */ + obj->objecttype = getObjectTypeDescription(&obj->address, false); + + slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next); + + MemoryContextSwitchTo(oldcxt); +} + +/* + * Fill obj->objname, obj->schemaname, and obj->istemp based on object. 
+ * + * Returns true if this object should be reported, false if it should + * be ignored because it is a temporary object of another session. + */ +static bool +obtain_object_name_namespace(const ObjectAddress *object, SQLDropObject *obj) +{ /* * Obtain schema names from the object's catalog tuple, if one exists; * this lets us skip objects in temp schemas. We trust that * ObjectProperty contains all object classes that can be * schema-qualified. + * + * Currently, this function does nothing for object classes that are not + * in ObjectProperty, but we might sometime add special cases for that. */ if (is_objectclass_supported(object->classId)) { Relation catalog; HeapTuple tuple; - catalog = table_open(obj->address.classId, AccessShareLock); + catalog = table_open(object->classId, AccessShareLock); tuple = get_catalog_object_by_oid(catalog, get_object_attnum_oid(object->classId), - obj->address.objectId); + object->objectId); if (tuple) { @@ -1315,7 +1379,7 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no Datum datum; bool isnull; - attnum = get_object_attnum_namespace(obj->address.classId); + attnum = get_object_attnum_namespace(object->classId); if (attnum != InvalidAttrNumber) { datum = heap_getattr(tuple, attnum, @@ -1333,10 +1397,9 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } else if (isAnyTempNamespace(namespaceId)) { - pfree(obj); + /* no need to fill any fields of *obj */ table_close(catalog, AccessShareLock); - MemoryContextSwitchTo(oldcxt); - return; + return false; } else { @@ -1346,10 +1409,10 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } } - if (get_object_namensp_unique(obj->address.classId) && - obj->address.objectSubId == 0) + if (get_object_namensp_unique(object->classId) && + object->objectSubId == 0) { - attnum = get_object_attnum_name(obj->address.classId); + attnum = get_object_attnum_name(object->classId); if (attnum != InvalidAttrNumber) { datum = heap_getattr(tuple, attnum, @@ -1362,24 +1425,8 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no table_close(catalog, AccessShareLock); } - else - { - if (object->classId == NamespaceRelationId && - isTempNamespace(object->objectId)) - obj->istemp = true; - } - /* object identity, objname and objargs */ - obj->objidentity = - getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs, - false); - - /* object type */ - obj->objecttype = getObjectTypeDescription(&obj->address, false); - - slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next); - - MemoryContextSwitchTo(oldcxt); + return true; } /* diff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out index 7b2198eac6f20..0e090cbc37500 100644 --- a/src/test/regress/expected/event_trigger.out +++ b/src/test/regress/expected/event_trigger.out @@ -476,6 +476,43 @@ NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_10_15 NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_15_20 name={evttrig,part_15_20} args={} DROP TABLE a_temp_tbl; NOTICE: NORMAL: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={} +-- check unfiltered results, too +CREATE OR REPLACE FUNCTION event_trigger_report_dropped() + RETURNS event_trigger + LANGUAGE plpgsql +AS $$ +DECLARE r record; +BEGIN + FOR r IN SELECT * from pg_event_trigger_dropped_objects() + LOOP + RAISE NOTICE 'DROP: orig=% normal=% 
istemp=% type=% identity=% name=% args=%', + r.original, r.normal, r.is_temporary, r.object_type, + r.object_identity, r.address_names, r.address_args; + END LOOP; +END; $$; +NOTICE: END: command_tag=CREATE FUNCTION type=function identity=public.event_trigger_report_dropped() +CREATE TABLE evtrg_nontemp_table (f1 int primary key, f2 int default 42); +NOTICE: END: command_tag=CREATE TABLE type=table identity=public.evtrg_nontemp_table +NOTICE: END: command_tag=CREATE INDEX type=index identity=public.evtrg_nontemp_table_pkey +DROP TABLE evtrg_nontemp_table; +NOTICE: DROP: orig=t normal=f istemp=f type=table identity=public.evtrg_nontemp_table name={public,evtrg_nontemp_table} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table name={public.evtrg_nontemp_table} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table[] name={public.evtrg_nontemp_table[]} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=default value identity=for public.evtrg_nontemp_table.f2 name={public,evtrg_nontemp_table,f2} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_f1_not_null on public.evtrg_nontemp_table name={public,evtrg_nontemp_table,evtrg_nontemp_table_f1_not_null} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_pkey on public.evtrg_nontemp_table name={public,evtrg_nontemp_table,evtrg_nontemp_table_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=index identity=public.evtrg_nontemp_table_pkey name={public,evtrg_nontemp_table_pkey} args={} +CREATE TEMP TABLE a_temp_tbl (f1 int primary key, f2 int default 42); +NOTICE: END: command_tag=CREATE TABLE type=table identity=pg_temp.a_temp_tbl +NOTICE: END: command_tag=CREATE INDEX type=index identity=pg_temp.a_temp_tbl_pkey +DROP TABLE a_temp_tbl; +NOTICE: DROP: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl name={pg_temp.a_temp_tbl} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl[] name={pg_temp.a_temp_tbl[]} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=default value identity=for pg_temp.a_temp_tbl.f2 name={pg_temp,a_temp_tbl,f2} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_f1_not_null on pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl,a_temp_tbl_f1_not_null} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_pkey on pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl,a_temp_tbl_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=index identity=pg_temp.a_temp_tbl_pkey name={pg_temp,a_temp_tbl_pkey} args={} -- CREATE OPERATOR CLASS without FAMILY clause should report -- both CREATE OPERATOR FAMILY and CREATE OPERATOR CLASS CREATE OPERATOR CLASS evttrigopclass FOR TYPE int USING btree AS STORAGE int; diff --git a/src/test/regress/sql/event_trigger.sql b/src/test/regress/sql/event_trigger.sql index 013546b83057b..ef5978b9697aa 100644 --- a/src/test/regress/sql/event_trigger.sql +++ b/src/test/regress/sql/event_trigger.sql @@ -337,6 +337,26 @@ DROP INDEX evttrig.one_idx; DROP SCHEMA evttrig CASCADE; DROP TABLE a_temp_tbl; +-- check unfiltered results, too +CREATE OR REPLACE FUNCTION event_trigger_report_dropped() + RETURNS event_trigger + LANGUAGE plpgsql +AS $$ +DECLARE r record; +BEGIN + FOR r IN SELECT * from 
pg_event_trigger_dropped_objects() + LOOP + RAISE NOTICE 'DROP: orig=% normal=% istemp=% type=% identity=% name=% args=%', + r.original, r.normal, r.is_temporary, r.object_type, + r.object_identity, r.address_names, r.address_args; + END LOOP; +END; $$; + +CREATE TABLE evtrg_nontemp_table (f1 int primary key, f2 int default 42); +DROP TABLE evtrg_nontemp_table; +CREATE TEMP TABLE a_temp_tbl (f1 int primary key, f2 int default 42); +DROP TABLE a_temp_tbl; + -- CREATE OPERATOR CLASS without FAMILY clause should report -- both CREATE OPERATOR FAMILY and CREATE OPERATOR CLASS CREATE OPERATOR CLASS evttrigopclass FOR TYPE int USING btree AS STORAGE int; From ed1aad15e09d7d523f4ef413e3c4d410497c8065 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 11 Sep 2025 16:13:55 -0500 Subject: [PATCH 53/73] Move named LWLock tranche requests to shared memory. In EXEC_BACKEND builds, GetNamedLWLockTranche() can segfault when called outside of the postmaster process, as it might access NamedLWLockTrancheRequestArray, which won't be initialized. Given the lack of reports, this is apparently unusual, presumably because it is usually called from a shmem_startup_hook like this: mystruct = ShmemInitStruct(..., &found); if (!found) { mystruct->locks = GetNamedLWLockTranche(...); ... } This genre of shmem_startup_hook evades the aforementioned segfaults because the struct is initialized in the postmaster, so all other callers skip the !found path. We considered modifying the documentation or requiring GetNamedLWLockTranche() to be called from the postmaster, but ultimately we decided to simply move the request array to shared memory (and add it to the BackendParameters struct), thereby allowing calls outside postmaster on all platforms. Since the main shared memory segment is initialized after accepting LWLock tranche requests, postmaster builds the request array in local memory first and then copies it to shared memory later. Given the lack of reports, back-patching seems unnecessary. 
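
For reference, a fuller (hypothetical) version of such a hook; everything
except ShmemInitStruct, GetNamedLWLockTranche, AddinShmemInitLock, and
the hook mechanism itself is an invented name:

    static void
    mylib_shmem_startup(void)
    {
        bool    found;

        if (prev_shmem_startup_hook)
            prev_shmem_startup_hook();

        LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
        mystate = ShmemInitStruct("mylib state", sizeof(MyLibState), &found);
        if (!found)
        {
            /* with this commit, safe even when run outside the postmaster */
            mystate->locks = GetNamedLWLockTranche("mylib");
        }
        LWLockRelease(AddinShmemInitLock);
    }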
Reported-by: Sami Imseih Reviewed-by: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0v1_15QPg5Sqd2Qz5rh_qcsyCeHHmRDY89xVHcy2yt5BQ%40mail.gmail.com --- src/backend/postmaster/launch_backend.c | 3 +++ src/backend/storage/lmgr/lwlock.c | 31 +++++++++++++++++++++---- src/include/storage/lwlock.h | 4 ++++ 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index a38979c50e4bb..c5ef14e1eaae8 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -101,6 +101,7 @@ typedef struct struct InjectionPointsCtl *ActiveInjectionPoints; #endif int NamedLWLockTrancheRequests; + NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray; char **LWLockTrancheNames; int *LWLockCounter; LWLockPadded *MainLWLockArray; @@ -761,6 +762,7 @@ save_backend_variables(BackendParameters *param, #endif param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; + param->NamedLWLockTrancheRequestArray = NamedLWLockTrancheRequestArray; param->LWLockTrancheNames = LWLockTrancheNames; param->LWLockCounter = LWLockCounter; param->MainLWLockArray = MainLWLockArray; @@ -1022,6 +1024,7 @@ restore_backend_variables(BackendParameters *param) #endif NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; + NamedLWLockTrancheRequestArray = param->NamedLWLockTrancheRequestArray; LWLockTrancheNames = param->LWLockTrancheNames; LWLockCounter = param->LWLockCounter; MainLWLockArray = param->MainLWLockArray; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index fcbac5213a5c0..46c82c63ca537 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -184,14 +184,13 @@ typedef struct NamedLWLockTrancheRequest int num_lwlocks; } NamedLWLockTrancheRequest; -static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL; - /* - * NamedLWLockTrancheRequests is the valid length of the request array. This - * variable is non-static so that postmaster.c can copy them to child processes - * in EXEC_BACKEND builds. + * NamedLWLockTrancheRequests is the valid length of the request array. These + * variables are non-static so that launch_backend.c can copy them to child + * processes in EXEC_BACKEND builds. */ int NamedLWLockTrancheRequests = 0; +NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL; /* shared memory counter of registered tranches */ int *LWLockCounter = NULL; @@ -407,6 +406,14 @@ LWLockShmemSize(void) size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *))); size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN)); + /* + * Make space for named tranche requests. This is done for the benefit of + * EXEC_BACKEND builds, which otherwise wouldn't be able to call + * GetNamedLWLockTranche() outside postmaster. + */ + size = add_size(size, mul_size(NamedLWLockTrancheRequests, + sizeof(NamedLWLockTrancheRequest))); + /* Space for the LWLock array, plus room for cache line alignment. */ size = add_size(size, LWLOCK_PADDED_SIZE); size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded))); @@ -443,6 +450,20 @@ CreateLWLocks(void) ptr += NAMEDATALEN; } + /* + * Move named tranche requests to shared memory. This is done for the + * benefit of EXEC_BACKEND builds, which otherwise wouldn't be able to + * call GetNamedLWLockTranche() outside postmaster. 
+	 */
+	if (NamedLWLockTrancheRequests > 0)
+	{
+		memcpy(ptr, NamedLWLockTrancheRequestArray,
+			   NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest));
+		pfree(NamedLWLockTrancheRequestArray);
+		NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *) ptr;
+		ptr += NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest);
+	}
+
 	/* Ensure desired alignment of LWLock array */
 	ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
 	MainLWLockArray = (LWLockPadded *) ptr;
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 0e9cf81a4c766..8e0d0d233b48f 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -73,8 +73,12 @@ typedef union LWLockPadded

 extern PGDLLIMPORT LWLockPadded *MainLWLockArray;

+/* forward declaration of private type for use only by lwlock.c */
+typedef struct NamedLWLockTrancheRequest NamedLWLockTrancheRequest;
+
 extern PGDLLIMPORT char **LWLockTrancheNames;
 extern PGDLLIMPORT int NamedLWLockTrancheRequests;
+extern PGDLLIMPORT NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray;
 extern PGDLLIMPORT int *LWLockCounter;

 /*
From 306dd13079ed616c414c9411c5deadffea273266 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 12 Sep 2025 09:55:16 +0900
Subject: [PATCH 54/73] Remove whitespace in comment of pg_stat_statements.c

Introduced by 6b4d23feef6e, spotted while reading this area of the
code.
---
 contrib/pg_stat_statements/pg_stat_statements.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 1cb368c8590ba..0bb0f9333998b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -139,7 +139,6 @@ typedef enum pgssStoreKind
  * If you add a new key to this struct, make sure to teach pgss_store() to
  * zero the padding bytes. Otherwise, things will break, because pgss_hash is
  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
- *
  */
 typedef struct pgssHashKey
 {
From 528dadf691df3023fbb0bd71da5c6087c2d49d6a Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 12 Sep 2025 10:29:02 +0900
Subject: [PATCH 55/73] Add more information for WAL records of hash index AMs

hashdesc.c was missing a couple of fields in its record descriptions,
as of:
- is_prev_bucket_same_wrt for SQUEEZE_PAGE.
- procid for INIT_META_PAGE.
- old_bucket_flag and new_bucket_flag for SPLIT_ALLOCATE_PAGE.

The author has noted the first hole, and I have spotted the others
while double-checking this area of the code.

Note that the only data missing now are the offsets stored in
VACUUM_ONE_PAGE.  We could perhaps add them, if somebody sees value in
this data, even if it makes the output larger.  These are discarded
here.
Author: Kirill Reshke
Discussion: https://postgr.es/m/CALdSSPjc-OVwtZH0Xrkvg7n=2ZwdbMJzqrm_ed_CfjiAzuKVGg@mail.gmail.com
---
 src/backend/access/rmgrdesc/hashdesc.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/backend/access/rmgrdesc/hashdesc.c b/src/backend/access/rmgrdesc/hashdesc.c
index 75f43a9152071..2ee5332452f39 100644
--- a/src/backend/access/rmgrdesc/hashdesc.c
+++ b/src/backend/access/rmgrdesc/hashdesc.c
@@ -28,8 +28,10 @@ hash_desc(StringInfo buf, XLogReaderState *record)
 			{
 				xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) rec;

-				appendStringInfo(buf, "num_tuples %g, fillfactor %d",
-								 xlrec->num_tuples, xlrec->ffactor);
+				appendStringInfo(buf, "num_tuples %g, procid %u, fillfactor %d",
+								 xlrec->num_tuples,
+								 xlrec->procid,
+								 xlrec->ffactor);
 				break;
 			}
 		case XLOG_HASH_INIT_BITMAP_PAGE:
@@ -58,8 +60,10 @@ hash_desc(StringInfo buf, XLogReaderState *record)
 			{
 				xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) rec;

-				appendStringInfo(buf, "new_bucket %u, meta_page_masks_updated %c, issplitpoint_changed %c",
+				appendStringInfo(buf, "new_bucket %u, old_bucket_flag %u, new_bucket_flag %u, meta_page_masks_updated %c, issplitpoint_changed %c",
 								 xlrec->new_bucket,
+								 xlrec->old_bucket_flag,
+								 xlrec->new_bucket_flag,
 								 (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) ? 'T' : 'F',
 								 (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) ? 'T' : 'F');
 				break;
 			}
@@ -85,11 +89,12 @@ hash_desc(StringInfo buf, XLogReaderState *record)
 			{
 				xl_hash_squeeze_page *xlrec = (xl_hash_squeeze_page *) rec;

-				appendStringInfo(buf, "prevblkno %u, nextblkno %u, ntups %d, is_primary %c",
+				appendStringInfo(buf, "prevblkno %u, nextblkno %u, ntups %d, is_primary %c, is_previous %c",
 								 xlrec->prevblkno,
 								 xlrec->nextblkno,
 								 xlrec->ntups,
-								 xlrec->is_prim_bucket_same_wrt ? 'T' : 'F');
+								 xlrec->is_prim_bucket_same_wrt ? 'T' : 'F',
+								 xlrec->is_prev_bucket_same_wrt ? 'T' : 'F');
 				break;
 			}
 		case XLOG_HASH_DELETE:
From 2d756ebbe857e3d395d18350bf232300ebd23981 Mon Sep 17 00:00:00 2001
From: Richard Guo
Date: Fri, 12 Sep 2025 11:12:19 +0900
Subject: [PATCH 56/73] Fix misuse of Relids for storing attribute numbers

The typedef Relids (Bitmapset *) is intended to represent a set of
relation identifiers, but was incorrectly used in several places to
store sets of attribute numbers.  This is my oversight in e2debb643.

Fix that by replacing such usages with Bitmapset * to reflect the
correct semantics.
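
For illustration (variable names invented), both typedefs are the same
bitmapset type as far as the compiler is concerned, which is why the
declared type has to carry the semantics for human readers:

    Relids      relids = NULL;      /* a set of range-table indexes */
    Bitmapset  *attnums = NULL;     /* a set of attribute numbers */

    relids = bms_add_member(relids, 1);    /* relation identifier 1 */
    attnums = bms_add_member(attnums, 3);  /* attribute number 3 */

    if (bms_is_member(3, attnums))
        elog(DEBUG1, "column 3 is known NOT NULL");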
Author: Junwang Zhao Reviewed-by: Tender Wang Reviewed-by: Richard Guo Discussion: https://postgr.es/m/CAEG8a3LJhp_xriXf39iCz0TsK+M-2biuhDhpLC6Baxw8+ZYT3A@mail.gmail.com --- src/backend/optimizer/util/clauses.c | 2 +- src/backend/optimizer/util/plancat.c | 6 +++--- src/include/optimizer/plancat.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 6f0b338d2cdf1..ae0bd073ca917 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -4203,7 +4203,7 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod, bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info) { - Relids notnullattnums = NULL; + Bitmapset *notnullattnums = NULL; Assert(IsA(var, Var)); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 4536bdd6cb4d7..572d626b2c4d2 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -62,7 +62,7 @@ get_relation_info_hook_type get_relation_info_hook = NULL; typedef struct NotnullHashEntry { Oid relid; /* OID of the relation */ - Relids notnullattnums; /* attnums of NOT NULL columns */ + Bitmapset *notnullattnums; /* attnums of NOT NULL columns */ } NotnullHashEntry; @@ -683,7 +683,7 @@ get_relation_notnullatts(PlannerInfo *root, Relation relation) Oid relid = RelationGetRelid(relation); NotnullHashEntry *hentry; bool found; - Relids notnullattnums = NULL; + Bitmapset *notnullattnums = NULL; /* bail out if the relation has no not-null constraints */ if (relation->rd_att->constr == NULL || @@ -750,7 +750,7 @@ get_relation_notnullatts(PlannerInfo *root, Relation relation) * Searches the hash table and returns the column not-null constraint * information for a given relation. */ -Relids +Bitmapset * find_relation_notnullatts(PlannerInfo *root, Oid relid) { NotnullHashEntry *hentry; diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index dd8f2cd157f6f..9610707683235 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -30,7 +30,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, extern void get_relation_notnullatts(PlannerInfo *root, Relation relation); -extern Relids find_relation_notnullatts(PlannerInfo *root, Oid relid); +extern Bitmapset *find_relation_notnullatts(PlannerInfo *root, Oid relid); extern List *infer_arbiter_indexes(PlannerInfo *root); From e92677e86333562b8dd4972083c8a1abf985d90d Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 07:27:48 +0200 Subject: [PATCH 57/73] Silence compiler warnings on clang 21 Clang 21 shows some new compiler warnings, for example: warning: variable 'dstsize' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer] The fix is to initialize the variables when they are defined. This is similar to, for example, the existing situation in gistKeyIsEQ(). 
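
A contrived, self-contained example of the warning and of the style of
fix used here (function and variable names invented):

    #include <stdio.h>

    static void
    report_size(const int *size)    /* const pointer: callee only reads */
    {
        printf("size = %d\n", *size);
    }

    int
    main(void)
    {
        int     dstsize = 0;    /* without "= 0", clang 21 warns:
                                 * -Wuninitialized-const-pointer */

        report_size(&dstsize);
        return 0;
    }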
Discussion: https://www.postgresql.org/message-id/flat/6604ad6e-5934-43ac-8590-15113d6ae4b1%40eisentraut.org --- src/backend/access/common/toast_internals.c | 2 +- src/backend/access/gist/gistutil.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index a1d0eed8953ba..75e908c2e80a7 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -135,7 +135,7 @@ toast_save_datum(Relation rel, Datum value, char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ]; /* ensure union is aligned well enough: */ int32 align_it; - } chunk_data; + } chunk_data = {0}; /* silence compiler warning */ int32 chunk_size; int32 chunk_seq = 0; char *data_p; diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index c0aa7d0222f39..cdc4ab3151be1 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -157,7 +157,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, { int i; GistEntryVector *evec; - int attrsize; + int attrsize = 0; /* silence compiler warning */ evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ); @@ -242,7 +242,7 @@ gistMakeUnionKey(GISTSTATE *giststate, int attno, char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ]; } storage; GistEntryVector *evec = &storage.gev; - int dstsize; + int dstsize = 0; /* silence compiler warning */ evec->n = 2; From 25f36066dd2abde74faa12f08e5e498a95128cd0 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 11 Sep 2025 11:55:29 +0200 Subject: [PATCH 58/73] Remove traces of support for Sun Studio compiler Per discussion, this compiler suite is no longer maintained, and it has not been able to compile PostgreSQL since at least PostgreSQL 17. This removes all the remaining support code for this compiler. Note that the Solaris operating system continues to be supported, but using GCC as the compiler. 
Reviewed-by: Andres Freund Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/a0f817ee-fb86-483a-8a14-b6f7f5991b6e%40eisentraut.org --- config/c-compiler.m4 | 2 +- configure | 53 +--------- configure.ac | 31 +----- doc/src/sgml/dfunc.sgml | 9 +- doc/src/sgml/installation.sgml | 60 +----------- meson.build | 2 +- src/Makefile.global.in | 4 - src/backend/port/Makefile | 12 --- src/backend/port/tas/sunstudio_sparc.s | 53 ---------- src/backend/port/tas/sunstudio_x86.s | 43 -------- src/backend/storage/lmgr/Makefile | 6 +- src/include/c.h | 15 ++- src/include/port/atomics.h | 2 - src/include/port/atomics/arch-x86.h | 2 +- src/include/port/atomics/generic-sunpro.h | 113 ---------------------- src/include/port/solaris.h | 21 ---- src/include/storage/s_lock.h | 24 +---- src/makefiles/meson.build | 4 +- src/template/linux | 23 ----- src/template/solaris | 29 +----- src/tools/pginclude/headerscheck | 1 - 21 files changed, 24 insertions(+), 485 deletions(-) delete mode 100644 src/backend/port/tas/sunstudio_sparc.s delete mode 100644 src/backend/port/tas/sunstudio_x86.s delete mode 100644 src/include/port/atomics/generic-sunpro.h diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index da40bd6a64755..236a59e8536c2 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -83,7 +83,7 @@ if test x"$pgac_cv__128bit_int" = xyes ; then AC_CACHE_CHECK([for __int128 alignment bug], [pgac_cv__128bit_int_bug], [AC_RUN_IFELSE([AC_LANG_PROGRAM([ /* This must match the corresponding code in c.h: */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +#if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #elif defined(_MSC_VER) #define pg_attribute_aligned(a) __declspec(align(a)) diff --git a/configure b/configure index 39c68161ceced..22cd866147b96 100755 --- a/configure +++ b/configure @@ -739,7 +739,6 @@ PKG_CONFIG_LIBDIR PKG_CONFIG_PATH PKG_CONFIG DLSUFFIX -TAS GCC CPP CFLAGS_SL @@ -760,7 +759,6 @@ CLANG LLVM_CONFIG AWK with_llvm -SUN_STUDIO_CC ac_ct_CXX CXXFLAGS CXX @@ -3059,12 +3057,6 @@ $as_echo "$template" >&6; } PORTNAME=$template -# Initialize default assumption that we do not need separate assembly code -# for TAS (test-and-set). This can be overridden by the template file -# when it's executed. -need_tas=no -tas_file=dummy.s - # Default, works for most platforms, override in template file if needed DLSUFFIX=".so" @@ -4799,30 +4791,6 @@ else fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -# Check if it's Sun Studio compiler. We assume that -# __SUNPRO_C will be defined for Sun Studio compilers -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -#ifndef __SUNPRO_C -choke me -#endif - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - SUN_STUDIO_CC=yes -else - SUN_STUDIO_CC=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - - # # LLVM @@ -6748,7 +6716,7 @@ fi # __attribute__((visibility("hidden"))) is supported, if we encounter a # compiler that supports one of the supported variants of -fvisibility=hidden # but uses a different syntax to mark a symbol as exported. -if test "$GCC" = yes -o "$SUN_STUDIO_CC" = yes ; then +if test "$GCC" = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -fvisibility=hidden, for CFLAGS_SL_MODULE" >&5 $as_echo_n "checking whether ${CC} supports -fvisibility=hidden, for CFLAGS_SL_MODULE... 
" >&6; } if ${pgac_cv_prog_CC_cflags__fvisibility_hidden+:} false; then : @@ -7731,20 +7699,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -# -# Set up TAS assembly code if needed; the template file has now had its -# chance to request this. -# -ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}" - - -if test "$need_tas" = yes ; then - TAS=tas.o -else - TAS="" -fi - - cat >>confdefs.h <<_ACEOF #define DLSUFFIX "$DLSUFFIX" @@ -17141,7 +17095,7 @@ else /* end confdefs.h. */ /* This must match the corresponding code in c.h: */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +#if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #elif defined(_MSC_VER) #define pg_attribute_aligned(a) __declspec(align(a)) @@ -19344,8 +19298,6 @@ fi if test x"$GCC" = x"yes" ; then cc_string=`${CC} --version | sed q` case $cc_string in [A-Za-z]*) ;; *) cc_string="GCC $cc_string";; esac -elif test x"$SUN_STUDIO_CC" = x"yes" ; then - cc_string=`${CC} -V 2>&1 | sed q` else cc_string=$CC fi @@ -20142,7 +20094,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 for ac_config_target in $ac_config_targets do case $ac_config_target in - "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;; "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;; "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;; "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;; diff --git a/configure.ac b/configure.ac index 066e3976c0aac..e44943aa6fe35 100644 --- a/configure.ac +++ b/configure.ac @@ -95,12 +95,6 @@ AC_MSG_RESULT([$template]) PORTNAME=$template AC_SUBST(PORTNAME) -# Initialize default assumption that we do not need separate assembly code -# for TAS (test-and-set). This can be overridden by the template file -# when it's executed. -need_tas=no -tas_file=dummy.s - # Default, works for most platforms, override in template file if needed DLSUFFIX=".so" @@ -400,14 +394,6 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [@%:@ifndef __INTEL_COMPILER choke me @%:@endif])], [ICC=yes], [ICC=no]) -# Check if it's Sun Studio compiler. We assume that -# __SUNPRO_C will be defined for Sun Studio compilers -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [@%:@ifndef __SUNPRO_C -choke me -@%:@endif])], [SUN_STUDIO_CC=yes], [SUN_STUDIO_CC=no]) - -AC_SUBST(SUN_STUDIO_CC) - # # LLVM @@ -618,7 +604,7 @@ fi # __attribute__((visibility("hidden"))) is supported, if we encounter a # compiler that supports one of the supported variants of -fvisibility=hidden # but uses a different syntax to mark a symbol as exported. -if test "$GCC" = yes -o "$SUN_STUDIO_CC" = yes ; then +if test "$GCC" = yes; then PGAC_PROG_CC_VAR_OPT(CFLAGS_SL_MODULE, [-fvisibility=hidden]) # For C++ we additionally want -fvisibility-inlines-hidden PGAC_PROG_VARCXX_VARFLAGS_OPT(CXX, CXXFLAGS_SL_MODULE, [-fvisibility=hidden]) @@ -774,19 +760,6 @@ AC_PROG_CPP AC_SUBST(GCC) -# -# Set up TAS assembly code if needed; the template file has now had its -# chance to request this. 
-# -AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}]) - -if test "$need_tas" = yes ; then - TAS=tas.o -else - TAS="" -fi -AC_SUBST(TAS) - AC_SUBST(DLSUFFIX)dnl AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"], [Define to the file name extension of dynamically-loadable modules.]) @@ -2478,8 +2451,6 @@ AC_SUBST(LDFLAGS_EX_BE) if test x"$GCC" = x"yes" ; then cc_string=`${CC} --version | sed q` case $cc_string in [[A-Za-z]]*) ;; *) cc_string="GCC $cc_string";; esac -elif test x"$SUN_STUDIO_CC" = x"yes" ; then - cc_string=`${CC} -V 2>&1 | sed q` else cc_string=$CC fi diff --git a/doc/src/sgml/dfunc.sgml b/doc/src/sgml/dfunc.sgml index b94aefcd0ca6c..3778efc83ebfa 100644 --- a/doc/src/sgml/dfunc.sgml +++ b/doc/src/sgml/dfunc.sgml @@ -157,19 +157,12 @@ ld -Bshareable -o foo.so foo.o The compiler flag to create PIC is - with the Sun compiler and with GCC. To link shared libraries, the compiler option is - with either compiler or alternatively with GCC. -cc -KPIC -c foo.c -cc -G -o foo.so foo.o - - or - gcc -fPIC -c foo.c -gcc -G -o foo.so foo.o +gcc -shared -o foo.so foo.o diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index a4ad80a678211..593202f4fb259 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1676,10 +1676,6 @@ build-postgresql: using the GCC compiler: ./configure CC='gcc -m64' --enable-dtrace DTRACEFLAGS='-64' ... - - Using Sun's compiler: - -./configure CC='/opt/SUNWspro/bin/cc -xtarget=native64' --enable-dtrace DTRACEFLAGS='-64' ... @@ -3713,24 +3709,13 @@ xcrun --show-sdk-path Required Tools - You can build with either GCC or Sun's compiler suite. For - better code optimization, Sun's compiler is strongly recommended - on the SPARC architecture. If - you are using Sun's compiler, be careful not to select - /usr/ucb/cc; - use /opt/SUNWspro/bin/cc. + Only GCC is supported as the compiler. Sun's compiler suite is no longer + supported. - You can download Sun Studio - from . - Many GNU tools are integrated into Solaris 10, or they are - present on the Solaris companion CD. If you need packages for - older versions of Solaris, you can find these tools - at . - If you prefer - sources, look - at . + Many additional dependencies can be installed via the package management + system. @@ -3753,27 +3738,6 @@ configure ... LDFLAGS="-R /usr/sfw/lib:/opt/sfw/lib:/usr/local/lib" - - Compiling for Optimal Performance - - - On the SPARC architecture, Sun Studio is strongly recommended for - compilation. Try using the optimization - flag to generate significantly faster binaries. Do not use any - flags that modify behavior of floating-point operations - and errno processing (e.g., - ). - - - - If you do not have a reason to use 64-bit binaries on SPARC, - prefer the 32-bit version. The 64-bit operations are slower and - 64-bit binaries are slower than the 32-bit variants. On the - other hand, 32-bit code on the AMD64 CPU family is not native, - so 32-bit code is significantly slower on that CPU family. - - - Using DTrace for Tracing PostgreSQL @@ -3781,22 +3745,6 @@ configure ... LDFLAGS="-R /usr/sfw/lib:/opt/sfw/lib:/usr/local/lib" Yes, using DTrace is possible. See for further information. - - - If you see the linking of the postgres executable abort with an - error message like: - -Undefined first referenced - symbol in file -AbortTransaction utils/probes.o -CommitTransaction utils/probes.o -ld: fatal: Symbol referencing errors. 
No output written to postgres -collect2: ld returned 1 exit status -make: *** [postgres] Error 1 - - your DTrace installation is too old to handle probes in static - functions. You need Solaris 10u4 or newer to use DTrace. - diff --git a/meson.build b/meson.build index ab8101d67b26d..d71c7c8267e79 100644 --- a/meson.build +++ b/meson.build @@ -1809,7 +1809,7 @@ if cc.links(''' if not meson.is_cross_build() r = cc.run(''' /* This must match the corresponding code in c.h: */ - #if defined(__GNUC__) || defined(__SUNPRO_C) + #if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #elif defined(_MSC_VER) #define pg_attribute_aligned(a) __declspec(align(a)) diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 8b1b357beaa04..0aa389bc71012 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -267,7 +267,6 @@ endif # not PGXS CC = @CC@ GCC = @GCC@ -SUN_STUDIO_CC = @SUN_STUDIO_CC@ CXX = @CXX@ CFLAGS = @CFLAGS@ CFLAGS_SL = @CFLAGS_SL@ @@ -796,9 +795,6 @@ ifeq ($(PORTNAME),win32) LIBS += -lws2_32 endif -# Not really standard libc functions, used by the backend. -TAS = @TAS@ - ########################################################################## # diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile index 47338d9922957..8613ac01aff6d 100644 --- a/src/backend/port/Makefile +++ b/src/backend/port/Makefile @@ -22,7 +22,6 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = \ - $(TAS) \ atomics.o \ pg_sema.o \ pg_shmem.o @@ -33,16 +32,5 @@ endif include $(top_srcdir)/src/backend/common.mk -tas.o: tas.s -ifeq ($(SUN_STUDIO_CC), yes) -# preprocess assembler file with cpp - $(CC) $(CFLAGS) -c -P $< - mv $*.i $*_cpp.s - $(CC) $(CFLAGS) -c $*_cpp.s -o $@ -else - $(CC) $(CFLAGS) -c $< -endif - clean: - rm -f tas_cpp.s $(MAKE) -C win32 clean diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s deleted file mode 100644 index 8e0a0965b64ea..0000000000000 --- a/src/backend/port/tas/sunstudio_sparc.s +++ /dev/null @@ -1,53 +0,0 @@ -!------------------------------------------------------------------------- -! -! sunstudio_sparc.s -! compare and swap for Sun Studio on Sparc -! -! Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group -! Portions Copyright (c) 1994, Regents of the University of California -! -! IDENTIFICATION -! src/backend/port/tas/sunstudio_sparc.s -! -!------------------------------------------------------------------------- - -! Fortunately the Sun compiler can process cpp conditionals with -P - -! '/' is the comment for x86, while '!' is the comment for Sparc - -#if defined(__sparcv9) || defined(__sparc) - - .section ".text" - .align 8 - .skip 24 - .align 4 - - .global pg_atomic_cas -pg_atomic_cas: - - ! "cas" only works on sparcv9 and sparcv8plus chips, and - ! requires a compiler targeting these CPUs. It will fail - ! on a compiler targeting sparcv8, and of course will not - ! be understood by a sparcv8 CPU. gcc continues to use - ! "ldstub" because it targets sparcv7. - ! - ! There is actually a trick for embedding "cas" in a - ! sparcv8-targeted compiler, but it can only be run - ! on a sparcv8plus/v9 cpus: - ! - ! http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il - ! - ! NB: We're assuming we're running on a TSO system here - solaris - ! userland luckily always has done so. 
- -#if defined(__sparcv9) || defined(__sparcv8plus) - cas [%o0],%o2,%o1 -#else - ldstub [%o0],%o1 -#endif - mov %o1,%o0 - retl - nop - .type pg_atomic_cas,2 - .size pg_atomic_cas,(.-pg_atomic_cas) -#endif diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s deleted file mode 100644 index 0111ffde45c29..0000000000000 --- a/src/backend/port/tas/sunstudio_x86.s +++ /dev/null @@ -1,43 +0,0 @@ -/------------------------------------------------------------------------- -/ -/ sunstudio_x86.s -/ compare and swap for Sun Studio on x86 -/ -/ Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group -/ Portions Copyright (c) 1994, Regents of the University of California -/ -/ IDENTIFICATION -/ src/backend/port/tas/sunstudio_x86.s -/ -/------------------------------------------------------------------------- - -/ Fortunately the Sun compiler can process cpp conditionals with -P - -/ '/' is the comment for x86, while '!' is the comment for Sparc - - .file "tas.s" - -#if defined(__amd64) - .code64 -#endif - - .globl pg_atomic_cas - .type pg_atomic_cas, @function - - .section .text, "ax" - .align 16 - -pg_atomic_cas: -#if defined(__amd64) - movl %edx,%eax - lock - cmpxchgl %esi,(%rdi) -#else - movl 4(%esp), %edx - movl 8(%esp), %ecx - movl 12(%esp), %eax - lock - cmpxchgl %ecx, (%edx) -#endif - ret - .size pg_atomic_cas, . - pg_atomic_cas diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile index 6cbaf23b855f6..a5fbc24ddad6e 100644 --- a/src/backend/storage/lmgr/Makefile +++ b/src/backend/storage/lmgr/Makefile @@ -24,13 +24,9 @@ OBJS = \ include $(top_srcdir)/src/backend/common.mk -ifdef TAS -TASPATH = $(top_builddir)/src/backend/port/tas.o -endif - s_lock_test: s_lock.c $(top_builddir)/src/common/libpgcommon.a $(top_builddir)/src/port/libpgport.a $(CC) $(CPPFLAGS) $(CFLAGS) -DS_LOCK_TEST=1 $(srcdir)/s_lock.c \ - $(TASPATH) -L $(top_builddir)/src/common -lpgcommon \ + -L $(top_builddir)/src/common -lpgcommon \ -L $(top_builddir)/src/port -lpgport -lm -o s_lock_test lwlocknames.h: ../../../include/storage/lwlocklist.h ../../utils/activity/wait_event_names.txt generate-lwlocknames.pl diff --git a/src/include/c.h b/src/include/c.h index b580cfa7d3178..f303ba0605a40 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -114,7 +114,6 @@ * GCC: https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html * GCC: https://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html * Clang: https://clang.llvm.org/docs/AttributeReference.html - * Sunpro: https://docs.oracle.com/cd/E18659_01/html/821-1384/gjzke.html */ /* @@ -157,7 +156,7 @@ */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define pg_noreturn _Noreturn -#elif defined(__GNUC__) || defined(__SUNPRO_C) +#elif defined(__GNUC__) #define pg_noreturn __attribute__((noreturn)) #elif defined(_MSC_VER) #define pg_noreturn __declspec(noreturn) @@ -233,8 +232,8 @@ #define pg_attribute_printf(f,a) #endif -/* GCC and Sunpro support aligned and packed */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +/* GCC supports aligned and packed */ +#if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #define pg_attribute_packed() __attribute__((packed)) #elif defined(_MSC_VER) @@ -259,8 +258,8 @@ * choose not to. But, if possible, don't force inlining in unoptimized * debug builds. 
*/ -#if (defined(__GNUC__) && defined(__OPTIMIZE__)) || defined(__SUNPRO_C) -/* GCC and Sunpro support always_inline via __attribute__ */ +#if defined(__GNUC__) && defined(__OPTIMIZE__) +/* GCC supports always_inline via __attribute__ */ #define pg_attribute_always_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) /* MSVC has a special keyword for this */ @@ -276,8 +275,8 @@ * for proper cost attribution. Note that unlike the pg_attribute_XXX macros * above, this should be placed before the function's return type and name. */ -/* GCC and Sunpro support noinline via __attribute__ */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +/* GCC supports noinline via __attribute__ */ +#if defined(__GNUC__) #define pg_noinline __attribute__((noinline)) /* msvc via declspec */ #elif defined(_MSC_VER) diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h index 074136fe4c4a8..96f1858da9722 100644 --- a/src/include/port/atomics.h +++ b/src/include/port/atomics.h @@ -88,8 +88,6 @@ #include "port/atomics/generic-gcc.h" #elif defined(_MSC_VER) #include "port/atomics/generic-msvc.h" -#elif defined(__SUNPRO_C) && !defined(__GNUC__) -#include "port/atomics/generic-sunpro.h" #else /* Unknown compiler. */ #endif diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h index 8983dd89d53d7..4ba2ccc05913d 100644 --- a/src/include/port/atomics/arch-x86.h +++ b/src/include/port/atomics/arch-x86.h @@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) */ #if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ - defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ + defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, msvc */ #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY #endif /* 8 byte single-copy atomicity */ diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h deleted file mode 100644 index 09bba0be2037d..0000000000000 --- a/src/include/port/atomics/generic-sunpro.h +++ /dev/null @@ -1,113 +0,0 @@ -/*------------------------------------------------------------------------- - * - * generic-sunpro.h - * Atomic operations for solaris' CC - * - * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group - * - * NOTES: - * - * Documentation: - * * manpage for atomic_cas(3C) - * http://www.unix.com/man-page/opensolaris/3c/atomic_cas/ - * http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html - * - * src/include/port/atomics/generic-sunpro.h - * - * ------------------------------------------------------------------------- - */ - -#ifdef HAVE_MBARRIER_H -#include - -#define pg_compiler_barrier_impl() __compiler_barrier() - -#ifndef pg_memory_barrier_impl -/* - * Despite the name this is actually a full barrier. Expanding to mfence/ - * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc - * respectively. - */ -# define pg_memory_barrier_impl() __machine_rw_barrier() -#endif -#ifndef pg_read_barrier_impl -# define pg_read_barrier_impl() __machine_r_barrier() -#endif -#ifndef pg_write_barrier_impl -# define pg_write_barrier_impl() __machine_w_barrier() -#endif - -#endif /* HAVE_MBARRIER_H */ - -/* Older versions of the compiler don't have atomic.h... 
*/ -#ifdef HAVE_ATOMIC_H - -#include - -#define PG_HAVE_ATOMIC_U32_SUPPORT -typedef struct pg_atomic_uint32 -{ - volatile uint32 value; -} pg_atomic_uint32; - -#define PG_HAVE_ATOMIC_U64_SUPPORT -typedef struct pg_atomic_uint64 -{ - /* - * Syntax to enforce variable alignment should be supported by versions - * supporting atomic.h, but it's hard to find accurate documentation. If - * it proves to be a problem, we'll have to add more version checks for 64 - * bit support. - */ - volatile uint64 value pg_attribute_aligned(8); -} pg_atomic_uint64; - -#endif /* HAVE_ATOMIC_H */ - - -#ifdef HAVE_ATOMIC_H - -#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 -static inline bool -pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, - uint32 *expected, uint32 newval) -{ - bool ret; - uint32 current; - - current = atomic_cas_32(&ptr->value, *expected, newval); - ret = current == *expected; - *expected = current; - return ret; -} - -#define PG_HAVE_ATOMIC_EXCHANGE_U32 -static inline uint32 -pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 newval) -{ - return atomic_swap_32(&ptr->value, newval); -} - -#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 -static inline bool -pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, - uint64 *expected, uint64 newval) -{ - bool ret; - uint64 current; - - AssertPointerAlignment(expected, 8); - current = atomic_cas_64(&ptr->value, *expected, newval); - ret = current == *expected; - *expected = current; - return ret; -} - -#define PG_HAVE_ATOMIC_EXCHANGE_U64 -static inline uint64 -pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval) -{ - return atomic_swap_64(&ptr->value, newval); -} - -#endif /* HAVE_ATOMIC_H */ diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h index 8ff40007c7f6a..c352361c81d83 100644 --- a/src/include/port/solaris.h +++ b/src/include/port/solaris.h @@ -1,26 +1,5 @@ /* src/include/port/solaris.h */ -/* - * Sort this out for all operating systems some time. The __xxx - * symbols are defined on both GCC and Solaris CC, although GCC - * doesn't document them. The __xxx__ symbols are only on GCC. - */ -#if defined(__i386) && !defined(__i386__) -#define __i386__ -#endif - -#if defined(__amd64) && !defined(__amd64__) -#define __amd64__ -#endif - -#if defined(__x86_64) && !defined(__x86_64__) -#define __x86_64__ -#endif - -#if defined(__sparc) && !defined(__sparc__) -#define __sparc__ -#endif - #if defined(__i386__) #include #endif diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index 2f73f9fcf57a2..7f8f566bd407f 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -333,9 +333,9 @@ tas(volatile slock_t *lock) slock_t _res; /* - * See comment in src/backend/port/tas/sunstudio_sparc.s for why this - * uses "ldstub", and that file uses "cas". gcc currently generates - * sparcv7-targeted binaries, so "cas" use isn't possible. + * "cas" would be better than "ldstub", but it is only present on + * sparcv8plus and later, while some platforms still support sparcv7 or + * sparcv8. Also, "cas" requires that the system be running in TSO mode. 
*/ __asm__ __volatile__( " ldstub [%2], %0 \n" @@ -594,24 +594,6 @@ tas(volatile slock_t *lock) #if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */ -/* These are in sunstudio_(sparc|x86).s */ - -#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc)) -#define HAS_TEST_AND_SET - -#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus) -typedef unsigned int slock_t; -#else -typedef unsigned char slock_t; -#endif - -extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with, - slock_t cmp); - -#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0) -#endif - - #ifdef _MSC_VER typedef LONG slock_t; diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 54dbc059adac7..0def244c9011d 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -63,8 +63,6 @@ pgxs_kv = { 'DLSUFFIX': dlsuffix, 'EXEEXT': exesuffix, - 'SUN_STUDIO_CC': 'no', # not supported so far - # want the chosen option, rather than the library 'with_ssl' : ssl_library, 'with_uuid': uuidopt, @@ -179,7 +177,7 @@ pgxs_empty = [ 'WANTED_LANGUAGES', # Not needed because we don't build the server / PLs with the generated makefile - 'LIBOBJS', 'PG_CRC32C_OBJS', 'TAS', + 'LIBOBJS', 'PG_CRC32C_OBJS', 'PG_TEST_EXTRA', 'DTRACEFLAGS', # only server has dtrace probes diff --git a/src/template/linux b/src/template/linux index ec3302c4a223f..faefe64254a90 100644 --- a/src/template/linux +++ b/src/template/linux @@ -14,26 +14,3 @@ CFLAGS_SL="-fPIC" # If --enable-profiling is specified, we need -DLINUX_PROFILE PLATFORM_PROFILE_FLAGS="-DLINUX_PROFILE" - -if test "$SUN_STUDIO_CC" = "yes" ; then - CC="$CC -Xa" # relaxed ISO C mode - CFLAGS="-v" # -v is like gcc -Wall - if test "$enable_debug" != yes; then - CFLAGS="$CFLAGS -O" # any optimization breaks debug - fi - - # Pick the right test-and-set (TAS) code for the Sun compiler. - # We would like to use in-line assembler, but the compiler - # requires *.il files to be on every compile line, making - # the build system too fragile. - case $host_cpu in - sparc) - need_tas=yes - tas_file=sunstudio_sparc.s - ;; - i?86|x86_64) - need_tas=yes - tas_file=sunstudio_x86.s - ;; - esac -fi diff --git a/src/template/solaris b/src/template/solaris index f88b1cdad37f8..a4d8d38a8f852 100644 --- a/src/template/solaris +++ b/src/template/solaris @@ -1,31 +1,4 @@ # src/template/solaris # Extra CFLAGS for code that will go into a shared library -if test "$GCC" = yes ; then - CFLAGS_SL="-fPIC" -else - CFLAGS_SL="-KPIC" -fi - -if test "$SUN_STUDIO_CC" = yes ; then - CC="$CC -Xa" # relaxed ISO C mode - CFLAGS="-v" # -v is like gcc -Wall - if test "$enable_debug" != yes; then - CFLAGS="$CFLAGS -O" # any optimization breaks debug - fi - - # Pick the right test-and-set (TAS) code for the Sun compiler. - # We would like to use in-line assembler, but the compiler - # requires *.il files to be on every compile line, making - # the build system too fragile. 
- case $host_cpu in - sparc) - need_tas=yes - tas_file=sunstudio_sparc.s - ;; - i?86|x86_64) - need_tas=yes - tas_file=sunstudio_x86.s - ;; - esac -fi +CFLAGS_SL="-fPIC" diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck index 17138a7569e4f..d017490a5386a 100755 --- a/src/tools/pginclude/headerscheck +++ b/src/tools/pginclude/headerscheck @@ -114,7 +114,6 @@ do test "$f" = src/include/port/atomics/generic.h && continue test "$f" = src/include/port/atomics/generic-gcc.h && continue test "$f" = src/include/port/atomics/generic-msvc.h && continue - test "$f" = src/include/port/atomics/generic-sunpro.h && continue # sepgsql.h depends on headers that aren't there on most platforms. test "$f" = contrib/sepgsql/sepgsql.h && continue From 2aac62be8cbb870ccf8c5b3fbb8a4e4aa8a14a73 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 07:57:06 +0200 Subject: [PATCH 59/73] Default to log_lock_waits=on If someone is stuck behind a lock for more than a second, that is almost always a problem that is worth a log entry. Author: Laurenz Albe Reviewed-By: Michael Banck Reviewed-By: Robert Haas Reviewed-By: Christoph Berg Reviewed-By: Stephen Frost Discussion: https://postgr.es/m/b8b8502915e50f44deb111bc0b43a99e2733e117.camel%40cybertec.at --- doc/src/sgml/config.sgml | 2 +- src/backend/storage/lmgr/proc.c | 2 +- src/backend/utils/misc/guc_parameters.dat | 2 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2a3685f474a96..3c33d5d0fbcae 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7929,7 +7929,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' Controls whether a log message is produced when a session waits longer than to acquire a lock. This is useful in determining if lock waits are causing - poor performance. The default is off. + poor performance. The default is on. Only superusers and users with the appropriate SET privilege can change this setting. diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index e9ef0fbfe32cb..96f29aafc391e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -60,7 +60,7 @@ int LockTimeout = 0; int IdleInTransactionSessionTimeout = 0; int TransactionTimeout = 0; int IdleSessionTimeout = 0; -bool log_lock_waits = false; +bool log_lock_waits = true; /* Pointer to this process's PGPROC struct, if any */ PGPROC *MyProc = NULL; diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 0da01627cfec1..6bc6be13d2ad2 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -566,7 +566,7 @@ { name => 'log_lock_waits', type => 'bool', context => 'PGC_SUSET', group => 'LOGGING_WHAT', short_desc => 'Logs long lock waits.', variable => 'log_lock_waits', - boot_val => 'false', + boot_val => 'true', }, { name => 'log_lock_failures', type => 'bool', context => 'PGC_SUSET', group => 'LOGGING_WHAT', diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 26c0869356485..c36fcb9ab6105 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -624,7 +624,7 @@ # processes # %% = '%' # e.g. 
'<%u%%%d> ' -#log_lock_waits = off # log lock waits >= deadlock_timeout +#log_lock_waits = on # log lock waits >= deadlock_timeout #log_lock_failures = off # log lock failures #log_recovery_conflict_waits = off # log standby recovery conflict waits # >= deadlock_timeout From 675ddc4d704f4cde0bc72244263a9efbb0d32cb8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 08:13:05 +0200 Subject: [PATCH 60/73] Improve pgbench definition of yyscan_t It was defining yyscan_t as a macro while the rest of the code uses a typedef with #ifdef guards around it. The latter is also what the flex generated code uses. So it seems best to make it look like those other places for consistency. The old way also had a potential for conflict if some code included multiple headers providing yyscan_t. exprscan.l includes #include "fe_utils/psqlscan_int.h" #include "pgbench.h" and fe_utils/psqlscan_int.h contains #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; #endif which was then followed by pgbench.h #define yyscan_t void * and then the generated code in exprscan.c #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T typedef void* yyscan_t; #endif This works, but if the #ifdef guard in psqlscan_int.h is removed, this fails. We want to move toward allowing repeat typedefs, per C11, but for that we need to make sure they are all the same. Reviewed-by: Chao Li Discussion: https://www.postgresql.org/message-id/flat/10d32190-f31b-40a5-b177-11db55597355@eisentraut.org --- src/bin/pgbench/pgbench.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index e053c9e2eb63d..f8b7b497d1ee0 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -16,11 +16,14 @@ /* * This file is included outside exprscan.l, in places where we can't see * flex's definition of typedef yyscan_t. Fortunately, it's documented as - * being "void *", so we can use a macro to keep the function declarations + * being "void *", so we can use typedef to keep the function declarations * here looking like the definitions in exprscan.l. exprparse.y and * pgbench.c also use this to be able to declare things as "yyscan_t". */ -#define yyscan_t void * +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif /* * Likewise, we can't see exprparse.y's definition of union YYSTYPE here, From ae0e1be9f2a20f6b64072dcee5b8dd7b9027a8fa Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 08:13:05 +0200 Subject: [PATCH 61/73] Allow redeclaration of typedef yyscan_t This is allowed in C11, so we don't need the workaround guards against it anymore. This effectively reverts commit 382092a0cd2 that put these guards in place. 
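
As a standalone illustration of the C11 rule this relies on (a sketch,
not code from the tree): C11 6.7p3 permits redeclaring a typedef name
as long as it denotes the same type, so the repeated declarations no
longer need #ifdef guards:

    typedef void *yyscan_t;
    typedef void *yyscan_t;    /* accepted by C11; a redefinition
                                * error under C99 */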
Reviewed-by: Chao Li Discussion: https://www.postgresql.org/message-id/flat/10d32190-f31b-40a5-b177-11db55597355@eisentraut.org --- contrib/cube/cubedata.h | 3 --- contrib/seg/segdata.h | 3 --- src/backend/utils/adt/jsonpath_internal.h | 3 --- src/bin/pgbench/pgbench.h | 3 --- src/include/bootstrap/bootstrap.h | 3 --- src/include/fe_utils/psqlscan_int.h | 6 ------ src/include/replication/syncrep.h | 3 --- src/include/replication/walsender_private.h | 3 --- src/pl/plpgsql/src/plpgsql.h | 3 --- 9 files changed, 30 deletions(-) diff --git a/contrib/cube/cubedata.h b/contrib/cube/cubedata.h index ad1e2bd699810..8bfcc6e99a27d 100644 --- a/contrib/cube/cubedata.h +++ b/contrib/cube/cubedata.h @@ -62,10 +62,7 @@ typedef struct NDBOX /* for cubescan.l and cubeparse.y */ /* All grammar constructs return strings */ #define YYSTYPE char * -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif /* in cubescan.l */ extern int cube_yylex(YYSTYPE *yylval_param, yyscan_t yyscanner); diff --git a/contrib/seg/segdata.h b/contrib/seg/segdata.h index 4347c31c28e94..7bc7c83dca309 100644 --- a/contrib/seg/segdata.h +++ b/contrib/seg/segdata.h @@ -16,10 +16,7 @@ extern int significant_digits(const char *s); /* for segscan.l and segparse.y */ union YYSTYPE; -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif /* in segscan.l */ extern int seg_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner); diff --git a/src/backend/utils/adt/jsonpath_internal.h b/src/backend/utils/adt/jsonpath_internal.h index f78069857d02b..19567aca6f775 100644 --- a/src/backend/utils/adt/jsonpath_internal.h +++ b/src/backend/utils/adt/jsonpath_internal.h @@ -22,10 +22,7 @@ typedef struct JsonPathString int total; } JsonPathString; -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif #include "utils/jsonpath.h" #include "jsonpath_gram.h" diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index f8b7b497d1ee0..d55d30e0ef954 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -20,10 +20,7 @@ * here looking like the definitions in exprscan.l. exprparse.y and * pgbench.c also use this to be able to declare things as "yyscan_t". */ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif /* * Likewise, we can't see exprparse.y's definition of union YYSTYPE here, diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index befc4fa1b3d87..5ad347ec290fa 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -56,10 +56,7 @@ extern void boot_get_type_io_data(Oid typid, Oid *typoutput); union YYSTYPE; -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif extern int boot_yyparse(yyscan_t yyscanner); extern int boot_yylex_init(yyscan_t *yyscannerp); diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h index 2a3a9d7c82aaa..a1ebf226cf499 100644 --- a/src/include/fe_utils/psqlscan_int.h +++ b/src/include/fe_utils/psqlscan_int.h @@ -51,14 +51,8 @@ * validity checking; in actual use, this file should always be included * from the body of a flex file, where these symbols are already defined. 
 */
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
 typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 
 /*
  * We use a stack of flex buffers to handle substitution of psql variables.
diff --git a/src/include/replication/syncrep.h b/src/include/replication/syncrep.h
index 675669a79f7d3..dc2b118b16629 100644
--- a/src/include/replication/syncrep.h
+++ b/src/include/replication/syncrep.h
@@ -97,10 +97,7 @@ extern void SyncRepUpdateSyncStandbysDefined(void);
  * in syncrep_gram.y and syncrep_scanner.l
  */
 union YYSTYPE;
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 
 extern int	syncrep_yyparse(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner);
 extern int	syncrep_yylex(union YYSTYPE *yylval_param, char **syncrep_parse_error_msg_p, yyscan_t yyscanner);
 extern void syncrep_yyerror(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner, const char *str);
diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h
index e98701038f506..384b8a78b9462 100644
--- a/src/include/replication/walsender_private.h
+++ b/src/include/replication/walsender_private.h
@@ -141,10 +141,7 @@ extern void WalSndSetState(WalSndState state);
  * repl_scanner.l
  */
 union YYSTYPE;
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 
 extern int	replication_yyparse(Node **replication_parse_result_p, yyscan_t yyscanner);
 extern int	replication_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner);
 pg_noreturn extern void replication_yyerror(Node **replication_parse_result_p, yyscan_t yyscanner, const char *message);
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index 41e52b8ce7183..5f193a3718399 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -1307,10 +1307,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func);
  */
 union YYSTYPE;
 #define YYLTYPE int
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 
 extern int	plpgsql_yylex(union YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_t yyscanner);
 extern int	plpgsql_token_length(yyscan_t yyscanner);
 extern void plpgsql_push_back_token(int token, union YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_t yyscanner);

From 20d541a200e9dfed8affef9e798ff35ca0f30b8e Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Fri, 12 Sep 2025 10:18:31 -0400
Subject: [PATCH 62/73] ci: openbsd: Increase RAM disk's size

Its size was ~3.8 GB before, which sometimes was not enough.  OpenBSD CI
tasks were often failing due to no space left on device.

Increase the RAM disk size to ~4.6 GB.
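
(For reference, and assuming mount_mfs interprets -s as a count of
512-byte sectors: 8000000 * 512 bytes is about 3.8 GB, and
10000000 * 512 bytes is about 4.8 GB raw, which lands near the quoted
~4.6 GB once filesystem overhead is subtracted.)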
Author: Nazir Bilal Yavuz
Discussion: https://postgr.es/m/CAN55FZ2XVVPJRJmGB2DsL3gOrOinWh=HWvj6GO1cHzJ=6LwTag@mail.gmail.com
Backpatch-through: 18, where openbsd was added to CI
---
 src/tools/ci/gcp_ram_disk.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/tools/ci/gcp_ram_disk.sh b/src/tools/ci/gcp_ram_disk.sh
index d48634512ac28..18dbb2037f5dc 100755
--- a/src/tools/ci/gcp_ram_disk.sh
+++ b/src/tools/ci/gcp_ram_disk.sh
@@ -15,7 +15,12 @@ case "`uname`" in
     umount /dev/sd0j # unused /usr/obj partition
     printf "m j\n\n\nswap\nw\nq\n" | disklabel -E sd0
     swapon /dev/sd0j
-    mount -t mfs -o rw,noatime,nodev,-s=8000000 swap $CIRRUS_WORKING_DIR
+    # Remove the per-process data segment limit so that mount_mfs can allocate
+    # large memory filesystems.  Without this, mount_mfs mmap() may fail with
+    # "Cannot allocate memory" if the requested size exceeds the current
+    # datasize limit.
+    ulimit -d unlimited
+    mount -t mfs -o rw,noatime,nodev,-s=10000000 swap $CIRRUS_WORKING_DIR
     ;;
 esac

From 7dcea51c2a4dcf7c512bbd4f618d1d3620f9d3d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?=
Date: Fri, 12 Sep 2025 18:47:25 +0200
Subject: [PATCH 63/73] Avoid unexpected changes of CurrentResourceOwner and
 CurrentMemoryContext

Users of logical decoding can encounter an unexpected change of
CurrentResourceOwner and CurrentMemoryContext.  The problem is that,
unlike other call sites of RollbackAndReleaseCurrentSubTransaction(),
in reorderbuffer.c we fail to restore the original values of these
global variables after they are clobbered by subtransaction abort.
This patch saves the values prior to the call and restores them
afterwards.

In addition, logical.c and logicalfuncs.c had a hack to restore the
resource owner, presumably because of the lack of this restore.  Remove
that.  Instead, because the test coverage here is not very consistent,
add an Assert() to ensure that the resowner is kept identical; this
would make it easy to detect other cases of bugs where we fail to
restore the resowner properly.  This could be removed later.

This is arguably an old bug, but there appears to be no reason to
backpatch it and it's risky to do so, so refrain for now.

Author: Antonin Houska
Reported-by: Mihail Nikalayeu
Reviewed-by: Euler Taveira
Discussion: https://postgr.es/m/119497.1756892972@localhost
---
 src/backend/replication/logical/logical.c     | 19 +++++++++++--------
 .../replication/logical/logicalfuncs.c        | 19 +++++++++++--------
 .../replication/logical/reorderbuffer.c       | 15 +++++++++++++++
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 7e363a7c05b4f..c68c0481f427a 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -2082,7 +2082,7 @@ LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
                                     bool *found_consistent_snapshot)
 {
     LogicalDecodingContext *ctx;
-    ResourceOwner old_resowner = CurrentResourceOwner;
+    ResourceOwner old_resowner PG_USED_FOR_ASSERTS_ONLY = CurrentResourceOwner;
     XLogRecPtr  retlsn;
 
     Assert(moveto != InvalidXLogRecPtr);
@@ -2141,21 +2141,24 @@ LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
          * might still have critical updates to do.
          */
         if (record)
+        {
             LogicalDecodingProcessRecord(ctx, ctx->reader);
 
+            /*
+             * We used to have bugs where logical decoding would fail to
+             * preserve the resource owner.  That's important here, so
+             * verify that that doesn't happen anymore.
XXX this could be + * removed once it's been battle-tested. + */ + Assert(CurrentResourceOwner == old_resowner); + } + CHECK_FOR_INTERRUPTS(); } if (found_consistent_snapshot && DecodingContextReady(ctx)) *found_consistent_snapshot = true; - /* - * Logical decoding could have clobbered CurrentResourceOwner during - * transaction management, so restore the executor's value. (This is - * a kluge, but it's not worth cleaning up right now.) - */ - CurrentResourceOwner = old_resowner; - if (ctx->reader->EndRecPtr != InvalidXLogRecPtr) { LogicalConfirmReceivedLocation(moveto); diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c index ca53caac2f2f5..25f890ddeedac 100644 --- a/src/backend/replication/logical/logicalfuncs.c +++ b/src/backend/replication/logical/logicalfuncs.c @@ -107,7 +107,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin XLogRecPtr end_of_wal; XLogRecPtr wait_for_wal_lsn; LogicalDecodingContext *ctx; - ResourceOwner old_resowner = CurrentResourceOwner; + ResourceOwner old_resowner PG_USED_FOR_ASSERTS_ONLY = CurrentResourceOwner; ArrayType *arr; Size ndim; List *options = NIL; @@ -263,8 +263,18 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin * store the description into our tuplestore. */ if (record != NULL) + { LogicalDecodingProcessRecord(ctx, ctx->reader); + /* + * We used to have bugs where logical decoding would fail to + * preserve the resource owner. Verify that that doesn't + * happen anymore. XXX this could be removed once it's been + * battle-tested. + */ + Assert(CurrentResourceOwner == old_resowner); + } + /* check limits */ if (upto_lsn != InvalidXLogRecPtr && upto_lsn <= ctx->reader->EndRecPtr) @@ -275,13 +285,6 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin CHECK_FOR_INTERRUPTS(); } - /* - * Logical decoding could have clobbered CurrentResourceOwner during - * transaction management, so restore the executor's value. (This is - * a kluge, but it's not worth cleaning up right now.) - */ - CurrentResourceOwner = old_resowner; - /* * Next time, start where we left off. (Hunting things, the family * business..) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 34cf05668ae84..4736f993c3743 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2215,6 +2215,7 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, { bool using_subtxn; MemoryContext ccxt = CurrentMemoryContext; + ResourceOwner cowner = CurrentResourceOwner; ReorderBufferIterTXNState *volatile iterstate = NULL; volatile XLogRecPtr prev_lsn = InvalidXLogRecPtr; ReorderBufferChange *volatile specinsert = NULL; @@ -2692,7 +2693,11 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, } if (using_subtxn) + { RollbackAndReleaseCurrentSubTransaction(); + MemoryContextSwitchTo(ccxt); + CurrentResourceOwner = cowner; + } /* * We are here due to one of the four reasons: 1. 
Decoding an
@@ -2751,7 +2756,11 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
         }
 
         if (using_subtxn)
+        {
             RollbackAndReleaseCurrentSubTransaction();
+            MemoryContextSwitchTo(ccxt);
+            CurrentResourceOwner = cowner;
+        }
 
         /*
          * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent
@@ -3244,6 +3253,8 @@ ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations,
                                    SharedInvalidationMessage *invalidations)
 {
     bool        use_subtxn = IsTransactionOrTransactionBlock();
+    MemoryContext ccxt = CurrentMemoryContext;
+    ResourceOwner cowner = CurrentResourceOwner;
     int         i;
 
     if (use_subtxn)
@@ -3262,7 +3273,11 @@ ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations,
         LocalExecuteInvalidationMessage(&invalidations[i]);
 
     if (use_subtxn)
+    {
         RollbackAndReleaseCurrentSubTransaction();
+        MemoryContextSwitchTo(ccxt);
+        CurrentResourceOwner = cowner;
+    }
 }
 
 /*

From 796962922e6938fdad4dbf810fb2a5dfcfc5f45a Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Fri, 12 Sep 2025 13:23:00 -0400
Subject: [PATCH 64/73] Always commute strategy when preprocessing DESC keys.

A recently added nbtree preprocessing step failed to account for the
fact that DESC columns already had their B-Tree strategy number
commuted at this point in preprocessing.  As a result, preprocessing
could output a set of scan keys where one or more keys had the correct
strategy number, but used the wrong comparison routine.

To fix, make the faulty code path that looks up a more restrictive
replacement operator/comparison routine commute its requested
inequality strategy (while outputting the transformed strategy number
as before).  This makes the final transformed scan key comport with
the approach preprocessing has always used to deal with DESC columns
(which is described by comments above _bt_fix_scankey_strategy).

Oversight in commit b3f1a13f, which made nbtree preprocessing perform
transformations on skip array inequalities that can reduce the total
number of index searches.
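
As an illustrative sketch (a hypothetical helper, not the committed
code), the invariant can be stated in terms of the existing
BTCommuteStrategyNumber() macro: whatever inequality strategy
preprocessing wants to look up in the opfamily must be commuted once
more when the scan key is on a DESC column:

    static int16
    lookup_strategy(int16 wanted, ScanKey key)
    {
        /*
         * DESC keys are stored with commuted strategies, so commute
         * the requested strategy the same way before the opfamily
         * lookup; e.g. <= becomes >=
         */
        if (key->sk_flags & SK_BT_DESC)
            return BTCommuteStrategyNumber(wanted);
        return wanted;
    }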
Author: Peter Geoghegan Reported-By: Natalya Aksman Discussion: https://postgr.es/m/19049-b7df801e71de41b2@postgresql.org Backpatch-through: 18 --- src/backend/access/nbtree/nbtpreprocesskeys.c | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index 936b93f157a8b..07a3ff11a0b87 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -1412,6 +1412,7 @@ _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk, Datum orig_sk_argument = high_compare->sk_argument, new_sk_argument; bool uflow; + int16 lookupstrat; Assert(high_compare->sk_strategy == BTLessStrategyNumber); @@ -1433,9 +1434,14 @@ _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk, return; } - /* Look up <= operator (might fail) */ - leop = get_opfamily_member(opfamily, opcintype, opcintype, - BTLessEqualStrategyNumber); + /* + * Look up <= operator (might fail), accounting for the fact that a + * high_compare on a DESC column already had its strategy commuted + */ + lookupstrat = BTLessEqualStrategyNumber; + if (high_compare->sk_flags & SK_BT_DESC) + lookupstrat = BTGreaterEqualStrategyNumber; /* commute this too */ + leop = get_opfamily_member(opfamily, opcintype, opcintype, lookupstrat); if (!OidIsValid(leop)) return; cmp_proc = get_opcode(leop); @@ -1464,6 +1470,7 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, Datum orig_sk_argument = low_compare->sk_argument, new_sk_argument; bool oflow; + int16 lookupstrat; Assert(low_compare->sk_strategy == BTGreaterStrategyNumber); @@ -1485,9 +1492,14 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, return; } - /* Look up >= operator (might fail) */ - geop = get_opfamily_member(opfamily, opcintype, opcintype, - BTGreaterEqualStrategyNumber); + /* + * Look up >= operator (might fail), accounting for the fact that a + * low_compare on a DESC column already had its strategy commuted + */ + lookupstrat = BTGreaterEqualStrategyNumber; + if (low_compare->sk_flags & SK_BT_DESC) + lookupstrat = BTLessEqualStrategyNumber; /* commute this too */ + geop = get_opfamily_member(opfamily, opcintype, opcintype, lookupstrat); if (!OidIsValid(geop)) return; cmp_proc = get_opcode(geop); From 4adb0380b9bff5ec6424a9e87f76f8974b025367 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Fri, 12 Sep 2025 12:44:14 -0700 Subject: [PATCH 65/73] Replace tests of ALTER DATABASE RESET TABLESPACE. This unblocks rejection of that syntax. One copy was a misspelling of "SET TABLESPACE pg_default" that instead made no persistent changes. The other copy just needed to populate a DATABASEOID syscache entry. This slightly raises database.sql test coverage of catcache.c, while dbcommands.c coverage remains the same. 
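
For illustration, the replacement relies on the fact that any command
that must look up the database's pg_database row populates the catcache
entry as a side effect, while an aborted transaction leaves no
persistent change behind (a sketch mirroring the new test):

    BEGIN;
    -- loads the DATABASEOID syscache entry, then rolls back cleanly
    ALTER DATABASE regression_utf8
      RENAME TO regression_rename_rolled_back;
    ROLLBACK;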
Reported-by: Tom Lane Reviewed-by: Tom Lane Discussion: https://postgr.es/m/1802710.1757608564@sss.pgh.pa.us --- src/test/regress/expected/database.out | 4 ++-- src/test/regress/sql/database.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/regress/expected/database.out b/src/test/regress/expected/database.out index 4cbdbdf84d0c5..6b879b0f62a75 100644 --- a/src/test/regress/expected/database.out +++ b/src/test/regress/expected/database.out @@ -2,7 +2,7 @@ CREATE DATABASE regression_tbd ENCODING utf8 LC_COLLATE "C" LC_CTYPE "C" TEMPLATE template0; ALTER DATABASE regression_tbd RENAME TO regression_utf8; ALTER DATABASE regression_utf8 SET TABLESPACE regress_tblspace; -ALTER DATABASE regression_utf8 RESET TABLESPACE; +ALTER DATABASE regression_utf8 SET TABLESPACE pg_default; ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123; -- Test PgDatabaseToastTable. Doing this with GRANT would be slow. BEGIN; @@ -10,7 +10,7 @@ UPDATE pg_database SET datacl = array_fill(makeaclitem(10, 10, 'USAGE', false), ARRAY[5e5::int]) WHERE datname = 'regression_utf8'; -- load catcache entry, if nothing else does -ALTER DATABASE regression_utf8 RESET TABLESPACE; +ALTER DATABASE regression_utf8 RENAME TO regression_rename_rolled_back; ROLLBACK; CREATE ROLE regress_datdba_before; CREATE ROLE regress_datdba_after; diff --git a/src/test/regress/sql/database.sql b/src/test/regress/sql/database.sql index 46ad2634781ea..4ef361272911e 100644 --- a/src/test/regress/sql/database.sql +++ b/src/test/regress/sql/database.sql @@ -2,7 +2,7 @@ CREATE DATABASE regression_tbd ENCODING utf8 LC_COLLATE "C" LC_CTYPE "C" TEMPLATE template0; ALTER DATABASE regression_tbd RENAME TO regression_utf8; ALTER DATABASE regression_utf8 SET TABLESPACE regress_tblspace; -ALTER DATABASE regression_utf8 RESET TABLESPACE; +ALTER DATABASE regression_utf8 SET TABLESPACE pg_default; ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123; -- Test PgDatabaseToastTable. Doing this with GRANT would be slow. @@ -11,7 +11,7 @@ UPDATE pg_database SET datacl = array_fill(makeaclitem(10, 10, 'USAGE', false), ARRAY[5e5::int]) WHERE datname = 'regression_utf8'; -- load catcache entry, if nothing else does -ALTER DATABASE regression_utf8 RESET TABLESPACE; +ALTER DATABASE regression_utf8 RENAME TO regression_rename_rolled_back; ROLLBACK; CREATE ROLE regress_datdba_before; From f14ea34d6e563374cd71b4e7b91cf8d2f60aabb3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 12 Sep 2025 17:43:15 -0400 Subject: [PATCH 66/73] Fix oversights in pg_event_trigger_dropped_objects() fixes. Commit a0b99fc12 caused pg_event_trigger_dropped_objects() to not fill the object_name field for schemas, which it should have; and caused it to fill the object_name field for default values, which it should not have. In addition, triggers and RLS policies really should behave the same way as we're making column defaults do; that is, they should have is_temporary = true if they belong to a temporary table. Fix those things, and upgrade event_trigger.sql's woefully inadequate test coverage of these secondary output columns. As before, back-patch only to v15. 
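
A minimal illustration of the is_temporary change (a sketch with
invented names; the updated regression test covers this in full):
dropping a temporary table now reports its triggers and policies as
temporary too:

    CREATE TEMP TABLE tmp_evt (a int);
    CREATE POLICY tmp_pol ON tmp_evt USING (a > 0);
    DROP TABLE tmp_evt;
    -- a sql_drop event trigger now sees is_temporary = t for tmp_pol
    -- in pg_event_trigger_dropped_objects()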
Reported-by: Sergey Shinderuk Author: Tom Lane Discussion: https://postgr.es/m/bd7b4651-1c26-4d30-832b-f942fabcb145@postgrespro.ru Backpatch-through: 15 --- src/backend/commands/event_trigger.c | 87 ++++++++- src/test/regress/expected/event_trigger.out | 188 ++++++++++++-------- src/test/regress/sql/event_trigger.sql | 55 ++++-- 3 files changed, 244 insertions(+), 86 deletions(-) diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index fcdcba009d4e3..f34868da5ab94 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -30,6 +30,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_parameter_acl.h" +#include "catalog/pg_policy.h" #include "catalog/pg_proc.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" @@ -1302,6 +1303,7 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no MemoryContextSwitchTo(oldcxt); return; } + obj->objname = get_namespace_name(object->objectId); } else if (object->classId == AttrDefaultRelationId) { @@ -1311,7 +1313,6 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no colobject = GetAttrDefaultColumnAddress(object->objectId); if (OidIsValid(colobject.objectId)) { - colobject.objectSubId = 0; /* convert to table reference */ if (!obtain_object_name_namespace(&colobject, obj)) { pfree(obj); @@ -1320,6 +1321,90 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } } } + else if (object->classId == TriggerRelationId) + { + /* Similarly, a trigger is temp if its table is temp */ + /* Sadly, there's no lsyscache.c support for trigger objects */ + Relation pg_trigger_rel; + ScanKeyData skey[1]; + SysScanDesc sscan; + HeapTuple tuple; + Oid relid; + + /* Fetch the trigger's table OID the hard way */ + pg_trigger_rel = table_open(TriggerRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_trigger_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(object->objectId)); + sscan = systable_beginscan(pg_trigger_rel, TriggerOidIndexId, true, + NULL, 1, skey); + tuple = systable_getnext(sscan); + if (HeapTupleIsValid(tuple)) + relid = ((Form_pg_trigger) GETSTRUCT(tuple))->tgrelid; + else + relid = InvalidOid; /* shouldn't happen */ + systable_endscan(sscan); + table_close(pg_trigger_rel, AccessShareLock); + /* Do nothing if we didn't find the trigger */ + if (OidIsValid(relid)) + { + ObjectAddress relobject; + + relobject.classId = RelationRelationId; + relobject.objectId = relid; + /* Arbitrarily set objectSubId nonzero so as not to fill objname */ + relobject.objectSubId = 1; + if (!obtain_object_name_namespace(&relobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } + else if (object->classId == PolicyRelationId) + { + /* Similarly, a policy is temp if its table is temp */ + /* Sadly, there's no lsyscache.c support for policy objects */ + Relation pg_policy_rel; + ScanKeyData skey[1]; + SysScanDesc sscan; + HeapTuple tuple; + Oid relid; + + /* Fetch the policy's table OID the hard way */ + pg_policy_rel = table_open(PolicyRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_policy_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(object->objectId)); + sscan = systable_beginscan(pg_policy_rel, PolicyOidIndexId, true, + NULL, 1, skey); + tuple = systable_getnext(sscan); + if (HeapTupleIsValid(tuple)) + relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid; + else + relid = InvalidOid; 
/* shouldn't happen */ + systable_endscan(sscan); + table_close(pg_policy_rel, AccessShareLock); + /* Do nothing if we didn't find the policy */ + if (OidIsValid(relid)) + { + ObjectAddress relobject; + + relobject.classId = RelationRelationId; + relobject.objectId = relid; + /* Arbitrarily set objectSubId nonzero so as not to fill objname */ + relobject.objectSubId = 1; + if (!obtain_object_name_namespace(&relobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } else { /* Generic handling for all other object classes */ diff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out index 0e090cbc37500..16e4530708cc9 100644 --- a/src/test/regress/expected/event_trigger.out +++ b/src/test/regress/expected/event_trigger.out @@ -228,9 +228,15 @@ INSERT INTO undroppable_objs VALUES ('table', 'schema_one.table_three'), ('table', 'audit_tbls.schema_two_table_three'); CREATE TABLE dropped_objects ( - type text, - schema text, - object text + object_type text, + schema_name text, + object_name text, + object_identity text, + address_names text[], + address_args text[], + is_temporary bool, + original bool, + normal bool ); -- This tests errors raised within event triggers; the one in audit_tbls -- uses 2nd-level recursive invocation via test_evtrig_dropped_objects(). @@ -268,8 +274,12 @@ BEGIN END IF; INSERT INTO dropped_objects - (type, schema, object) VALUES - (obj.object_type, obj.schema_name, obj.object_identity); + (object_type, schema_name, object_name, + object_identity, address_names, address_args, + is_temporary, original, normal) VALUES + (obj.object_type, obj.schema_name, obj.object_name, + obj.object_identity, obj.address_names, obj.address_args, + obj.is_temporary, obj.original, obj.normal); END LOOP; END $$; @@ -325,42 +335,44 @@ NOTICE: table "audit_tbls_schema_two_table_three" does not exist, skipping NOTICE: table "schema_one_table_one" does not exist, skipping NOTICE: table "schema_one_table two" does not exist, skipping NOTICE: table "schema_one_table_three" does not exist, skipping -SELECT * FROM dropped_objects WHERE schema IS NULL OR schema <> 'pg_toast'; - type | schema | object ---------------+------------+------------------------------------- - table column | schema_one | schema_one.table_one.a - schema | | schema_two - table | schema_two | schema_two.table_two - type | schema_two | schema_two.table_two - type | schema_two | schema_two.table_two[] - table | audit_tbls | audit_tbls.schema_two_table_three - type | audit_tbls | audit_tbls.schema_two_table_three - type | audit_tbls | audit_tbls.schema_two_table_three[] - table | schema_two | schema_two.table_three - type | schema_two | schema_two.table_three - type | schema_two | schema_two.table_three[] - function | schema_two | schema_two.add(integer,integer) - aggregate | schema_two | schema_two.newton(integer) - schema | | schema_one - table | schema_one | schema_one.table_one - type | schema_one | schema_one.table_one - type | schema_one | schema_one.table_one[] - table | schema_one | schema_one."table two" - type | schema_one | schema_one."table two" - type | schema_one | schema_one."table two"[] - table | schema_one | schema_one.table_three - type | schema_one | schema_one.table_three - type | schema_one | schema_one.table_three[] +-- exclude TOAST objects because they have unstable names +SELECT * FROM dropped_objects + WHERE schema_name IS NULL OR schema_name <> 'pg_toast'; + object_type | schema_name | object_name | object_identity | 
address_names | address_args | is_temporary | original | normal +--------------+-------------+-------------------------+-------------------------------------+---------------------------------------+-------------------+--------------+----------+-------- + table column | schema_one | | schema_one.table_one.a | {schema_one,table_one,a} | {} | f | t | f + schema | | schema_two | schema_two | {schema_two} | {} | f | t | f + table | schema_two | table_two | schema_two.table_two | {schema_two,table_two} | {} | f | f | t + type | schema_two | table_two | schema_two.table_two | {schema_two.table_two} | {} | f | f | f + type | schema_two | _table_two | schema_two.table_two[] | {schema_two.table_two[]} | {} | f | f | f + table | audit_tbls | schema_two_table_three | audit_tbls.schema_two_table_three | {audit_tbls,schema_two_table_three} | {} | f | t | f + type | audit_tbls | schema_two_table_three | audit_tbls.schema_two_table_three | {audit_tbls.schema_two_table_three} | {} | f | f | f + type | audit_tbls | _schema_two_table_three | audit_tbls.schema_two_table_three[] | {audit_tbls.schema_two_table_three[]} | {} | f | f | f + table | schema_two | table_three | schema_two.table_three | {schema_two,table_three} | {} | f | f | t + type | schema_two | table_three | schema_two.table_three | {schema_two.table_three} | {} | f | f | f + type | schema_two | _table_three | schema_two.table_three[] | {schema_two.table_three[]} | {} | f | f | f + function | schema_two | | schema_two.add(integer,integer) | {schema_two,add} | {integer,integer} | f | f | t + aggregate | schema_two | | schema_two.newton(integer) | {schema_two,newton} | {integer} | f | f | t + schema | | schema_one | schema_one | {schema_one} | {} | f | t | f + table | schema_one | table_one | schema_one.table_one | {schema_one,table_one} | {} | f | f | t + type | schema_one | table_one | schema_one.table_one | {schema_one.table_one} | {} | f | f | f + type | schema_one | _table_one | schema_one.table_one[] | {schema_one.table_one[]} | {} | f | f | f + table | schema_one | table two | schema_one."table two" | {schema_one,"table two"} | {} | f | f | t + type | schema_one | table two | schema_one."table two" | {"schema_one.\"table two\""} | {} | f | f | f + type | schema_one | _table two | schema_one."table two"[] | {"schema_one.\"table two\"[]"} | {} | f | f | f + table | schema_one | table_three | schema_one.table_three | {schema_one,table_three} | {} | f | f | t + type | schema_one | table_three | schema_one.table_three | {schema_one.table_three} | {} | f | f | f + type | schema_one | _table_three | schema_one.table_three[] | {schema_one.table_three[]} | {} | f | f | f (23 rows) DROP OWNED BY regress_evt_user; NOTICE: schema "audit_tbls" does not exist, skipping -SELECT * FROM dropped_objects WHERE type = 'schema'; - type | schema | object ---------+--------+------------ - schema | | schema_two - schema | | schema_one - schema | | audit_tbls +SELECT * FROM dropped_objects WHERE object_type = 'schema'; + object_type | schema_name | object_name | object_identity | address_names | address_args | is_temporary | original | normal +-------------+-------------+-------------+-----------------+---------------+--------------+--------------+----------+-------- + schema | | schema_two | schema_two | {schema_two} | {} | f | t | f + schema | | schema_one | schema_one | {schema_one} | {} | f | t | f + schema | | audit_tbls | audit_tbls | {audit_tbls} | {} | f | t | f (3 rows) DROP ROLE regress_evt_user; @@ -378,9 +390,10 @@ BEGIN IF NOT r.normal AND NOT 
r.original THEN CONTINUE; END IF; - RAISE NOTICE 'NORMAL: orig=% normal=% istemp=% type=% identity=% name=% args=%', + RAISE NOTICE 'NORMAL: orig=% normal=% istemp=% type=% identity=% schema=% name=% addr=% args=%', r.original, r.normal, r.is_temporary, r.object_type, - r.object_identity, r.address_names, r.address_args; + r.object_identity, r.schema_name, r.object_name, + r.address_names, r.address_args; END LOOP; END; $$; CREATE EVENT TRIGGER regress_event_trigger_report_dropped ON sql_drop @@ -436,18 +449,18 @@ CREATE TABLE evttrig.part_15_20 PARTITION OF evttrig.part_10_20 (id) FOR VALUES FROM (15) TO (20); NOTICE: END: command_tag=CREATE TABLE type=table identity=evttrig.part_15_20 ALTER TABLE evttrig.two DROP COLUMN col_c; -NOTICE: NORMAL: orig=t normal=f istemp=f type=table column identity=evttrig.two.col_c name={evttrig,two,col_c} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table constraint identity=two_col_c_check on evttrig.two name={evttrig,two,two_col_c_check} args={} +NOTICE: NORMAL: orig=t normal=f istemp=f type=table column identity=evttrig.two.col_c schema=evttrig name= addr={evttrig,two,col_c} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table constraint identity=two_col_c_check on evttrig.two schema=evttrig name= addr={evttrig,two,two_col_c_check} args={} NOTICE: END: command_tag=ALTER TABLE type=table identity=evttrig.two ALTER TABLE evttrig.one ALTER COLUMN col_b DROP DEFAULT; -NOTICE: NORMAL: orig=t normal=f istemp=f type=default value identity=for evttrig.one.col_b name={evttrig,one,col_b} args={} +NOTICE: NORMAL: orig=t normal=f istemp=f type=default value identity=for evttrig.one.col_b schema=evttrig name= addr={evttrig,one,col_b} args={} NOTICE: END: command_tag=ALTER TABLE type=table identity=evttrig.one ALTER TABLE evttrig.one DROP CONSTRAINT one_pkey; -NOTICE: NORMAL: orig=t normal=f istemp=f type=table constraint identity=one_pkey on evttrig.one name={evttrig,one,one_pkey} args={} +NOTICE: NORMAL: orig=t normal=f istemp=f type=table constraint identity=one_pkey on evttrig.one schema=evttrig name= addr={evttrig,one,one_pkey} args={} NOTICE: END: command_tag=ALTER TABLE type=table identity=evttrig.one ALTER TABLE evttrig.one DROP COLUMN col_c; -NOTICE: NORMAL: orig=t normal=f istemp=f type=table column identity=evttrig.one.col_c name={evttrig,one,col_c} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=default value identity=for evttrig.one.col_c name={evttrig,one,col_c} args={} +NOTICE: NORMAL: orig=t normal=f istemp=f type=table column identity=evttrig.one.col_c schema=evttrig name= addr={evttrig,one,col_c} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=default value identity=for evttrig.one.col_c schema=evttrig name= addr={evttrig,one,col_c} args={} NOTICE: END: command_tag=ALTER TABLE type=table identity=evttrig.one ALTER TABLE evttrig.id ALTER COLUMN col_d SET DATA TYPE bigint; NOTICE: END: command_tag=ALTER SEQUENCE type=sequence identity=evttrig.id_col_d_seq @@ -456,26 +469,26 @@ ALTER TABLE evttrig.id ALTER COLUMN col_d DROP IDENTITY, ALTER COLUMN col_d SET DATA TYPE int; NOTICE: END: command_tag=ALTER TABLE type=table identity=evttrig.id DROP INDEX evttrig.one_idx; -NOTICE: NORMAL: orig=t normal=f istemp=f type=index identity=evttrig.one_idx name={evttrig,one_idx} args={} +NOTICE: NORMAL: orig=t normal=f istemp=f type=index identity=evttrig.one_idx schema=evttrig name=one_idx addr={evttrig,one_idx} args={} DROP SCHEMA evttrig CASCADE; NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table evttrig.one 
drop cascades to table evttrig.two drop cascades to table evttrig.id drop cascades to table evttrig.parted -NOTICE: NORMAL: orig=t normal=f istemp=f type=schema identity=evttrig name={evttrig} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.one name={evttrig,one} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=sequence identity=evttrig.one_col_a_seq name={evttrig,one_col_a_seq} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=default value identity=for evttrig.one.col_a name={evttrig,one,col_a} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.two name={evttrig,two} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.id name={evttrig,id} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.parted name={evttrig,parted} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_1_10 name={evttrig,part_1_10} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_10_20 name={evttrig,part_10_20} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_10_15 name={evttrig,part_10_15} args={} -NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_15_20 name={evttrig,part_15_20} args={} +NOTICE: NORMAL: orig=t normal=f istemp=f type=schema identity=evttrig schema= name=evttrig addr={evttrig} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.one schema=evttrig name=one addr={evttrig,one} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=sequence identity=evttrig.one_col_a_seq schema=evttrig name=one_col_a_seq addr={evttrig,one_col_a_seq} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=default value identity=for evttrig.one.col_a schema=evttrig name= addr={evttrig,one,col_a} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.two schema=evttrig name=two addr={evttrig,two} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.id schema=evttrig name=id addr={evttrig,id} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.parted schema=evttrig name=parted addr={evttrig,parted} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_1_10 schema=evttrig name=part_1_10 addr={evttrig,part_1_10} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_10_20 schema=evttrig name=part_10_20 addr={evttrig,part_10_20} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_10_15 schema=evttrig name=part_10_15 addr={evttrig,part_10_15} args={} +NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_15_20 schema=evttrig name=part_15_20 addr={evttrig,part_15_20} args={} DROP TABLE a_temp_tbl; -NOTICE: NORMAL: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={} +NOTICE: NORMAL: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl schema=pg_temp name=a_temp_tbl addr={pg_temp,a_temp_tbl} args={} -- check unfiltered results, too CREATE OR REPLACE FUNCTION event_trigger_report_dropped() RETURNS event_trigger @@ -485,34 +498,61 @@ DECLARE r record; BEGIN FOR r IN SELECT * from pg_event_trigger_dropped_objects() LOOP - RAISE NOTICE 'DROP: orig=% normal=% istemp=% type=% identity=% name=% args=%', + RAISE NOTICE 'DROP: orig=% normal=% istemp=% type=% identity=% schema=% name=% addr=% args=%', r.original, r.normal, r.is_temporary, 
r.object_type, - r.object_identity, r.address_names, r.address_args; + r.object_identity, r.schema_name, r.object_name, + r.address_names, r.address_args; END LOOP; END; $$; NOTICE: END: command_tag=CREATE FUNCTION type=function identity=public.event_trigger_report_dropped() +CREATE FUNCTION event_trigger_dummy_trigger() + RETURNS trigger + LANGUAGE plpgsql +AS $$ +BEGIN + RETURN new; +END; $$; +NOTICE: END: command_tag=CREATE FUNCTION type=function identity=public.event_trigger_dummy_trigger() CREATE TABLE evtrg_nontemp_table (f1 int primary key, f2 int default 42); NOTICE: END: command_tag=CREATE TABLE type=table identity=public.evtrg_nontemp_table NOTICE: END: command_tag=CREATE INDEX type=index identity=public.evtrg_nontemp_table_pkey +CREATE TRIGGER evtrg_nontemp_trig + BEFORE INSERT ON evtrg_nontemp_table + EXECUTE FUNCTION event_trigger_dummy_trigger(); +NOTICE: END: command_tag=CREATE TRIGGER type=trigger identity=evtrg_nontemp_trig on public.evtrg_nontemp_table +CREATE POLICY evtrg_nontemp_pol ON evtrg_nontemp_table USING (f2 > 0); +NOTICE: END: command_tag=CREATE POLICY type=policy identity=evtrg_nontemp_pol on public.evtrg_nontemp_table DROP TABLE evtrg_nontemp_table; -NOTICE: DROP: orig=t normal=f istemp=f type=table identity=public.evtrg_nontemp_table name={public,evtrg_nontemp_table} args={} -NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table name={public.evtrg_nontemp_table} args={} -NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table[] name={public.evtrg_nontemp_table[]} args={} -NOTICE: DROP: orig=f normal=f istemp=f type=default value identity=for public.evtrg_nontemp_table.f2 name={public,evtrg_nontemp_table,f2} args={} -NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_f1_not_null on public.evtrg_nontemp_table name={public,evtrg_nontemp_table,evtrg_nontemp_table_f1_not_null} args={} -NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_pkey on public.evtrg_nontemp_table name={public,evtrg_nontemp_table,evtrg_nontemp_table_pkey} args={} -NOTICE: DROP: orig=f normal=f istemp=f type=index identity=public.evtrg_nontemp_table_pkey name={public,evtrg_nontemp_table_pkey} args={} +NOTICE: DROP: orig=t normal=f istemp=f type=table identity=public.evtrg_nontemp_table schema=public name=evtrg_nontemp_table addr={public,evtrg_nontemp_table} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table schema=public name=evtrg_nontemp_table addr={public.evtrg_nontemp_table} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table[] schema=public name=_evtrg_nontemp_table addr={public.evtrg_nontemp_table[]} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=default value identity=for public.evtrg_nontemp_table.f2 schema=public name= addr={public,evtrg_nontemp_table,f2} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_f1_not_null on public.evtrg_nontemp_table schema=public name= addr={public,evtrg_nontemp_table,evtrg_nontemp_table_f1_not_null} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_pkey on public.evtrg_nontemp_table schema=public name= addr={public,evtrg_nontemp_table,evtrg_nontemp_table_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=index identity=public.evtrg_nontemp_table_pkey schema=public name=evtrg_nontemp_table_pkey 
addr={public,evtrg_nontemp_table_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=trigger identity=evtrg_nontemp_trig on public.evtrg_nontemp_table schema=public name= addr={public,evtrg_nontemp_table,evtrg_nontemp_trig} args={} +NOTICE: DROP: orig=f normal=t istemp=f type=policy identity=evtrg_nontemp_pol on public.evtrg_nontemp_table schema=public name= addr={public,evtrg_nontemp_table,evtrg_nontemp_pol} args={} CREATE TEMP TABLE a_temp_tbl (f1 int primary key, f2 int default 42); NOTICE: END: command_tag=CREATE TABLE type=table identity=pg_temp.a_temp_tbl NOTICE: END: command_tag=CREATE INDEX type=index identity=pg_temp.a_temp_tbl_pkey +CREATE TRIGGER a_temp_trig + BEFORE INSERT ON a_temp_tbl + EXECUTE FUNCTION event_trigger_dummy_trigger(); +NOTICE: END: command_tag=CREATE TRIGGER type=trigger identity=a_temp_trig on pg_temp.a_temp_tbl +CREATE POLICY a_temp_pol ON a_temp_tbl USING (f2 > 0); +NOTICE: END: command_tag=CREATE POLICY type=policy identity=a_temp_pol on pg_temp.a_temp_tbl DROP TABLE a_temp_tbl; -NOTICE: DROP: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={} -NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl name={pg_temp.a_temp_tbl} args={} -NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl[] name={pg_temp.a_temp_tbl[]} args={} -NOTICE: DROP: orig=f normal=f istemp=t type=default value identity=for pg_temp.a_temp_tbl.f2 name={pg_temp,a_temp_tbl,f2} args={} -NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_f1_not_null on pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl,a_temp_tbl_f1_not_null} args={} -NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_pkey on pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl,a_temp_tbl_pkey} args={} -NOTICE: DROP: orig=f normal=f istemp=t type=index identity=pg_temp.a_temp_tbl_pkey name={pg_temp,a_temp_tbl_pkey} args={} +NOTICE: DROP: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl schema=pg_temp name=a_temp_tbl addr={pg_temp,a_temp_tbl} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl schema=pg_temp name=a_temp_tbl addr={pg_temp.a_temp_tbl} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl[] schema=pg_temp name=_a_temp_tbl addr={pg_temp.a_temp_tbl[]} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=default value identity=for pg_temp.a_temp_tbl.f2 schema=pg_temp name= addr={pg_temp,a_temp_tbl,f2} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_f1_not_null on pg_temp.a_temp_tbl schema=pg_temp name= addr={pg_temp,a_temp_tbl,a_temp_tbl_f1_not_null} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_pkey on pg_temp.a_temp_tbl schema=pg_temp name= addr={pg_temp,a_temp_tbl,a_temp_tbl_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=index identity=pg_temp.a_temp_tbl_pkey schema=pg_temp name=a_temp_tbl_pkey addr={pg_temp,a_temp_tbl_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=trigger identity=a_temp_trig on pg_temp.a_temp_tbl schema=pg_temp name= addr={pg_temp,a_temp_tbl,a_temp_trig} args={} +NOTICE: DROP: orig=f normal=t istemp=t type=policy identity=a_temp_pol on pg_temp.a_temp_tbl schema=pg_temp name= addr={pg_temp,a_temp_tbl,a_temp_pol} args={} +DROP FUNCTION event_trigger_dummy_trigger(); +NOTICE: DROP: orig=t normal=f istemp=f type=function identity=public.event_trigger_dummy_trigger() 
schema=public name= addr={public,event_trigger_dummy_trigger} args={} -- CREATE OPERATOR CLASS without FAMILY clause should report -- both CREATE OPERATOR FAMILY and CREATE OPERATOR CLASS CREATE OPERATOR CLASS evttrigopclass FOR TYPE int USING btree AS STORAGE int; diff --git a/src/test/regress/sql/event_trigger.sql b/src/test/regress/sql/event_trigger.sql index ef5978b9697aa..c613c0cfd439b 100644 --- a/src/test/regress/sql/event_trigger.sql +++ b/src/test/regress/sql/event_trigger.sql @@ -202,9 +202,15 @@ INSERT INTO undroppable_objs VALUES ('table', 'audit_tbls.schema_two_table_three'); CREATE TABLE dropped_objects ( - type text, - schema text, - object text + object_type text, + schema_name text, + object_name text, + object_identity text, + address_names text[], + address_args text[], + is_temporary bool, + original bool, + normal bool ); -- This tests errors raised within event triggers; the one in audit_tbls @@ -245,8 +251,12 @@ BEGIN END IF; INSERT INTO dropped_objects - (type, schema, object) VALUES - (obj.object_type, obj.schema_name, obj.object_identity); + (object_type, schema_name, object_name, + object_identity, address_names, address_args, + is_temporary, original, normal) VALUES + (obj.object_type, obj.schema_name, obj.object_name, + obj.object_identity, obj.address_names, obj.address_args, + obj.is_temporary, obj.original, obj.normal); END LOOP; END $$; @@ -263,10 +273,12 @@ DROP SCHEMA schema_one, schema_two CASCADE; DELETE FROM undroppable_objs WHERE object_identity = 'schema_one.table_three'; DROP SCHEMA schema_one, schema_two CASCADE; -SELECT * FROM dropped_objects WHERE schema IS NULL OR schema <> 'pg_toast'; +-- exclude TOAST objects because they have unstable names +SELECT * FROM dropped_objects + WHERE schema_name IS NULL OR schema_name <> 'pg_toast'; DROP OWNED BY regress_evt_user; -SELECT * FROM dropped_objects WHERE type = 'schema'; +SELECT * FROM dropped_objects WHERE object_type = 'schema'; DROP ROLE regress_evt_user; @@ -285,9 +297,10 @@ BEGIN IF NOT r.normal AND NOT r.original THEN CONTINUE; END IF; - RAISE NOTICE 'NORMAL: orig=% normal=% istemp=% type=% identity=% name=% args=%', + RAISE NOTICE 'NORMAL: orig=% normal=% istemp=% type=% identity=% schema=% name=% addr=% args=%', r.original, r.normal, r.is_temporary, r.object_type, - r.object_identity, r.address_names, r.address_args; + r.object_identity, r.schema_name, r.object_name, + r.address_names, r.address_args; END LOOP; END; $$; CREATE EVENT TRIGGER regress_event_trigger_report_dropped ON sql_drop @@ -346,17 +359,37 @@ DECLARE r record; BEGIN FOR r IN SELECT * from pg_event_trigger_dropped_objects() LOOP - RAISE NOTICE 'DROP: orig=% normal=% istemp=% type=% identity=% name=% args=%', + RAISE NOTICE 'DROP: orig=% normal=% istemp=% type=% identity=% schema=% name=% addr=% args=%', r.original, r.normal, r.is_temporary, r.object_type, - r.object_identity, r.address_names, r.address_args; + r.object_identity, r.schema_name, r.object_name, + r.address_names, r.address_args; END LOOP; END; $$; +CREATE FUNCTION event_trigger_dummy_trigger() + RETURNS trigger + LANGUAGE plpgsql +AS $$ +BEGIN + RETURN new; +END; $$; + CREATE TABLE evtrg_nontemp_table (f1 int primary key, f2 int default 42); +CREATE TRIGGER evtrg_nontemp_trig + BEFORE INSERT ON evtrg_nontemp_table + EXECUTE FUNCTION event_trigger_dummy_trigger(); +CREATE POLICY evtrg_nontemp_pol ON evtrg_nontemp_table USING (f2 > 0); DROP TABLE evtrg_nontemp_table; + CREATE TEMP TABLE a_temp_tbl (f1 int primary key, f2 int default 42); +CREATE TRIGGER a_temp_trig 
+ BEFORE INSERT ON a_temp_tbl + EXECUTE FUNCTION event_trigger_dummy_trigger(); +CREATE POLICY a_temp_pol ON a_temp_tbl USING (f2 > 0); DROP TABLE a_temp_tbl; +DROP FUNCTION event_trigger_dummy_trigger(); + -- CREATE OPERATOR CLASS without FAMILY clause should report -- both CREATE OPERATOR FAMILY and CREATE OPERATOR CLASS CREATE OPERATOR CLASS evttrigopclass FOR TYPE int USING btree AS STORAGE int; From 9a71989a8f61d7ee003c443a979a1bd43a08ff63 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 12 Sep 2025 18:10:11 -0400 Subject: [PATCH 67/73] Reject "ALTER DATABASE/USER ... RESET foo" with invalid GUC name. If the database or user had no entry in pg_db_role_setting, RESET silently did nothing --- including not checking the validity of the given GUC name. This is quite inconsistent and surprising, because you *would* get such an error if there were any pg_db_role_setting entry, even though it contains values for unrelated GUCs. While this is clearly a bug, changing it in stable branches seems unwise. The effect will be that some ALTER commands that formerly were no-ops will now be errors, and people don't like that sort of thing in minor releases. Author: Vitaly Davydov Reviewed-by: Tom Lane Discussion: https://postgr.es/m/30783e-68c28a00-9-41004480@130449754 --- src/backend/catalog/pg_db_role_setting.c | 9 +++++++++ .../modules/unsafe_tests/expected/setconfig.out | 13 +++++++++++++ src/test/modules/unsafe_tests/sql/setconfig.sql | 9 +++++++++ 3 files changed, 31 insertions(+) diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index 090fc07c28acb..832e49a34bea5 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -151,6 +151,15 @@ AlterSetting(Oid databaseid, Oid roleid, VariableSetStmt *setstmt) CatalogTupleInsert(rel, newtuple); } + else + { + /* + * RESET doesn't need to change any state if there's no pre-existing + * pg_db_role_setting entry, but for consistency we should still check + * that the option is valid and we're allowed to set it. 
+ */ + (void) GUCArrayDelete(NULL, setstmt->name); + } InvokeObjectPostAlterHookArg(DbRoleSettingRelationId, databaseid, 0, roleid, false); diff --git a/src/test/modules/unsafe_tests/expected/setconfig.out b/src/test/modules/unsafe_tests/expected/setconfig.out index 5f42443e144b9..37e33709012c2 100644 --- a/src/test/modules/unsafe_tests/expected/setconfig.out +++ b/src/test/modules/unsafe_tests/expected/setconfig.out @@ -62,6 +62,19 @@ SELECT current_user, session_user; SET ROLE NONE; DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I RESET role', current_catalog); END$$; +-- Test some error cases +DO $$BEGIN EXECUTE format( + 'ALTER DATABASE %I SET bogus = 0', current_catalog); END$$; +ERROR: unrecognized configuration parameter "bogus" +CONTEXT: SQL statement "ALTER DATABASE contrib_regression SET bogus = 0" +PL/pgSQL function inline_code_block line 1 at EXECUTE +DO $$BEGIN EXECUTE format( + 'ALTER DATABASE %I RESET bogus', current_catalog); END$$; +ERROR: unrecognized configuration parameter "bogus" +CONTEXT: SQL statement "ALTER DATABASE contrib_regression RESET bogus" +PL/pgSQL function inline_code_block line 1 at EXECUTE +ALTER USER regress_authenticated_user_db_ssa RESET bogus; +ERROR: unrecognized configuration parameter "bogus" -- Test connection string options \c -reuse-previous=on "user=regress_authenticated_user_db_sr options=-crole=regress_current_user" SELECT current_user, session_user; diff --git a/src/test/modules/unsafe_tests/sql/setconfig.sql b/src/test/modules/unsafe_tests/sql/setconfig.sql index 81296d1091b47..d9e1fc908a125 100644 --- a/src/test/modules/unsafe_tests/sql/setconfig.sql +++ b/src/test/modules/unsafe_tests/sql/setconfig.sql @@ -50,6 +50,15 @@ DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I RESET role', current_catalog); END$$; +-- Test some error cases + +DO $$BEGIN EXECUTE format( + 'ALTER DATABASE %I SET bogus = 0', current_catalog); END$$; +DO $$BEGIN EXECUTE format( + 'ALTER DATABASE %I RESET bogus', current_catalog); END$$; +ALTER USER regress_authenticated_user_db_ssa RESET bogus; + + -- Test connection string options \c -reuse-previous=on "user=regress_authenticated_user_db_sr options=-crole=regress_current_user" From 88824e68611a88a4ef7218c093810a94f86e12e0 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 12 Sep 2025 18:45:06 -0400 Subject: [PATCH 68/73] Avoid context dependency in test case added by 9a71989a8. It's not quite clear to me why this didn't show up in my local check-world testing, but some of the buildfarm evidently runs this test with a different database name. Adjust the test so that that doesn't affect the reported error messages. --- src/test/modules/unsafe_tests/expected/setconfig.out | 8 ++++---- src/test/modules/unsafe_tests/sql/setconfig.sql | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/test/modules/unsafe_tests/expected/setconfig.out b/src/test/modules/unsafe_tests/expected/setconfig.out index 37e33709012c2..5318f075d1e8c 100644 --- a/src/test/modules/unsafe_tests/expected/setconfig.out +++ b/src/test/modules/unsafe_tests/expected/setconfig.out @@ -63,18 +63,18 @@ SET ROLE NONE; DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I RESET role', current_catalog); END$$; -- Test some error cases +-- We have to use terse mode so that the database name doesn't +-- appear in the error output. 
+\set VERBOSITY terse DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I SET bogus = 0', current_catalog); END$$; ERROR: unrecognized configuration parameter "bogus" -CONTEXT: SQL statement "ALTER DATABASE contrib_regression SET bogus = 0" -PL/pgSQL function inline_code_block line 1 at EXECUTE DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I RESET bogus', current_catalog); END$$; ERROR: unrecognized configuration parameter "bogus" -CONTEXT: SQL statement "ALTER DATABASE contrib_regression RESET bogus" -PL/pgSQL function inline_code_block line 1 at EXECUTE ALTER USER regress_authenticated_user_db_ssa RESET bogus; ERROR: unrecognized configuration parameter "bogus" +\set VERBOSITY default -- Test connection string options \c -reuse-previous=on "user=regress_authenticated_user_db_sr options=-crole=regress_current_user" SELECT current_user, session_user; diff --git a/src/test/modules/unsafe_tests/sql/setconfig.sql b/src/test/modules/unsafe_tests/sql/setconfig.sql index d9e1fc908a125..4349490f94117 100644 --- a/src/test/modules/unsafe_tests/sql/setconfig.sql +++ b/src/test/modules/unsafe_tests/sql/setconfig.sql @@ -51,12 +51,16 @@ DO $$BEGIN EXECUTE format( -- Test some error cases +-- We have to use terse mode so that the database name doesn't +-- appear in the error output. +\set VERBOSITY terse DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I SET bogus = 0', current_catalog); END$$; DO $$BEGIN EXECUTE format( 'ALTER DATABASE %I RESET bogus', current_catalog); END$$; ALTER USER regress_authenticated_user_db_ssa RESET bogus; +\set VERBOSITY default -- Test connection string options From f6edf403a99923b98f8b8b3398c7ef32e1ae9a3e Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 13 Sep 2025 20:38:52 +0300 Subject: [PATCH 69/73] Specify locale provider for pg_regress --no-locale pg_regress has a --no-locale option that forces the temporary database to have C locale. However, currently, locale C only exists in the 'builtin' locale provider. This makes 'pg_regress --no-locale' fail when the default locale provider is not 'builtin'. This commit makes 'pg_regress --no-locale' specify both LOCALE='C' and LOCALE_PROVIDER='builtin'. Discussion: https://postgr.es/m/b54921f95e23b4391b1613e9053a3d58%40postgrespro.ru Author: Oleg Tselebrovskiy Reviewed-by: Alexander Korotkov --- src/test/regress/pg_regress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index 5d85dcc62f0a5..61c035a39834a 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -1968,10 +1968,10 @@ create_database(const char *dbname) */ if (encoding) psql_add_command(buf, "CREATE DATABASE \"%s\" TEMPLATE=template0 ENCODING='%s'%s", dbname, encoding, - (nolocale) ? " LOCALE='C'" : ""); + (nolocale) ? " LOCALE='C' LOCALE_PROVIDER='builtin'" : ""); else psql_add_command(buf, "CREATE DATABASE \"%s\" TEMPLATE=template0%s", dbname, - (nolocale) ? " LOCALE='C'" : ""); + (nolocale) ? " LOCALE='C' LOCALE_PROVIDER='builtin'" : ""); psql_add_command(buf, "ALTER DATABASE \"%s\" SET lc_messages TO 'C';" "ALTER DATABASE \"%s\" SET lc_monetary TO 'C';" From 7e9c216b5236cc61f677787b35e8c8f28f5f6959 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Sat, 13 Sep 2025 14:50:02 -0500 Subject: [PATCH 70/73] Re-pgindent nbtpreprocesskeys.c after commit 796962922e. 
Backpatch-through: 18 --- src/backend/access/nbtree/nbtpreprocesskeys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index 07a3ff11a0b87..71ddd68fd548c 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -1498,7 +1498,7 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, */ lookupstrat = BTGreaterEqualStrategyNumber; if (low_compare->sk_flags & SK_BT_DESC) - lookupstrat = BTLessEqualStrategyNumber; /* commute this too */ + lookupstrat = BTLessEqualStrategyNumber; /* commute this too */ geop = get_opfamily_member(opfamily, opcintype, opcintype, lookupstrat); if (!OidIsValid(geop)) return; From 95bdc672282722fb52656a81fefe18296015708e Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Sat, 13 Sep 2025 14:55:38 -0500 Subject: [PATCH 71/73] Add commit 7e9c216b52 to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index f83e2fc658664..65aecdffaca6e 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,6 +14,9 @@ # # $ git log --pretty=format:"%H # %cd%n# %s" $PGINDENTGITHASH -1 --date=iso +7e9c216b5236cc61f677787b35e8c8f28f5f6959 # 2025-09-13 14:50:02 -0500 +# Re-pgindent nbtpreprocesskeys.c after commit 796962922e. + 1d1612aec7688139e1a5506df1366b4b6a69605d # 2025-07-29 09:10:41 -0400 # Run pgindent. From cdf7feb96562071f15ceb070272d7e84246d943d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 13 Sep 2025 16:55:51 -0400 Subject: [PATCH 72/73] Amend recent fix for SIMILAR TO regex conversion. Commit e3ffc3e91 fixed the translation of character classes in SIMILAR TO regular expressions. Unfortunately the fix broke a corner case: if there is an escape character right after the opening bracket (for example in "[\q]"), a closing bracket right after the escape sequence would not be seen as closing the character class. There were two more oversights: a backslash or a nested opening bracket right at the beginning of a character class should remove the special meaning from any following caret or closing bracket. This bug suggests that this code needs to be more readable, so also rename the variables "charclass_depth" and "charclass_start" to something more meaningful, rewrite an "if" cascade to be more consistent, and improve the commentary. 
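As a concrete illustration (a sketch based on the description above, using
the default backslash escape character; this example is constructed for
illustration and is not one of the committed test cases):

    SELECT 'qx' SIMILAR TO '[\q]%';
    -- The ']' right after the escape sequence '\q' now closes the class,
    -- so the '%' is translated to the regex '.*' and this returns true.
    -- Before this fix the translator still believed it was inside the
    -- bracket expression and copied the '%' through literally.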
Reported-by: Dominique Devienne Reported-by: Stephan Springl Author: Laurenz Albe Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAFCRh-8NwJd0jq6P=R3qhHyqU7hw0BTor3W0SvUcii24et+zAw@mail.gmail.com Backpatch-through: 13 --- src/backend/utils/adt/regexp.c | 97 +++++++++++++++++++-------- src/test/regress/expected/strings.out | 9 +++ src/test/regress/sql/strings.sql | 3 + 3 files changed, 82 insertions(+), 27 deletions(-) diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 6e2864cbbda8c..b0cdef9b19fa2 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -774,10 +774,8 @@ similar_escape_internal(text *pat_text, text *esc_text) elen; bool afterescape = false; int nquotes = 0; - int charclass_depth = 0; /* Nesting level of character classes, - * encompassed by square brackets */ - int charclass_start = 0; /* State of the character class start, - * for carets */ + int bracket_depth = 0; /* square bracket nesting level */ + int charclass_pos = 0; /* position inside a character class */ p = VARDATA_ANY(pat_text); plen = VARSIZE_ANY_EXHDR(pat_text); @@ -836,6 +834,17 @@ similar_escape_internal(text *pat_text, text *esc_text) * the relevant part separators in the above expansion. If the result * of this function is used in a plain regexp match (SIMILAR TO), the * escape-double-quotes have no effect on the match behavior. + * + * While we don't fully validate character classes (bracket expressions), + * we do need to parse them well enough to know where they end. + * "charclass_pos" tracks where we are in a character class. + * Its value is uninteresting when bracket_depth is 0. + * But when bracket_depth > 0, it will be + * 1: right after the opening '[' (a following '^' will negate + * the class, while ']' is a literal character) + * 2: right after a '^' after the opening '[' (']' is still a literal + * character) + * 3 or more: further inside the character class (']' ends the class) *---------- */ @@ -907,7 +916,7 @@ similar_escape_internal(text *pat_text, text *esc_text) /* fast path */ if (afterescape) { - if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */ + if (pchar == '"' && bracket_depth < 1) /* escape-double-quote? */ { /* emit appropriate part separator, per notes above */ if (nquotes == 0) @@ -948,6 +957,12 @@ similar_escape_internal(text *pat_text, text *esc_text) */ *r++ = '\\'; *r++ = pchar; + + /* + * If we encounter an escaped character in a character class, + * we are no longer at the beginning. + */ + charclass_pos = 3; } afterescape = false; } @@ -956,41 +971,69 @@ similar_escape_internal(text *pat_text, text *esc_text) /* SQL escape character; do not send to output */ afterescape = true; } - else if (charclass_depth > 0) + else if (bracket_depth > 0) { + /* inside a character class */ if (pchar == '\\') + { + /* + * If we're here, backslash is not the SQL escape character, + * so treat it as a literal class element, which requires + * doubling it. (This matches our behavior for backslashes + * outside character classes.) + */ *r++ = '\\'; + } *r++ = pchar; - /* - * Ignore a closing bracket at the start of a character class. - * Such a bracket is taken literally rather than closing the - * class. "charclass_start" is 1 right at the beginning of a - * class and 2 after an initial caret. 
-			 */
-			if (pchar == ']' && charclass_start > 2)
-				charclass_depth--;
+			/* parse the character class well enough to identify ending ']' */
+			if (pchar == ']' && charclass_pos > 2)
+			{
+				/* found the real end of a bracket pair */
+				bracket_depth--;
+				/* don't reset charclass_pos, this may be an inner bracket */
+			}
 			else if (pchar == '[')
-				charclass_depth++;
+			{
+				/* start of a nested bracket pair */
+				bracket_depth++;
 
-			/*
-			 * If there is a caret right after the opening bracket, it negates
-			 * the character class, but a following closing bracket should
-			 * still be treated as a normal character. That holds only for
-			 * the first caret, so only the values 1 and 2 mean that closing
-			 * brackets should be taken literally.
-			 */
-			if (pchar == '^')
-				charclass_start++;
+				/*
+				 * We are no longer at the beginning of a character class.
+				 * (The nested bracket pair is a collating element, not a
+				 * character class in its own right.)
+				 */
+				charclass_pos = 3;
+			}
+			else if (pchar == '^')
+			{
+				/*
+				 * A caret right after the opening bracket negates the
+				 * character class. In that case, the following will
+				 * increment charclass_pos from 1 to 2, so that a following
+				 * ']' is still a literal character and does not end the
+				 * character class. If we are further inside a character
+				 * class, charclass_pos might get incremented past 3, which is
+				 * fine.
+				 */
+				charclass_pos++;
+			}
 			else
-				charclass_start = 3;	/* definitely past the start */
+			{
+				/*
+				 * Anything else (including a backslash or leading ']') is an
+				 * element of the character class, so we are no longer at the
+				 * beginning of the class.
+				 */
+				charclass_pos = 3;
+			}
 		}
 		else if (pchar == '[')
 		{
 			/* start of a character class */
 			*r++ = pchar;
-			charclass_depth++;
-			charclass_start = 1;
+			bracket_depth = 1;
+			charclass_pos = 1;
 		}
 		else if (pchar == '%')
 		{
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index ba302da51e7b2..2d6cb02ad6085 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -693,6 +693,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
    Filter: (f1 ~ '^(?:[^^]\^)$'::text)
 (2 rows)
 
+-- Closing square bracket after an escape sequence at the beginning of
+-- a character class closes the character class
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[|a]%' ESCAPE '|';
+              QUERY PLAN
+---------------------------------------
+ Seq Scan on text_tbl
+   Filter: (f1 ~ '^(?:[\a].*)$'::text)
+(2 rows)
+
 -- Test backslash escapes in regexp_replace's replacement string
 SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
  regexp_replace
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index b94004cc08ce6..5ed421d62059d 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -218,6 +218,9 @@ EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%';
 -- Closing square bracket effective after two carets at the beginning
 -- of character class.
 EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
+-- Closing square bracket after an escape sequence at the beginning of
+-- a character class closes the character class
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[|a]%' ESCAPE '|';
 
 -- Test backslash escapes in regexp_replace's replacement string
 SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');

From 454c046094ab3431c2ce0c540c46e623bc05bd1a Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Sat, 13 Sep 2025 21:01:33 -0400
Subject: [PATCH 73/73] nbtree: Always set skipScan flag on rescan.

The TimescaleDB extension expects to be able to change an nbtree scan's
keys across rescans. The issue arises in the extension's implementation
of loose index scan. This is arguably a misuse of the index AM API,
though apparently it worked until recently. It stopped working when the
skipScan flag was added to BTScanOpaqueData by commit 8a510275, though.
The flag wouldn't reliably track whether the scan (actually, the current
rescan) has any skip arrays, leading to confusion in _bt_set_startikey.

nbtree preprocessing will now defensively initialize the scan's skipScan
flag in all cases, including the case where _bt_preprocess_array_keys
returns early due to the (re)scan not using arrays. While nbtree isn't
obligated to support this use case (at least not according to my reading
of the index AM API), it still seems like a good idea to be consistent
here, on general robustness grounds.

Author: Peter Geoghegan
Reported-By: Natalya Aksman
Discussion: https://postgr.es/m/CAJumhcirfMojbk20+W0YimbNDkwdECvJprQGQ-XqK--ph09nQw@mail.gmail.com
Backpatch-through: 18
---
 src/backend/access/nbtree/nbtpreprocesskeys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c
index 71ddd68fd548c..7b7d7860d8f53 100644
--- a/src/backend/access/nbtree/nbtpreprocesskeys.c
+++ b/src/backend/access/nbtree/nbtpreprocesskeys.c
@@ -1854,6 +1854,7 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys)
 	 * (also checks if we should add extra skip arrays based on input keys)
 	 */
 	numArrayKeys = _bt_num_array_keys(scan, skip_eq_ops, &numSkipArrayKeys);
+	so->skipScan = (numSkipArrayKeys > 0);
 
 	/* Quit if nothing to do. */
 	if (numArrayKeys == 0)
@@ -1883,7 +1884,6 @@ _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys)
 	arrayKeyData = (ScanKey) palloc(numArrayKeyData * sizeof(ScanKeyData));
 
 	/* Allocate space for per-array data in the workspace context */
-	so->skipScan = (numSkipArrayKeys > 0);
 	so->arrayKeys = (BTArrayKeyInfo *) palloc(numArrayKeys * sizeof(BTArrayKeyInfo));
 
 	/* Allocate space for ORDER procs used to help _bt_checkkeys */