From 530cfa8eb50ca5a2151dfc50a6a5999ec8aff148 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 9 Sep 2025 14:09:36 -0500 Subject: [PATCH 01/32] test_slru: Fix LWLock tranche allocation in EXEC_BACKEND builds. Currently, test_slru's shmem_startup_hook unconditionally generates new LWLock tranche IDs. This is fine on non-EXEC_BACKEND builds, where only the postmaster executes this hook, but on EXEC_BACKEND builds, every backend executes it, too. To fix, only generate the tranche IDs in the postmaster process by checking the IsUnderPostmaster variable. This is arguably a bug fix and could be back-patched, but since the damage is limited to some extra unused tranche IDs in a test module, I'm not going to bother. Reported-by: Sami Imseih Reviewed-by: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0vaAuonaf12CeDddQJu5xKL%2B6xVyS%2B_q1%2BcH%3D33JXV82w%40mail.gmail.com --- src/test/modules/test_slru/test_slru.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/test/modules/test_slru/test_slru.c b/src/test/modules/test_slru/test_slru.c index 8c0367eeee424..e963466aef1cd 100644 --- a/src/test/modules/test_slru/test_slru.c +++ b/src/test/modules/test_slru/test_slru.c @@ -219,8 +219,8 @@ test_slru_shmem_startup(void) */ const bool long_segment_names = true; const char slru_dir_name[] = "pg_test_slru"; - int test_tranche_id; - int test_buffer_tranche_id; + int test_tranche_id = -1; + int test_buffer_tranche_id = -1; if (prev_shmem_startup_hook) prev_shmem_startup_hook(); @@ -231,10 +231,18 @@ test_slru_shmem_startup(void) */ (void) MakePGDirectory(slru_dir_name); - /* initialize the SLRU facility */ - test_tranche_id = LWLockNewTrancheId("test_slru_tranche"); - - test_buffer_tranche_id = LWLockNewTrancheId("test_buffer_tranche"); + /* + * Initialize the SLRU facility. In EXEC_BACKEND builds, the + * shmem_startup_hook is called in the postmaster and in each backend, but + * we only need to generate the LWLock tranches once. Note that these + * tranche ID variables are not used by SimpleLruInit() when + * IsUnderPostmaster is true. + */ + if (!IsUnderPostmaster) + { + test_tranche_id = LWLockNewTrancheId("test_slru_tranche"); + test_buffer_tranche_id = LWLockNewTrancheId("test_buffer_tranche"); + } TestSlruCtl->PagePrecedes = test_slru_page_precedes_logically; SimpleLruInit(TestSlruCtl, "TestSLRU", From d96c854dfc634212193007ca58f8978bc272d457 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 9 Sep 2025 14:35:30 -0500 Subject: [PATCH 02/32] Fix documentation for shmem_startup_hook. This section claims that each backend executes the shmem_startup_hook shortly after attaching to shared memory, which is true for EXEC_BACKEND builds, but not for others. This commit adds this important detail. Oversight in commit 964152c476. Reported-by: Sami Imseih Reviewed-by: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0vEGT1eigGbVt604LkXP6mUPMwPMxQoRCbFny44w%2B9EUQ%40mail.gmail.com Backpatch-through: 17 --- doc/src/sgml/xfunc.sgml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index da21ef5689184..04bf919b34384 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -3668,11 +3668,14 @@ LWLockRelease(AddinShmemInitLock); shmem_startup_hook provides a convenient place for the initialization code, but it is not strictly required that all such code - be placed in this hook. 
Each backend will execute the registered - shmem_startup_hook shortly after it attaches to shared - memory. Note that add-ins should still acquire + be placed in this hook. On Windows (and anywhere else where + EXEC_BACKEND is defined), each backend executes the + registered shmem_startup_hook shortly after it + attaches to shared memory, so add-ins should still acquire AddinShmemInitLock within this hook, as shown in the - example above. + example above. On other platforms, only the postmaster process executes + the shmem_startup_hook, and each backend automatically + inherits the pointers to shared memory. From 8c8f7b199d9095dbc2e101a4614043b5ae13bde3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 10 Sep 2025 07:23:05 +0900 Subject: [PATCH 03/32] Fix leak with SMgrRelations in startup process The startup process does not process shared invalidation messages (it only sends them) and never calls AtEOXact_SMgr(), which cleans up any unpinned SMgrRelations. Hence, it is never able to free SMgrRelations on a periodic basis, bloating its hashtable over time. As with the checkpointer and the bgwriter, this commit takes a conservative approach and frees SMgrRelations periodically when replaying a checkpoint record, either online or shutdown, so that the startup process has a way to perform a periodic cleanup. Issue caused by 21d9c3ee4ef7, so backpatch down to v17. Author: Jingtang Zhang Reviewed-by: Yuhang Qiu Discussion: https://postgr.es/m/28C687D4-F335-417E-B06C-6612A0BD5A10@gmail.com Backpatch-through: 17 --- src/backend/access/transam/xlog.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7ffb217915190..0baf0ac6160af 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8385,6 +8385,14 @@ xlog_redo(XLogReaderState *record) checkPoint.ThisTimeLineID, replayTLI))); RecoveryRestartPoint(&checkPoint, record); + + /* + * After replaying a checkpoint record, free all smgr objects. + * Otherwise we would never do so for dropped relations, as the + * startup does not process shared invalidation messages or call + * AtEOXact_SMgr(). + */ + smgrdestroyall(); } else if (info == XLOG_CHECKPOINT_ONLINE) { @@ -8438,6 +8446,14 @@ xlog_redo(XLogReaderState *record) checkPoint.ThisTimeLineID, replayTLI))); RecoveryRestartPoint(&checkPoint, record); + + /* + * After replaying a checkpoint record, free all smgr objects. + * Otherwise we would never do so for dropped relations, as the + * startup does not process shared invalidation messages or call + * AtEOXact_SMgr(). + */ + smgrdestroyall(); } else if (info == XLOG_OVERWRITE_CONTRECORD) { From b1187266e077265cb061cbedd502e94179dc7b21 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 10 Sep 2025 11:20:46 +0900 Subject: [PATCH 04/32] Replace callers of dynahash.h's my_log2() with the equivalent in pg_bitutils.h All the calls replaced by this commit use 4-byte integers for the variables passed to my_log2(), so the limit against too-large inputs does not really apply; a small standalone sketch of the ceil-log2 equivalence follows the list below. Thresholds are also applied at each call site: - In nodeAgg.c, the number of partitions is limited by HASHAGG_MAX_PARTITIONS. - In nodeHash.c, ExecChooseHashTableSize() caps its maximum number of buckets based on HashJoinTuple and the palloc() allocation limit. - In worker.c, the number of subxacts tracked by ApplySubXactData uses uint32, making pg_ceil_log2_32() safe to use directly.
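For readers unfamiliar with these helpers, here is a minimal standalone sketch (not PostgreSQL code) of the equivalence the replacements rely on: the doubling loop in dynahash.c's my_log2() and a count-leading-zeros computation both yield ceil(log2(n)), returning 0 for inputs of 0 and 1. The ceil_log2_loop() and ceil_log2_32() functions below are local stand-ins written for this illustration, not the actual pg_bitutils.h implementations, and the bit-twiddling variant assumes a GCC/Clang-style __builtin_clz().

#include <stdio.h>
#include <stdint.h>

/* ceil(log2(num)) computed with a doubling loop, as dynahash.c's my_log2() does */
static int
ceil_log2_loop(int64_t num)
{
    int         i = 0;
    int64_t     limit = 1;

    while (limit < num)
    {
        limit <<= 1;
        i++;
    }
    return i;
}

/* ceil(log2(num)) for 32-bit inputs via count-leading-zeros */
static int
ceil_log2_32(uint32_t num)
{
    if (num < 2)
        return 0;
    return 32 - __builtin_clz(num - 1);
}

int
main(void)
{
    uint32_t    tests[] = {1, 2, 3, 4, 1024, 1025, 1u << 20};

    for (size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
        printf("n=%u loop=%d bitwise=%d\n", (unsigned) tests[i],
               ceil_log2_loop(tests[i]), ceil_log2_32(tests[i]));
    return 0;
}

Since nbuckets in ExecHashTableCreate() must already be a power of two (per the Assert shown in the diff), the ceiling form returns exactly log2(nbuckets) there.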
Several approaches have been discussed, like an integration with thresholds in pg_bitutils.h, but it was found confusing. This uses Dean's idea, which gives a simpler result than what I came up with to be able to remove dynahash.h. dynahash.h will be removed in a follow-up commit, removing some duplication with the ceil log2 routines. Reviewed-by: Peter Eisentraut Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAEZATCUJPQD_7sC-wErak2CQGNa6bj2hY-mr8wsBki=kX7f2_A@mail.gmail.com --- src/backend/executor/nodeAgg.c | 3 +-- src/backend/executor/nodeHash.c | 7 +++---- src/backend/replication/logical/worker.c | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 377e016d73225..a4f3d30f307cc 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -267,7 +267,6 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/datum.h" -#include "utils/dynahash.h" #include "utils/expandeddatum.h" #include "utils/injection_point.h" #include "utils/logtape.h" @@ -2115,7 +2114,7 @@ hash_choose_num_partitions(double input_groups, double hashentrysize, npartitions = (int) dpartitions; /* ceil(log2(npartitions)) */ - partition_bits = my_log2(npartitions); + partition_bits = pg_ceil_log2_32(npartitions); /* make sure that we don't exhaust the hash bits */ if (partition_bits + used_bits >= 32) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 8d2201ab67fa5..a3415db4e20f5 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -36,7 +36,6 @@ #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "port/pg_bitutils.h" -#include "utils/dynahash.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" @@ -340,7 +339,7 @@ MultiExecParallelHash(HashState *node) */ hashtable->curbatch = -1; hashtable->nbuckets = pstate->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_32(hashtable->nbuckets); hashtable->totalTuples = pstate->total_tuples; /* @@ -480,7 +479,7 @@ ExecHashTableCreate(HashState *state) &nbuckets, &nbatch, &num_skew_mcvs); /* nbuckets must be a power of 2 */ - log2_nbuckets = my_log2(nbuckets); + log2_nbuckets = pg_ceil_log2_32(nbuckets); Assert(nbuckets == (1 << log2_nbuckets)); /* @@ -3499,7 +3498,7 @@ ExecParallelHashTableSetCurrentBatch(HashJoinTable hashtable, int batchno) dsa_get_address(hashtable->area, hashtable->batches[batchno].shared->buckets); hashtable->nbuckets = hashtable->parallel_state->nbuckets; - hashtable->log2_nbuckets = my_log2(hashtable->nbuckets); + hashtable->log2_nbuckets = pg_ceil_log2_32(hashtable->nbuckets); hashtable->current_chunk = NULL; hashtable->current_chunk_shared = InvalidDsaPointer; hashtable->batches[batchno].at_least_one_chunk = false; diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index b3cac1023731a..ee6ac22329fdc 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -276,7 +276,6 @@ #include "storage/procarray.h" #include "tcop/tcopprot.h" #include "utils/acl.h" -#include "utils/dynahash.h" #include "utils/guc.h" #include "utils/inval.h" #include "utils/lsyscache.h" @@ -5115,7 +5114,7 @@ subxact_info_read(Oid subid, TransactionId xid) len = sizeof(SubXactInfo) * subxact_data.nsubxacts; /* we keep the maximum as a power of 2 */ - subxact_data.nsubxacts_max = 1 << 
my_log2(subxact_data.nsubxacts); + subxact_data.nsubxacts_max = 1 << pg_ceil_log2_32(subxact_data.nsubxacts); /* * Allocate subxact information in the logical streaming context. We need From e6da68a6e1d60a037b63a9c9ed36e5ef0a996769 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 10 Sep 2025 14:11:50 +0900 Subject: [PATCH 05/32] Remove dynahash.h All the callers of my_log2() are now limited inside dynahash.c, so let's remove this header. The same capability is provided by pg_bitutils.h already. Discussion: https://postgr.es/m/CAEZATCUJPQD_7sC-wErak2CQGNa6bj2hY-mr8wsBki=kX7f2_A@mail.gmail.com --- src/backend/utils/hash/dynahash.c | 4 ++-- src/include/utils/dynahash.h | 20 -------------------- 2 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 src/include/utils/dynahash.h diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1aeee5be42acd..ac94b9e93c6e3 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -102,7 +102,6 @@ #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" -#include "utils/dynahash.h" #include "utils/memutils.h" @@ -281,6 +280,7 @@ static bool init_htab(HTAB *hashp, int64 nelem); pg_noreturn static void hash_corrupted(HTAB *hashp); static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr); +static int my_log2(int64 num); static int64 next_pow2_int64(int64 num); static int next_pow2_int(int64 num); static void register_seq_scan(HTAB *hashp); @@ -1813,7 +1813,7 @@ hash_corrupted(HTAB *hashp) } /* calculate ceil(log base 2) of num */ -int +static int my_log2(int64 num) { /* diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h deleted file mode 100644 index a4362d3f65e59..0000000000000 --- a/src/include/utils/dynahash.h +++ /dev/null @@ -1,20 +0,0 @@ -/*------------------------------------------------------------------------- - * - * dynahash.h - * POSTGRES dynahash.h file definitions - * - * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * src/include/utils/dynahash.h - * - *------------------------------------------------------------------------- - */ -#ifndef DYNAHASH_H -#define DYNAHASH_H - -extern int my_log2(int64 num); - -#endif /* DYNAHASH_H */ From 33eec809402bfbf3eb0d01ad5b023d3d05fcb3bc Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 10 Sep 2025 11:49:53 +0200 Subject: [PATCH 06/32] Fix CREATE TABLE LIKE with not-valid check constraint In CREATE TABLE ... LIKE, any check constraints copied from the source table should be set to valid if they are ENFORCED (the default). Bug introduced in commit ca87c415e2f. 
Author: jian he Discussion: https://www.postgresql.org/message-id/CACJufxH%3D%2Bod8Wy0P4L3_GpapNwLUP3oAes5UFRJ7yTxrM_M5kg%40mail.gmail.com --- src/backend/parser/parse_utilcmd.c | 3 +-- src/test/regress/expected/create_table_like.out | 8 ++++++++ src/test/regress/sql/create_table_like.sql | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index afcf54169c3b3..e96b38a59d503 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1461,7 +1461,6 @@ expandTableLikeClause(RangeVar *heapRel, TableLikeClause *table_like_clause) char *ccname = constr->check[ccnum].ccname; char *ccbin = constr->check[ccnum].ccbin; bool ccenforced = constr->check[ccnum].ccenforced; - bool ccvalid = constr->check[ccnum].ccvalid; bool ccnoinherit = constr->check[ccnum].ccnoinherit; Node *ccbin_node; bool found_whole_row; @@ -1492,7 +1491,7 @@ expandTableLikeClause(RangeVar *heapRel, TableLikeClause *table_like_clause) n->conname = pstrdup(ccname); n->location = -1; n->is_enforced = ccenforced; - n->initially_valid = ccvalid; + n->initially_valid = ccenforced; /* sic */ n->is_no_inherit = ccnoinherit; n->raw_expr = NULL; n->cooked_expr = nodeToString(ccbin_node); diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out index 29a779c2e9072..d3c35c148475d 100644 --- a/src/test/regress/expected/create_table_like.out +++ b/src/test/regress/expected/create_table_like.out @@ -320,6 +320,7 @@ DROP TABLE inhz; -- including storage and comments CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) ENFORCED PRIMARY KEY, b text CHECK (length(b) > 100) NOT ENFORCED); +ALTER TABLE ctlt1 ADD CONSTRAINT cc CHECK (length(b) > 100) NOT VALID; CREATE INDEX ctlt1_b_key ON ctlt1 (b); CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b)); CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1; @@ -378,6 +379,7 @@ SELECT conname, description FROM pg_description, pg_constraint c WHERE classoid CREATE TABLE ctlt1_inh (LIKE ctlt1 INCLUDING CONSTRAINTS INCLUDING COMMENTS) INHERITS (ctlt1); NOTICE: merging column "a" with inherited definition NOTICE: merging column "b" with inherited definition +NOTICE: merging constraint "cc" with inherited definition NOTICE: merging constraint "ctlt1_a_check" with inherited definition NOTICE: merging constraint "ctlt1_b_check" with inherited definition \d+ ctlt1_inh @@ -387,6 +389,7 @@ NOTICE: merging constraint "ctlt1_b_check" with inherited definition a | text | | not null | | main | | A b | text | | | | extended | | B Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Not-null constraints: @@ -409,6 +412,7 @@ NOTICE: merging multiple inherited definitions of column "a" b | text | | | | extended | | c | text | | | | external | | Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED "ctlt3_a_check" CHECK (length(a) < 5) @@ -430,6 +434,7 @@ NOTICE: merging column "a" with inherited definition Indexes: "ctlt13_like_expr_idx" btree ((a || c)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED "ctlt3_a_check" CHECK (length(a) < 5) @@ -456,6 +461,7 @@ Indexes: "ctlt_all_b_idx" btree (b) "ctlt_all_expr_idx" btree ((a || b)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" 
CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Statistics objects: @@ -499,6 +505,7 @@ Indexes: "pg_attrdef_b_idx" btree (b) "pg_attrdef_expr_idx" btree ((a || b)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Statistics objects: @@ -524,6 +531,7 @@ Indexes: "ctlt1_b_idx" btree (b) "ctlt1_expr_idx" btree ((a || b)) Check constraints: + "cc" CHECK (length(b) > 100) "ctlt1_a_check" CHECK (length(a) > 2) "ctlt1_b_check" CHECK (length(b) > 100) NOT ENFORCED Statistics objects: diff --git a/src/test/regress/sql/create_table_like.sql b/src/test/regress/sql/create_table_like.sql index bf8702116a74b..93389b57dbf95 100644 --- a/src/test/regress/sql/create_table_like.sql +++ b/src/test/regress/sql/create_table_like.sql @@ -130,6 +130,7 @@ DROP TABLE inhz; -- including storage and comments CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) ENFORCED PRIMARY KEY, b text CHECK (length(b) > 100) NOT ENFORCED); +ALTER TABLE ctlt1 ADD CONSTRAINT cc CHECK (length(b) > 100) NOT VALID; CREATE INDEX ctlt1_b_key ON ctlt1 (b); CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b)); CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1; From 9016fa7e3bcde8ae4c3d63c707143af147486a10 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 10 Sep 2025 11:21:12 -0500 Subject: [PATCH 07/32] meson: Build numeric.c with -ftree-vectorize. autoconf builds have compiled this file with -ftree-vectorize since commit 8870917623, but meson builds seem to have missed the memo. Reviewed-by: Jeff Davis Discussion: https://postgr.es/m/aL85CeasM51-0D1h%40nathan Backpatch-through: 16 --- src/backend/utils/adt/meson.build | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index dac372c3bea3b..12fa0c209127c 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -1,5 +1,15 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group +# Some code in numeric.c benefits from auto-vectorization +numeric_backend_lib = static_library('numeric_backend_lib', + 'numeric.c', + dependencies: backend_build_deps, + kwargs: internal_lib_args, + c_args: vectorize_cflags, +) + +backend_link_with += numeric_backend_lib + backend_sources += files( 'acl.c', 'amutils.c', @@ -61,7 +71,6 @@ backend_sources += files( 'network_gist.c', 'network_selfuncs.c', 'network_spgist.c', - 'numeric.c', 'numutils.c', 'oid.c', 'oracle_compat.c', From abdeacdb0920d94dec7500d09f6f29fbb2f6310d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Sep 2025 16:05:03 -0400 Subject: [PATCH 08/32] Fix memory leakage in nodeSubplan.c. If the hash functions used for hashing tuples leaked any memory, we failed to clean that up, resulting in query-lifespan memory leakage in queries using hashed subplans. One way that could happen is if the values being hashed require de-toasting, since most of our hash functions don't trouble to clean up de-toasted inputs. Prior to commit bf6c614a2, this leakage was largely masked because TupleHashTableMatch would reset hashtable->tempcxt (via execTuplesMatch). But it doesn't do that anymore, and that's not really the right place for this anyway: doing it there could reset the tempcxt many times per hash lookup, or not at all. Instead put reset calls into ExecHashSubPlan and buildSubPlanHash. 
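As a toy model of that caller-side reset pattern, the standalone sketch below shows a lookup routine that leaves per-call scratch allocations behind in a temporary arena, while the caller releases them once per tuple. The ScratchArena type and helper names are invented for this illustration; they are not the backend's MemoryContext or TupleHashTable API, just the shape of the contract.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct ScratchArena
{
    char       *buf;
    size_t      used;
    size_t      size;
} ScratchArena;

static void *
arena_alloc(ScratchArena *a, size_t n)
{
    void       *p = a->buf + a->used;   /* no overflow handling: illustration only */

    a->used += n;
    return p;
}

static void
arena_reset(ScratchArena *a)
{
    a->used = 0;                /* cheap bulk release, like resetting a memory context */
}

/* stand-in for a hash lookup that allocates per-call scratch data it never frees */
static int
lookup_with_scratch(ScratchArena *tempcxt, const char *key)
{
    char       *detoasted = arena_alloc(tempcxt, strlen(key) + 1);

    strcpy(detoasted, key);     /* pretend this is a de-toasted input value */
    return detoasted[0] == 'x'; /* bogus "match" result */
}

int
main(void)
{
    ScratchArena tempcxt = {malloc(1024), 0, 1024};
    const char *tuples[] = {"xyzzy", "abc", "xq"};

    for (int i = 0; i < 3; i++)
    {
        int         found = lookup_with_scratch(&tempcxt, tuples[i]);

        printf("tuple %d: %s\n", i, found ? "match" : "no match");

        /* the caller, not the lookup, resets the scratch arena once per tuple */
        arena_reset(&tempcxt);
    }
    free(tempcxt.buf);
    return 0;
}

Doing the reset in the caller keeps it to a predictable once-per-tuple cadence, rather than once per comparison inside the lookup (or not at all).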
Along the way to that, rearrange ExecHashSubPlan so that there's just one place to call MemoryContextReset instead of several. This amounts to accepting the de-facto API spec that the caller of the TupleHashTable routines is responsible for resetting the tempcxt adequately often. Although the other callers seem to get this right, it was not documented anywhere, so add a comment about it. Bug: #19040 Reported-by: Haiyang Li Author: Haiyang Li Reviewed-by: Fei Changhong Reviewed-by: Tom Lane Discussion: https://postgr.es/m/19040-c9b6073ef814f48c@postgresql.org Backpatch-through: 13 --- src/backend/executor/execGrouping.c | 6 +++ src/backend/executor/nodeSubplan.c | 70 +++++++++++------------------ 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index b540074935386..75087204f0c69 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -156,6 +156,12 @@ execTuplesHashPrepare(int numCols, * * Note that the keyColIdx, hashfunctions, and collations arrays must be * allocated in storage that will live as long as the hashtable does. + * + * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak + * memory in the tempcxt. It is caller's responsibility to reset that context + * reasonably often, typically once per tuple. (We do it that way, rather + * than managing an extra context within the hashtable, because in many cases + * the caller can specify a tempcxt that it needs to reset per-tuple anyway.) */ TupleHashTable BuildTupleHashTable(PlanState *parent, diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index f7f6fc2da0b95..8e55dcc159b0b 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -102,6 +102,7 @@ ExecHashSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull) { + bool result = false; SubPlan *subplan = node->subplan; PlanState *planstate = node->planstate; TupleTableSlot *slot; @@ -132,14 +133,6 @@ ExecHashSubPlan(SubPlanState *node, node->projLeft->pi_exprContext = econtext; slot = ExecProject(node->projLeft); - /* - * Note: because we are typically called in a per-tuple context, we have - * to explicitly clear the projected tuple before returning. Otherwise, - * we'll have a double-free situation: the per-tuple context will probably - * be reset before we're called again, and then the tuple slot will think - * it still needs to free the tuple. - */ - /* * If the LHS is all non-null, probe for an exact match in the main hash * table. If we find one, the result is TRUE. Otherwise, scan the @@ -161,19 +154,10 @@ ExecHashSubPlan(SubPlanState *node, slot, node->cur_eq_comp, node->lhs_hash_expr) != NULL) - { - ExecClearTuple(slot); - return BoolGetDatum(true); - } - if (node->havenullrows && - findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + result = true; + else if (node->havenullrows && + findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } - ExecClearTuple(slot); - return BoolGetDatum(false); } /* @@ -186,34 +170,31 @@ ExecHashSubPlan(SubPlanState *node, * aren't provably unequal to the LHS; if so, the result is UNKNOWN. * Otherwise, the result is FALSE. 
*/ - if (node->hashnulls == NULL) - { - ExecClearTuple(slot); - return BoolGetDatum(false); - } - if (slotAllNulls(slot)) - { - ExecClearTuple(slot); + else if (node->hashnulls == NULL) + /* just return FALSE */ ; + else if (slotAllNulls(slot)) *isNull = true; - return BoolGetDatum(false); - } /* Scan partly-null table first, since more likely to get a match */ - if (node->havenullrows && - findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + else if (node->havenullrows && + findPartialMatch(node->hashnulls, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } - if (node->havehashrows && - findPartialMatch(node->hashtable, slot, node->cur_eq_funcs)) - { - ExecClearTuple(slot); + else if (node->havehashrows && + findPartialMatch(node->hashtable, slot, node->cur_eq_funcs)) *isNull = true; - return BoolGetDatum(false); - } + + /* + * Note: because we are typically called in a per-tuple context, we have + * to explicitly clear the projected tuple before returning. Otherwise, + * we'll have a double-free situation: the per-tuple context will probably + * be reset before we're called again, and then the tuple slot will think + * it still needs to free the tuple. + */ ExecClearTuple(slot); - return BoolGetDatum(false); + + /* Also must reset the hashtempcxt after each hashtable lookup. */ + MemoryContextReset(node->hashtempcxt); + + return BoolGetDatum(result); } /* @@ -642,6 +623,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) * during ExecProject. */ ResetExprContext(innerecontext); + + /* Also must reset the hashtempcxt after each hashtable lookup. */ + MemoryContextReset(node->hashtempcxt); } /* From bdc6cfcd12f5c95799328e05aa4bfa75cfe3e79f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Sep 2025 16:15:08 -0400 Subject: [PATCH 09/32] Eliminate duplicative hashtempcxt in nodeSubplan.c. Instead of building a separate memory context that's used just for running hash functions, make the hash functions run in the per-tuple context of the node's innerecontext. This saves a little space at runtime, and it avoids needing to reset two contexts instead of one inside buildSubPlanHash's main loop. This largely reverts commit 133924e13. That's safe to do now because bf6c614a2 decoupled the evaluation context used by TupleHashTableMatch from that used for hash function evaluation, so that there's no longer a risk of resetting the innerecontext too soon. Per discussion of bug #19040, although this is not directly a fix for that. Author: Tom Lane Reviewed-by: Haiyang Li Reviewed-by: Fei Changhong Discussion: https://postgr.es/m/19040-c9b6073ef814f48c@postgresql.org --- src/backend/executor/nodeSubplan.c | 19 +++++-------------- src/include/nodes/execnodes.h | 1 - 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 8e55dcc159b0b..53fb56f7388e8 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -191,8 +191,8 @@ ExecHashSubPlan(SubPlanState *node, */ ExecClearTuple(slot); - /* Also must reset the hashtempcxt after each hashtable lookup. */ - MemoryContextReset(node->hashtempcxt); + /* Also must reset the innerecontext after each hashtable lookup. 
*/ + ResetExprContext(node->innerecontext); return BoolGetDatum(result); } @@ -529,7 +529,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) 0, node->planstate->state->es_query_cxt, node->hashtablecxt, - node->hashtempcxt, + innerecontext->ecxt_per_tuple_memory, false); if (!subplan->unknownEqFalse) @@ -558,7 +558,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) 0, node->planstate->state->es_query_cxt, node->hashtablecxt, - node->hashtempcxt, + innerecontext->ecxt_per_tuple_memory, false); } else @@ -620,12 +620,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) /* * Reset innerecontext after each inner tuple to free any memory used - * during ExecProject. + * during ExecProject and hashtable lookup. */ ResetExprContext(innerecontext); - - /* Also must reset the hashtempcxt after each hashtable lookup. */ - MemoryContextReset(node->hashtempcxt); } /* @@ -842,7 +839,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->hashtable = NULL; sstate->hashnulls = NULL; sstate->hashtablecxt = NULL; - sstate->hashtempcxt = NULL; sstate->innerecontext = NULL; sstate->keyColIdx = NULL; sstate->tab_eq_funcoids = NULL; @@ -898,11 +894,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) AllocSetContextCreate(CurrentMemoryContext, "Subplan HashTable Context", ALLOCSET_DEFAULT_SIZES); - /* and a small one for the hash tables to use as temp storage */ - sstate->hashtempcxt = - AllocSetContextCreate(CurrentMemoryContext, - "Subplan HashTable Temp Context", - ALLOCSET_SMALL_SIZES); /* and a short-lived exprcontext for function evaluation */ sstate->innerecontext = CreateExprContext(estate); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index de782014b2d41..71857feae4823 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1020,7 +1020,6 @@ typedef struct SubPlanState bool havehashrows; /* true if hashtable is not empty */ bool havenullrows; /* true if hashnulls is not empty */ MemoryContext hashtablecxt; /* memory context containing hash tables */ - MemoryContext hashtempcxt; /* temp memory context for hash tables */ ExprContext *innerecontext; /* econtext for computing inner tuples */ int numCols; /* number of columns being hashed */ /* each of the remaining fields is an array of length numCols: */ From 09036dc71c682b0bf7234ed39c1429ed99fbe442 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Sep 2025 17:51:24 -0400 Subject: [PATCH 10/32] Avoid faulty alignment of Datums in build_sorted_items(). If sizeof(Pointer) is 4 then sizeof(SortItem) will be 12, so that if data->numrows is odd then we placed the values array at a location that's not a multiple of 8. That was fine when sizeof(Datum) was also 4, but in the wake of commit 2a600a93c it makes some alignment-picky machines unhappy. (You need a 32-bit machine that nonetheless expects 8-byte alignment of 8-byte quantities, which is an odd-seeming combination but it does exist outside the Intel universe.) To fix, MAXALIGN the space allocated to the SortItem array. In passing, let's make the "len" variable be Size not int, just for paranoia's sake. This code was arguably not too safe even before 2a600a93c, but at present I don't see a strong argument for back-patching. 
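To make the alignment arithmetic concrete, here is a small standalone sketch (not the extended_stats.c code): one allocation is carved into an item array followed by an 8-byte value array. The 12-byte ItemStandIn struct mimics sizeof(SortItem) on a build with 4-byte pointers, and the TYPEALIGN/MAXALIGN macros below are local mock-ups of the usual round-up-to-alignment definition; both are assumptions made for this illustration.

#include <stdio.h>
#include <stdint.h>

/* round LEN up to a multiple of ALIGNVAL, in the style of TYPEALIGN()/MAXALIGN() */
#define TYPEALIGN(ALIGNVAL, LEN) \
    (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1)))
#define MAXALIGN(LEN) TYPEALIGN(8, LEN)    /* assume 8-byte maximum alignment */

/* 12-byte stand-in for SortItem as laid out with 4-byte pointers */
typedef struct ItemStandIn
{
    int32_t     values_ref;
    int32_t     isnull_ref;
    int32_t     count;
} ItemStandIn;

int
main(void)
{
    int         numrows = 7;    /* an odd row count exposes the problem */
    size_t      raw = numrows * sizeof(ItemStandIn);
    size_t      padded = MAXALIGN(raw);

    /* the value array starts right after the item array within one palloc chunk */
    printf("unpadded item array: %zu bytes, value array offset %% 8 = %zu\n",
           raw, raw % 8);
    printf("MAXALIGNed item array: %zu bytes, value array offset %% 8 = %zu\n",
           padded, padded % 8);
    return 0;
}

With the MAXALIGN() rounding that the patch applies to the SortItem array (and to the matching pointer advance), the 8-byte values always start on an 8-byte boundary, whether or not data->numrows is odd.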
Reported-by: Tomas Vondra Author: Tom Lane Discussion: https://postgr.es/m/87036018-8d70-40ad-a0ac-192b07bd7b04@vondra.me --- src/backend/statistics/extended_stats.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index af0b99243c614..3c3d2d315c6f4 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -986,10 +986,9 @@ build_sorted_items(StatsBuildData *data, int *nitems, { int i, j, - len, nrows; int nvalues = data->numrows * numattrs; - + Size len; SortItem *items; Datum *values; bool *isnull; @@ -997,14 +996,16 @@ build_sorted_items(StatsBuildData *data, int *nitems, int *typlen; /* Compute the total amount of memory we need (both items and values). */ - len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool)); + len = MAXALIGN(data->numrows * sizeof(SortItem)) + + nvalues * (sizeof(Datum) + sizeof(bool)); /* Allocate the memory and split it into the pieces. */ ptr = palloc0(len); /* items to sort */ items = (SortItem *) ptr; - ptr += data->numrows * sizeof(SortItem); + /* MAXALIGN ensures that the following Datums are suitably aligned */ + ptr += MAXALIGN(data->numrows * sizeof(SortItem)); /* values and null flags */ values = (Datum *) ptr; From c88ce73eda2e0a818d730c5b72475ef99cc9c4cf Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 11 Sep 2025 10:15:33 +0900 Subject: [PATCH 11/32] Fix incorrect file reference in guc.h GucSource_Names was documented as being in guc.c, but since 0a20ff54f5e6 it is located in guc_tables.c. The reference to the location of GucSource_Names is important, as GucSource needs to be kept in sync with GucSource_Names. Author: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwYPgAHWPYjPzK7iXzhSZ6MKR8w20_Nz7ZXpOvx=kZbs7A@mail.gmail.com Backpatch-through: 16 --- src/include/utils/guc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 756e80a2c2fcc..f21ec37da8933 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -106,7 +106,7 @@ typedef enum * will show as "default" in pg_settings. If there is a specific reason not * to want that, use source == PGC_S_OVERRIDE. * - * NB: see GucSource_Names in guc.c if you change this. + * NB: see GucSource_Names in guc_tables.c if you change this. */ typedef enum { From 26eadf4d2b14a8a8d110b214dbb7ef952fbf8e93 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 11 Sep 2025 17:17:04 +0900 Subject: [PATCH 12/32] Fix description of WAL record blocks in hash_xlog.h hash_xlog.h included descriptions for the blocks used in WAL records that were not completely consistent with how the records are generated, with one block missing for SQUEEZE_PAGE, and inconsistent descriptions used for block 0 in VACUUM_ONE_PAGE and MOVE_PAGE_CONTENTS. This information has been incorrect since c11453ce0aea, as confirmed by cross-checking the logic for the record generation.
Author: Kirill Reshke Reviewed-by: Andrey Borodin Discussion: https://postgr.es/m/CALdSSPj1j=a1d1hVA3oabRFz0hSU3KKrYtZPijw4UPUM7LY9zw@mail.gmail.com Backpatch-through: 13 --- src/include/access/hash_xlog.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/include/access/hash_xlog.h b/src/include/access/hash_xlog.h index 6fe97de4d66f1..5d4671dc4c128 100644 --- a/src/include/access/hash_xlog.h +++ b/src/include/access/hash_xlog.h @@ -129,7 +129,7 @@ typedef struct xl_hash_split_complete * * This data record is used for XLOG_HASH_MOVE_PAGE_CONTENTS * - * Backup Blk 0: bucket page + * Backup Blk 0: primary bucket page * Backup Blk 1: page containing moved tuples * Backup Blk 2: page from which tuples will be removed */ @@ -149,12 +149,13 @@ typedef struct xl_hash_move_page_contents * * This data record is used for XLOG_HASH_SQUEEZE_PAGE * - * Backup Blk 0: page containing tuples moved from freed overflow page - * Backup Blk 1: freed overflow page - * Backup Blk 2: page previous to the freed overflow page - * Backup Blk 3: page next to the freed overflow page - * Backup Blk 4: bitmap page containing info of freed overflow page - * Backup Blk 5: meta page + * Backup Blk 0: primary bucket page + * Backup Blk 1: page containing tuples moved from freed overflow page + * Backup Blk 2: freed overflow page + * Backup Blk 3: page previous to the freed overflow page + * Backup Blk 4: page next to the freed overflow page + * Backup Blk 5: bitmap page containing info of freed overflow page + * Backup Blk 6: meta page */ typedef struct xl_hash_squeeze_page { @@ -245,7 +246,7 @@ typedef struct xl_hash_init_bitmap_page * * This data record is used for XLOG_HASH_VACUUM_ONE_PAGE * - * Backup Blk 0: bucket page + * Backup Blk 0: primary bucket page * Backup Blk 1: meta page */ typedef struct xl_hash_vacuum_one_page From 9c24111c4dad850bc2625c2113bb3d2dfd592efc Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 11 Sep 2025 09:25:47 +0100 Subject: [PATCH 13/32] doc: Improve description of new random(min, max) functions. Mention that the new variants of random(min, max) are affected by setseed(), like the original functions. Reported-by: Marcos Pegoraro Discussion: https://postgr.es/m/CAB-JLwb1=drA3Le6uZXDBi_tCpeS1qm6XQU7dKwac_x91Z4qDg@mail.gmail.com --- doc/src/sgml/func/func-datetime.sgml | 6 ++++++ doc/src/sgml/func/func-math.sgml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/func/func-datetime.sgml b/doc/src/sgml/func/func-datetime.sgml index 98dd60aa9a7ec..a25da4b5175ca 100644 --- a/doc/src/sgml/func/func-datetime.sgml +++ b/doc/src/sgml/func/func-datetime.sgml @@ -948,6 +948,12 @@ Returns a random value in the range min <= x <= max. + + Note that these functions use the same pseudo-random number generator + as the functions listed in , + and respond in the same way to calling + setseed(). + random('1979-02-08'::date,'2025-07-03'::date) 1983-04-21 diff --git a/doc/src/sgml/func/func-math.sgml b/doc/src/sgml/func/func-math.sgml index fd821c0e70677..9dcf97e7c9e06 100644 --- a/doc/src/sgml/func/func-math.sgml +++ b/doc/src/sgml/func/func-math.sgml @@ -1130,7 +1130,7 @@ - + setseed setseed ( double precision ) From 2bbbb2eca9303df590cc79be74b13cad259124a5 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 11 Sep 2025 09:48:12 +0100 Subject: [PATCH 14/32] doc: Fix indentation in func-datetime.sgml. Incorrect indentation introduced by commit faf071b5538. 
--- doc/src/sgml/func/func-datetime.sgml | 56 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/doc/src/sgml/func/func-datetime.sgml b/doc/src/sgml/func/func-datetime.sgml index a25da4b5175ca..8cd7150b0d313 100644 --- a/doc/src/sgml/func/func-datetime.sgml +++ b/doc/src/sgml/func/func-datetime.sgml @@ -935,34 +935,34 @@ random ( min date, max date ) date - - - random ( min timestamp, max timestamp ) - timestamp - - - random ( min timestamptz, max timestamptz ) - timestamptz - - - Returns a random value in the range - min <= x <= max. - - - Note that these functions use the same pseudo-random number generator - as the functions listed in , - and respond in the same way to calling - setseed(). - - - random('1979-02-08'::date,'2025-07-03'::date) - 1983-04-21 - - - random('2000-01-01'::timestamptz, now()) - 2015-09-27 09:11:33.732707+00 - - + + + random ( min timestamp, max timestamp ) + timestamp + + + random ( min timestamptz, max timestamptz ) + timestamptz + + + Returns a random value in the range + min <= x <= max. + + + Note that these functions use the same pseudo-random number generator + as the functions listed in , + and respond in the same way to calling + setseed(). + + + random('1979-02-08'::date,'2025-07-03'::date) + 1983-04-21 + + + random('2000-01-01'::timestamptz, now()) + 2015-09-27 09:11:33.732707+00 + + From 01d793698f5921547a7b5e1e003722c17f552574 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 11 Sep 2025 09:33:48 +0000 Subject: [PATCH 15/32] Fix intermittent test failure introduced in 6456c6e2c4. The test assumes that a backend will execute COMMIT PREPARED on the publisher and hit the injection point commit-after-delay-checkpoint within the commit critical section. This should cause the apply worker on the subscriber to wait for the transaction to complete. However, the test does not guarantee that the injection point is actually triggered, creating a race condition where the apply worker may proceed prematurely during COMMIT PREPARED. This commit resolves the issue by explicitly waiting for the injection point to be hit before continuing with the test, ensuring consistent and reliable behavior. Author: Zhijie Hou Reviewed-by: shveta malik Discussion: https://postgr.es/m/TY4PR01MB1690751D1CA8C128B0770EC6F9409A@TY4PR01MB16907.jpnprd01.prod.outlook.com --- src/test/subscription/t/035_conflicts.pl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index db0d5b464e825..880551fc69d74 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -475,6 +475,9 @@ } ); + # Wait until the backend enters the injection point + $node_B->wait_for_event('client backend', 'commit-after-delay-checkpoint'); + # Confirm the update is suspended $result = $node_B->safe_psql('postgres', 'SELECT * FROM tab WHERE a = 1'); From 368c38dd47c209fa95d2df855d62bcde386f3037 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 11 Sep 2025 11:55:29 +0200 Subject: [PATCH 16/32] Remove stray semicolon at global scope The Sun Studio compiler complains about an empty declaration here. Note for future historians: This does not mean that this compiler is still of current interest for anyone using PostgreSQL. But we can let this small fix be its parting gift. 
Reviewed-by: Andres Freund Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/a0f817ee-fb86-483a-8a14-b6f7f5991b6e%40eisentraut.org --- src/backend/replication/logical/slotsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 9d0072a49ed6d..8c061d55bdb51 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -1337,7 +1337,7 @@ reset_syncing_flag() SpinLockRelease(&SlotSyncCtx->mutex); syncing_slots = false; -}; +} /* * The main loop of our worker process. From 4fbe0151455fefbef7abc9d507adb04c978beb0d Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 11 Sep 2025 11:55:29 +0200 Subject: [PATCH 17/32] Remove checks for no longer supported GCC versions Since commit f5e0186f865 (Raise C requirement to C11), we effectively require at least GCC version 4.7, so checks for older versions can be removed. Reviewed-by: Andres Freund Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/a0f817ee-fb86-483a-8a14-b6f7f5991b6e%40eisentraut.org --- src/include/c.h | 8 ++++---- src/include/port/atomics/generic-gcc.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/include/c.h b/src/include/c.h index 39022f8a9dd75..b580cfa7d3178 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -259,8 +259,8 @@ * choose not to. But, if possible, don't force inlining in unoptimized * debug builds. */ -#if (defined(__GNUC__) && __GNUC__ > 3 && defined(__OPTIMIZE__)) || defined(__SUNPRO_C) -/* GCC > 3 and Sunpro support always_inline via __attribute__ */ +#if (defined(__GNUC__) && defined(__OPTIMIZE__)) || defined(__SUNPRO_C) +/* GCC and Sunpro support always_inline via __attribute__ */ #define pg_attribute_always_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) /* MSVC has a special keyword for this */ @@ -277,7 +277,7 @@ * above, this should be placed before the function's return type and name. */ /* GCC and Sunpro support noinline via __attribute__ */ -#if (defined(__GNUC__) && __GNUC__ > 2) || defined(__SUNPRO_C) +#if defined(__GNUC__) || defined(__SUNPRO_C) #define pg_noinline __attribute__((noinline)) /* msvc via declspec */ #elif defined(_MSC_VER) @@ -369,7 +369,7 @@ * These should only be used sparingly, in very hot code paths. It's very easy * to mis-estimate likelihoods. */ -#if __GNUC__ >= 3 +#ifdef __GNUC__ #define likely(x) __builtin_expect((x) != 0, 1) #define unlikely(x) __builtin_expect((x) != 0, 0) #else diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h index d8f04c89ccac2..e7dfad4f0d5eb 100644 --- a/src/include/port/atomics/generic-gcc.h +++ b/src/include/port/atomics/generic-gcc.h @@ -30,14 +30,14 @@ #define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory") /* - * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier + * If we're on GCC, we should be able to get a memory barrier * out of this compiler built-in. But we prefer to rely on platform specific * definitions where possible, and use this only as a fallback. 
*/ #if !defined(pg_memory_barrier_impl) # if defined(HAVE_GCC__ATOMIC_INT32_CAS) # define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST) -# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +# elif defined(__GNUC__) # define pg_memory_barrier_impl() __sync_synchronize() # endif #endif /* !defined(pg_memory_barrier_impl) */ From a2b4102a21ad730ce46b059acf49d72151e979f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 11 Sep 2025 18:11:46 +0200 Subject: [PATCH 18/32] Fill testing gap for possible referential integrity violation This commit adds a missing isolation test for (non-PERIOD) foreign keys. With REPEATABLE READ, one transaction can insert a referencing row while another deletes the referenced row, and both see a valid state. But after they have committed, the table violates referential integrity. If the INSERT precedes the DELETE, we use a crosscheck snapshot to see the just-added row, so that the DELETE can raise a foreign key error. You can see the table violate referential integrity if you change ri_restrict to pass false for detectNewRows to ri_PerformCheck. A crosscheck snapshot is not needed when the DELETE comes first, because the INSERT's trigger takes a FOR KEY SHARE lock that sees the row now marked for deletion, waits for that transaction to commit, and raises a serialization error. I (Paul) added a test for that too though. We already have a similar test (in ri-triggers.spec) for SERIALIZABLE snapshot isolation showing that you can implement foreign keys with just pl/pgSQL, but that test does nothing to validate ri_triggers.c. We also have tests (in fk-snapshot.spec) for other concurrency scenarios, but not this one: we test concurrently deleting both the referencing and referenced row, when the constraint activates a cascade/set null action. But those tests don't exercise ri_restrict, and the consequence of omitting a crosscheck comparison is different: a serialization failure, not a referential integrity violation. Author: Paul Jungwirth Reviewed-by: Rustam ALLAKOV Reviewed-by: Dean Rasheed Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CA+renyUp=xja80rBaB6NpY3RRdi750y046x28bo_xg29zKY72Q@mail.gmail.com --- src/test/isolation/expected/fk-snapshot-2.out | 61 +++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/fk-snapshot-2.spec | 50 +++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 src/test/isolation/expected/fk-snapshot-2.out create mode 100644 src/test/isolation/specs/fk-snapshot-2.spec diff --git a/src/test/isolation/expected/fk-snapshot-2.out b/src/test/isolation/expected/fk-snapshot-2.out new file mode 100644 index 0000000000000..0a4c9646fca4e --- /dev/null +++ b/src/test/isolation/expected/fk-snapshot-2.out @@ -0,0 +1,61 @@ +Parsed test spec with 2 sessions + +starting permutation: s1rr s2rr s2ins s1del s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2c: COMMIT; +step s1del: <... 
completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1del s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1del s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1del s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: insert or update on table "child" violates foreign key constraint "child_parent_id_fkey" +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1del s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1del s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1del: DELETE FROM parent WHERE parent_id = 1; +step s2ins: INSERT INTO child VALUES (1, 1); +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 9f1e997d81b00..130525dfd3d68 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -36,6 +36,7 @@ test: fk-deadlock2 test: fk-partitioned-1 test: fk-partitioned-2 test: fk-snapshot +test: fk-snapshot-2 test: subxid-overflow test: eval-plan-qual test: eval-plan-qual-trigger diff --git a/src/test/isolation/specs/fk-snapshot-2.spec b/src/test/isolation/specs/fk-snapshot-2.spec new file mode 100644 index 0000000000000..94cd151aab9d3 --- /dev/null +++ b/src/test/isolation/specs/fk-snapshot-2.spec @@ -0,0 +1,50 @@ +# RI Trigger test +# +# Test C-based referential integrity enforcement. +# Under REPEATABLE READ we need some snapshot trickery in C, +# or we would permit things that violate referential integrity. 
+ +setup +{ + CREATE TABLE parent (parent_id SERIAL NOT NULL PRIMARY KEY); + CREATE TABLE child ( + child_id SERIAL NOT NULL PRIMARY KEY, + parent_id INTEGER REFERENCES parent); + INSERT INTO parent VALUES(1); +} + +teardown { DROP TABLE parent, child; } + +session s1 +step s1rc { BEGIN ISOLATION LEVEL READ COMMITTED; } +step s1rr { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step s1ser { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step s1del { DELETE FROM parent WHERE parent_id = 1; } +step s1c { COMMIT; } + +session s2 +step s2rc { BEGIN ISOLATION LEVEL READ COMMITTED; } +step s2rr { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step s2ser { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step s2ins { INSERT INTO child VALUES (1, 1); } +step s2c { COMMIT; } + +# Violates referential integrity unless we use a crosscheck snapshot, +# which is up-to-date compared with the transaction's snapshot. +permutation s1rr s2rr s2ins s1del s2c s1c + +# Raises a can't-serialize exception +# when the INSERT trigger does SELECT FOR KEY SHARE: +permutation s1rr s2rr s1del s2ins s1c s2c + +# Test the same scenarios in READ COMMITTED: +# A crosscheck snapshot is not required here. +permutation s1rc s2rc s2ins s1del s2c s1c +permutation s1rc s2rc s1del s2ins s1c s2c + +# Test the same scenarios in SERIALIZABLE: +# We should report the FK violation: +permutation s1ser s2ser s2ins s1del s2c s1c +# We raise a concurrent update error +# which is good enough: +permutation s1ser s2ser s1del s2ins s1c s2c From e8cec3d1791c140398454aa561cf51659dd8243d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 11 Sep 2025 18:13:09 +0200 Subject: [PATCH 19/32] Add test for temporal referential integrity This commit adds an isolation test showing that temporal foreign keys do not permit referential integrity violations under concurrency, like fk-snapshot-2. You can show that the test fails by passing false for detectNewRows to ri_PerformCheck in ri_restrict. Author: Paul Jungwirth Reviewed-by: Rustam ALLAKOV Reviewed-by: Dean Rasheed Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CA+renyUp=xja80rBaB6NpY3RRdi750y046x28bo_xg29zKY72Q@mail.gmail.com --- src/test/isolation/expected/fk-snapshot-3.out | 213 ++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + src/test/isolation/specs/fk-snapshot-3.spec | 82 +++++++ 3 files changed, 296 insertions(+) create mode 100644 src/test/isolation/expected/fk-snapshot-3.out create mode 100644 src/test/isolation/specs/fk-snapshot-3.spec diff --git a/src/test/isolation/expected/fk-snapshot-3.out b/src/test/isolation/expected/fk-snapshot-3.out new file mode 100644 index 0000000000000..f98cb72fdac30 --- /dev/null +++ b/src/test/isolation/expected/fk-snapshot-3.out @@ -0,0 +1,213 @@ +Parsed test spec with 2 sessions + +starting permutation: s1rr s2rr s2ins s1del s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1del: <... 
completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1del s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1del s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1del s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: insert or update on table "child" violates foreign key constraint "child_parent_id_valid_at_fkey" +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1del s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1del: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1del s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1del: DELETE FROM parent WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1upok s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upok: <... completed> +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1upok s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... 
completed> +step s2c: COMMIT; + +starting permutation: s1rr s2rr s2ins s1upok s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upok: <... completed> +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1upok s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1upok s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upok: <... completed> +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1upok s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1upok: UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1rc s2rc s2ins s1upbad s2c s1c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upbad: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rc s2rc s1upbad s2ins s1c s2c +step s1rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s2rc: BEGIN ISOLATION LEVEL READ COMMITTED; +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: insert or update on table "child" violates foreign key constraint "child_parent_id_valid_at_fkey" +step s2c: COMMIT; + +starting permutation: s1rr s2rr s2ins s1upbad s2c s1c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upbad: <... 
completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1rr s2rr s1upbad s2ins s1c s2c +step s1rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2rr: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; + +starting permutation: s1ser s2ser s2ins s1upbad s2c s1c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2c: COMMIT; +step s1upbad: <... completed> +ERROR: update or delete on table "parent" violates foreign key constraint "child_parent_id_valid_at_fkey" on table "child" +step s1c: COMMIT; + +starting permutation: s1ser s2ser s1upbad s2ins s1c s2c +step s1ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s2ser: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1upbad: UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; +step s2ins: + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); + +step s1c: COMMIT; +step s2ins: <... completed> +ERROR: could not serialize access due to concurrent update +step s2c: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 130525dfd3d68..5afae33d37036 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -37,6 +37,7 @@ test: fk-partitioned-1 test: fk-partitioned-2 test: fk-snapshot test: fk-snapshot-2 +test: fk-snapshot-3 test: subxid-overflow test: eval-plan-qual test: eval-plan-qual-trigger diff --git a/src/test/isolation/specs/fk-snapshot-3.spec b/src/test/isolation/specs/fk-snapshot-3.spec new file mode 100644 index 0000000000000..90075024f5cc0 --- /dev/null +++ b/src/test/isolation/specs/fk-snapshot-3.spec @@ -0,0 +1,82 @@ +# RI Trigger test +# +# Test C-based temporal referential integrity enforcement. +# Under REPEATABLE READ we need some snapshot trickery in C, +# or we would permit things that violate referential integrity. 
+ +setup +{ + CREATE TABLE parent ( + id int4range NOT NULL, + valid_at daterange NOT NULL, + PRIMARY KEY (id, valid_at WITHOUT OVERLAPS)); + CREATE TABLE child ( + id int4range NOT NULL, + valid_at daterange NOT NULL, + parent_id int4range, + FOREIGN KEY (parent_id, PERIOD valid_at) REFERENCES parent); + INSERT INTO parent VALUES ('[1,2)', '[2020-01-01,2030-01-01)'); +} + +teardown { DROP TABLE parent, child; } + +session s1 +step s1rc { BEGIN ISOLATION LEVEL READ COMMITTED; } +step s1rr { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step s1ser { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step s1del { DELETE FROM parent WHERE id = '[1,2)'; } +step s1upok { UPDATE parent SET valid_at = '[2020-01-01,2026-01-01)' WHERE id = '[1,2)'; } +step s1upbad { UPDATE parent SET valid_at = '[2020-01-01,2024-01-01)' WHERE id = '[1,2)'; } +step s1c { COMMIT; } + +session s2 +step s2rc { BEGIN ISOLATION LEVEL READ COMMITTED; } +step s2rr { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step s2ser { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step s2ins { + INSERT INTO child VALUES ('[1,2)', '[2020-01-01,2025-01-01)', '[1,2)'); +} +step s2c { COMMIT; } + +# Violates referential integrity unless we use an up-to-date crosscheck snapshot: +permutation s1rr s2rr s2ins s1del s2c s1c + +# Raises a can't-serialize exception +# when the INSERT trigger does SELECT FOR KEY SHARE: +permutation s1rr s2rr s1del s2ins s1c s2c + +# Test the same scenarios in READ COMMITTED: +# A crosscheck snapshot is not required here. +permutation s1rc s2rc s2ins s1del s2c s1c +permutation s1rc s2rc s1del s2ins s1c s2c + +# Test the same scenarios in SERIALIZABLE: +# We should report the FK violation: +permutation s1ser s2ser s2ins s1del s2c s1c +# We raise a concurrent update error +# which is good enough: +permutation s1ser s2ser s1del s2ins s1c s2c + +# Also check updating the valid time (without violating RI): + +# ...with READ COMMITED: +permutation s1rc s2rc s2ins s1upok s2c s1c +permutation s1rc s2rc s1upok s2ins s1c s2c +# ...with REPEATABLE READ: +permutation s1rr s2rr s2ins s1upok s2c s1c +permutation s1rr s2rr s1upok s2ins s1c s2c +# ...with SERIALIZABLE: +permutation s1ser s2ser s2ins s1upok s2c s1c +permutation s1ser s2ser s1upok s2ins s1c s2c + +# Also check updating the valid time (while violating RI): + +# ...with READ COMMITED: +permutation s1rc s2rc s2ins s1upbad s2c s1c +permutation s1rc s2rc s1upbad s2ins s1c s2c +# ...with REPEATABLE READ: +permutation s1rr s2rr s2ins s1upbad s2c s1c +permutation s1rr s2rr s1upbad s2ins s1c s2c +# ...with SERIALIZABLE: +permutation s1ser s2ser s2ins s1upbad s2c s1c +permutation s1ser s2ser s1upbad s2ins s1c s2c From 1d5800019f68d81139021b8bab159b8578fcaa2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 11 Sep 2025 19:49:57 +0200 Subject: [PATCH 20/32] Improve comment about snapshot macros The comment mistakenly had "the others" for "the other", but this commit also reorders the comment so it matches the macros below. Now we describe the levels in increasing strictness. In addition, it seems easier to follow if we introduce one level at a time, rather than describing two, followed by "the other" (and then jumping back to one of the first two). Finally, reword the sentence about the purpose of the macros, which was slightly off-point. 
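For illustration only (this snippet is not part of the patch), the two macros
classify the levels like this; the constants are restated locally, with values
mirroring xact.h, so the sketch compiles on its own:

    #include <stdio.h>

    /* Local restatement of the xact.h constants, in increasing strictness */
    #define XACT_READ_COMMITTED   1
    #define XACT_REPEATABLE_READ  2
    #define XACT_SERIALIZABLE     3

    static int XactIsoLevel = XACT_REPEATABLE_READ;

    #define IsolationUsesXactSnapshot() (XactIsoLevel >= XACT_REPEATABLE_READ)
    #define IsolationIsSerializable()   (XactIsoLevel == XACT_SERIALIZABLE)

    int
    main(void)
    {
        /* one snapshot per transaction only at the two stronger levels */
        printf("uses xact snapshot: %d\n", IsolationUsesXactSnapshot() ? 1 : 0);
        /* predicate locks are taken only at SERIALIZABLE */
        printf("serializable: %d\n", IsolationIsSerializable() ? 1 : 0);
        return 0;
    }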
Author: Paul Jungwirth Reviewed-by: Rustam ALLAKOV Reviewed-by: Dean Rasheed Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CA+renyUp=xja80rBaB6NpY3RRdi750y046x28bo_xg29zKY72Q@mail.gmail.com --- src/include/access/xact.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/include/access/xact.h b/src/include/access/xact.h index b2bc10ee04196..4528e51829e61 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -43,10 +43,11 @@ extern PGDLLIMPORT int XactIsoLevel; /* * We implement three isolation levels internally. - * The two stronger ones use one snapshot per database transaction; - * the others use one snapshot per statement. - * Serializable uses predicate locks in addition to snapshots. - * These macros should be used to check which isolation level is selected. + * The weakest uses one snapshot per statement; + * the two stronger levels use one snapshot per database transaction. + * Serializable uses predicate locks in addition to the snapshot. + * These macros can be used to determine which implementation to use + * depending on the prevailing serialization level. */ #define IsolationUsesXactSnapshot() (XactIsoLevel >= XACT_REPEATABLE_READ) #define IsolationIsSerializable() (XactIsoLevel == XACT_SERIALIZABLE) From a0b99fc12203fa179d5b4218a21de30e0e91a7b8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 11 Sep 2025 17:11:54 -0400 Subject: [PATCH 21/32] Report the correct is_temporary flag for column defaults. pg_event_trigger_dropped_objects() would report a column default object with is_temporary = false, even if it belongs to a temporary table. This seems clearly wrong, so adjust it to report the table's temp-ness. While here, refactor EventTriggerSQLDropAddObject to make its handling of namespace objects less messy and avoid duplication of the schema-lookup code. And add some explicit test coverage of dropped-object reports for dependencies of temp tables. Back-patch to v15. The bug exists further back, but the GetAttrDefaultColumnAddress function this patch depends on does not, and it doesn't seem worth adjusting it to cope with the older code. 
Author: Antoine Violin Co-authored-by: Tom Lane Discussion: https://postgr.es/m/CAFjUV9x3-hv0gihf+CtUc-1it0hh7Skp9iYFhMS7FJjtAeAptA@mail.gmail.com Backpatch-through: 15 --- src/backend/commands/event_trigger.c | 111 ++++++++++++++------ src/test/regress/expected/event_trigger.out | 37 +++++++ src/test/regress/sql/event_trigger.sql | 20 ++++ 3 files changed, 136 insertions(+), 32 deletions(-) diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 631fb0525f1e7..fcdcba009d4e3 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -21,6 +21,7 @@ #include "catalog/dependency.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_authid.h" #include "catalog/pg_auth_members.h" #include "catalog/pg_database.h" @@ -109,6 +110,8 @@ static Oid insert_event_trigger_tuple(const char *trigname, const char *eventnam static void validate_ddl_tags(const char *filtervar, List *taglist); static void validate_table_rewrite_tags(const char *filtervar, List *taglist); static void EventTriggerInvoke(List *fn_oid_list, EventTriggerData *trigdata); +static bool obtain_object_name_namespace(const ObjectAddress *object, + SQLDropObject *obj); static const char *stringify_grant_objtype(ObjectType objtype); static const char *stringify_adefprivs_objtype(ObjectType objtype); static void SetDatabaseHasLoginEventTriggers(void); @@ -1280,12 +1283,6 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no Assert(EventTriggerSupportsObject(object)); - /* don't report temp schemas except my own */ - if (object->classId == NamespaceRelationId && - (isAnyTempNamespace(object->objectId) && - !isTempNamespace(object->objectId))) - return; - oldcxt = MemoryContextSwitchTo(currentEventTriggerState->cxt); obj = palloc0(sizeof(SQLDropObject)); @@ -1293,21 +1290,88 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no obj->original = original; obj->normal = normal; + if (object->classId == NamespaceRelationId) + { + /* Special handling is needed for temp namespaces */ + if (isTempNamespace(object->objectId)) + obj->istemp = true; + else if (isAnyTempNamespace(object->objectId)) + { + /* don't report temp schemas except my own */ + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + else if (object->classId == AttrDefaultRelationId) + { + /* We treat a column default as temp if its table is temp */ + ObjectAddress colobject; + + colobject = GetAttrDefaultColumnAddress(object->objectId); + if (OidIsValid(colobject.objectId)) + { + colobject.objectSubId = 0; /* convert to table reference */ + if (!obtain_object_name_namespace(&colobject, obj)) + { + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + } + else + { + /* Generic handling for all other object classes */ + if (!obtain_object_name_namespace(object, obj)) + { + /* don't report temp objects except my own */ + pfree(obj); + MemoryContextSwitchTo(oldcxt); + return; + } + } + + /* object identity, objname and objargs */ + obj->objidentity = + getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs, + false); + + /* object type */ + obj->objecttype = getObjectTypeDescription(&obj->address, false); + + slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next); + + MemoryContextSwitchTo(oldcxt); +} + +/* + * Fill obj->objname, obj->schemaname, and obj->istemp based on object. 
+ * + * Returns true if this object should be reported, false if it should + * be ignored because it is a temporary object of another session. + */ +static bool +obtain_object_name_namespace(const ObjectAddress *object, SQLDropObject *obj) +{ /* * Obtain schema names from the object's catalog tuple, if one exists; * this lets us skip objects in temp schemas. We trust that * ObjectProperty contains all object classes that can be * schema-qualified. + * + * Currently, this function does nothing for object classes that are not + * in ObjectProperty, but we might sometime add special cases for that. */ if (is_objectclass_supported(object->classId)) { Relation catalog; HeapTuple tuple; - catalog = table_open(obj->address.classId, AccessShareLock); + catalog = table_open(object->classId, AccessShareLock); tuple = get_catalog_object_by_oid(catalog, get_object_attnum_oid(object->classId), - obj->address.objectId); + object->objectId); if (tuple) { @@ -1315,7 +1379,7 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no Datum datum; bool isnull; - attnum = get_object_attnum_namespace(obj->address.classId); + attnum = get_object_attnum_namespace(object->classId); if (attnum != InvalidAttrNumber) { datum = heap_getattr(tuple, attnum, @@ -1333,10 +1397,9 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } else if (isAnyTempNamespace(namespaceId)) { - pfree(obj); + /* no need to fill any fields of *obj */ table_close(catalog, AccessShareLock); - MemoryContextSwitchTo(oldcxt); - return; + return false; } else { @@ -1346,10 +1409,10 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no } } - if (get_object_namensp_unique(obj->address.classId) && - obj->address.objectSubId == 0) + if (get_object_namensp_unique(object->classId) && + object->objectSubId == 0) { - attnum = get_object_attnum_name(obj->address.classId); + attnum = get_object_attnum_name(object->classId); if (attnum != InvalidAttrNumber) { datum = heap_getattr(tuple, attnum, @@ -1362,24 +1425,8 @@ EventTriggerSQLDropAddObject(const ObjectAddress *object, bool original, bool no table_close(catalog, AccessShareLock); } - else - { - if (object->classId == NamespaceRelationId && - isTempNamespace(object->objectId)) - obj->istemp = true; - } - /* object identity, objname and objargs */ - obj->objidentity = - getObjectIdentityParts(&obj->address, &obj->addrnames, &obj->addrargs, - false); - - /* object type */ - obj->objecttype = getObjectTypeDescription(&obj->address, false); - - slist_push_head(&(currentEventTriggerState->SQLDropList), &obj->next); - - MemoryContextSwitchTo(oldcxt); + return true; } /* diff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out index 7b2198eac6f20..0e090cbc37500 100644 --- a/src/test/regress/expected/event_trigger.out +++ b/src/test/regress/expected/event_trigger.out @@ -476,6 +476,43 @@ NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_10_15 NOTICE: NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.part_15_20 name={evttrig,part_15_20} args={} DROP TABLE a_temp_tbl; NOTICE: NORMAL: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={} +-- check unfiltered results, too +CREATE OR REPLACE FUNCTION event_trigger_report_dropped() + RETURNS event_trigger + LANGUAGE plpgsql +AS $$ +DECLARE r record; +BEGIN + FOR r IN SELECT * from pg_event_trigger_dropped_objects() + LOOP + RAISE NOTICE 'DROP: orig=% normal=% 
istemp=% type=% identity=% name=% args=%', + r.original, r.normal, r.is_temporary, r.object_type, + r.object_identity, r.address_names, r.address_args; + END LOOP; +END; $$; +NOTICE: END: command_tag=CREATE FUNCTION type=function identity=public.event_trigger_report_dropped() +CREATE TABLE evtrg_nontemp_table (f1 int primary key, f2 int default 42); +NOTICE: END: command_tag=CREATE TABLE type=table identity=public.evtrg_nontemp_table +NOTICE: END: command_tag=CREATE INDEX type=index identity=public.evtrg_nontemp_table_pkey +DROP TABLE evtrg_nontemp_table; +NOTICE: DROP: orig=t normal=f istemp=f type=table identity=public.evtrg_nontemp_table name={public,evtrg_nontemp_table} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table name={public.evtrg_nontemp_table} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=type identity=public.evtrg_nontemp_table[] name={public.evtrg_nontemp_table[]} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=default value identity=for public.evtrg_nontemp_table.f2 name={public,evtrg_nontemp_table,f2} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_f1_not_null on public.evtrg_nontemp_table name={public,evtrg_nontemp_table,evtrg_nontemp_table_f1_not_null} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=table constraint identity=evtrg_nontemp_table_pkey on public.evtrg_nontemp_table name={public,evtrg_nontemp_table,evtrg_nontemp_table_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=f type=index identity=public.evtrg_nontemp_table_pkey name={public,evtrg_nontemp_table_pkey} args={} +CREATE TEMP TABLE a_temp_tbl (f1 int primary key, f2 int default 42); +NOTICE: END: command_tag=CREATE TABLE type=table identity=pg_temp.a_temp_tbl +NOTICE: END: command_tag=CREATE INDEX type=index identity=pg_temp.a_temp_tbl_pkey +DROP TABLE a_temp_tbl; +NOTICE: DROP: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl name={pg_temp.a_temp_tbl} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=type identity=pg_temp.a_temp_tbl[] name={pg_temp.a_temp_tbl[]} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=default value identity=for pg_temp.a_temp_tbl.f2 name={pg_temp,a_temp_tbl,f2} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_f1_not_null on pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl,a_temp_tbl_f1_not_null} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=table constraint identity=a_temp_tbl_pkey on pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl,a_temp_tbl_pkey} args={} +NOTICE: DROP: orig=f normal=f istemp=t type=index identity=pg_temp.a_temp_tbl_pkey name={pg_temp,a_temp_tbl_pkey} args={} -- CREATE OPERATOR CLASS without FAMILY clause should report -- both CREATE OPERATOR FAMILY and CREATE OPERATOR CLASS CREATE OPERATOR CLASS evttrigopclass FOR TYPE int USING btree AS STORAGE int; diff --git a/src/test/regress/sql/event_trigger.sql b/src/test/regress/sql/event_trigger.sql index 013546b83057b..ef5978b9697aa 100644 --- a/src/test/regress/sql/event_trigger.sql +++ b/src/test/regress/sql/event_trigger.sql @@ -337,6 +337,26 @@ DROP INDEX evttrig.one_idx; DROP SCHEMA evttrig CASCADE; DROP TABLE a_temp_tbl; +-- check unfiltered results, too +CREATE OR REPLACE FUNCTION event_trigger_report_dropped() + RETURNS event_trigger + LANGUAGE plpgsql +AS $$ +DECLARE r record; +BEGIN + FOR r IN SELECT * from 
pg_event_trigger_dropped_objects() + LOOP + RAISE NOTICE 'DROP: orig=% normal=% istemp=% type=% identity=% name=% args=%', + r.original, r.normal, r.is_temporary, r.object_type, + r.object_identity, r.address_names, r.address_args; + END LOOP; +END; $$; + +CREATE TABLE evtrg_nontemp_table (f1 int primary key, f2 int default 42); +DROP TABLE evtrg_nontemp_table; +CREATE TEMP TABLE a_temp_tbl (f1 int primary key, f2 int default 42); +DROP TABLE a_temp_tbl; + -- CREATE OPERATOR CLASS without FAMILY clause should report -- both CREATE OPERATOR FAMILY and CREATE OPERATOR CLASS CREATE OPERATOR CLASS evttrigopclass FOR TYPE int USING btree AS STORAGE int; From ed1aad15e09d7d523f4ef413e3c4d410497c8065 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 11 Sep 2025 16:13:55 -0500 Subject: [PATCH 22/32] Move named LWLock tranche requests to shared memory. In EXEC_BACKEND builds, GetNamedLWLockTranche() can segfault when called outside of the postmaster process, as it might access NamedLWLockTrancheRequestArray, which won't be initialized. Given the lack of reports, this is apparently unusual, presumably because it is usually called from a shmem_startup_hook like this: mystruct = ShmemInitStruct(..., &found); if (!found) { mystruct->locks = GetNamedLWLockTranche(...); ... } This genre of shmem_startup_hook evades the aforementioned segfaults because the struct is initialized in the postmaster, so all other callers skip the !found path. We considered modifying the documentation or requiring GetNamedLWLockTranche() to be called from the postmaster, but ultimately we decided to simply move the request array to shared memory (and add it to the BackendParameters struct), thereby allowing calls outside postmaster on all platforms. Since the main shared memory segment is initialized after accepting LWLock tranche requests, postmaster builds the request array in local memory first and then copies it to shared memory later. Given the lack of reports, back-patching seems unnecessary. 
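For illustration, this is the kind of call site the change makes safe outside
the postmaster on every platform, e.g. in a background worker or a regular
backend. A sketch only, not part of the patch; it assumes the extension
requested a tranche named "my_extension" with RequestNamedLWLockTranche()
in its _PG_init():

    #include "postgres.h"
    #include "storage/lwlock.h"

    static void
    use_my_extension_lock(void)
    {
        /* With the request array in shared memory, this lookup works anywhere. */
        LWLockPadded *locks = GetNamedLWLockTranche("my_extension");

        LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
        /* ... manipulate the shared state protected by this lock ... */
        LWLockRelease(&locks[0].lock);
    }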
Reported-by: Sami Imseih Reviewed-by: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0v1_15QPg5Sqd2Qz5rh_qcsyCeHHmRDY89xVHcy2yt5BQ%40mail.gmail.com --- src/backend/postmaster/launch_backend.c | 3 +++ src/backend/storage/lmgr/lwlock.c | 31 +++++++++++++++++++++---- src/include/storage/lwlock.h | 4 ++++ 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index a38979c50e4bb..c5ef14e1eaae8 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -101,6 +101,7 @@ typedef struct struct InjectionPointsCtl *ActiveInjectionPoints; #endif int NamedLWLockTrancheRequests; + NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray; char **LWLockTrancheNames; int *LWLockCounter; LWLockPadded *MainLWLockArray; @@ -761,6 +762,7 @@ save_backend_variables(BackendParameters *param, #endif param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; + param->NamedLWLockTrancheRequestArray = NamedLWLockTrancheRequestArray; param->LWLockTrancheNames = LWLockTrancheNames; param->LWLockCounter = LWLockCounter; param->MainLWLockArray = MainLWLockArray; @@ -1022,6 +1024,7 @@ restore_backend_variables(BackendParameters *param) #endif NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; + NamedLWLockTrancheRequestArray = param->NamedLWLockTrancheRequestArray; LWLockTrancheNames = param->LWLockTrancheNames; LWLockCounter = param->LWLockCounter; MainLWLockArray = param->MainLWLockArray; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index fcbac5213a5c0..46c82c63ca537 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -184,14 +184,13 @@ typedef struct NamedLWLockTrancheRequest int num_lwlocks; } NamedLWLockTrancheRequest; -static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL; - /* - * NamedLWLockTrancheRequests is the valid length of the request array. This - * variable is non-static so that postmaster.c can copy them to child processes - * in EXEC_BACKEND builds. + * NamedLWLockTrancheRequests is the valid length of the request array. These + * variables are non-static so that launch_backend.c can copy them to child + * processes in EXEC_BACKEND builds. */ int NamedLWLockTrancheRequests = 0; +NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL; /* shared memory counter of registered tranches */ int *LWLockCounter = NULL; @@ -407,6 +406,14 @@ LWLockShmemSize(void) size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *))); size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN)); + /* + * Make space for named tranche requests. This is done for the benefit of + * EXEC_BACKEND builds, which otherwise wouldn't be able to call + * GetNamedLWLockTranche() outside postmaster. + */ + size = add_size(size, mul_size(NamedLWLockTrancheRequests, + sizeof(NamedLWLockTrancheRequest))); + /* Space for the LWLock array, plus room for cache line alignment. */ size = add_size(size, LWLOCK_PADDED_SIZE); size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded))); @@ -443,6 +450,20 @@ CreateLWLocks(void) ptr += NAMEDATALEN; } + /* + * Move named tranche requests to shared memory. This is done for the + * benefit of EXEC_BACKEND builds, which otherwise wouldn't be able to + * call GetNamedLWLockTranche() outside postmaster. 
+ */ + if (NamedLWLockTrancheRequests > 0) + { + memcpy(ptr, NamedLWLockTrancheRequestArray, + NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest)); + pfree(NamedLWLockTrancheRequestArray); + NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *) ptr; + ptr += NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest); + } + /* Ensure desired alignment of LWLock array */ ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE; MainLWLockArray = (LWLockPadded *) ptr; diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 0e9cf81a4c766..8e0d0d233b48f 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -73,8 +73,12 @@ typedef union LWLockPadded extern PGDLLIMPORT LWLockPadded *MainLWLockArray; +/* forward declaration of private type for use only by lwlock.c */ +typedef struct NamedLWLockTrancheRequest NamedLWLockTrancheRequest; + extern PGDLLIMPORT char **LWLockTrancheNames; extern PGDLLIMPORT int NamedLWLockTrancheRequests; +extern PGDLLIMPORT NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray; extern PGDLLIMPORT int *LWLockCounter; /* From 306dd13079ed616c414c9411c5deadffea273266 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 12 Sep 2025 09:55:16 +0900 Subject: [PATCH 23/32] Remove whitespace in comment of pg_stat_statements.c Introduced by 6b4d23feef6e, spotted while reading this area of the code. --- contrib/pg_stat_statements/pg_stat_statements.c | 1 - 1 file changed, 1 deletion(-) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 1cb368c8590ba..0bb0f9333998b 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -139,7 +139,6 @@ typedef enum pgssStoreKind * If you add a new key to this struct, make sure to teach pgss_store() to * zero the padding bytes. Otherwise, things will break, because pgss_hash is * created using HASH_BLOBS, and thus tag_hash is used to hash this. - */ typedef struct pgssHashKey { From 528dadf691df3023fbb0bd71da5c6087c2d49d6a Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 12 Sep 2025 10:29:02 +0900 Subject: [PATCH 24/32] Add more information for WAL records of hash index AMs hashdesc.c was missing a couple of fields in its record descriptions, as of: - is_prev_bucket_same_wrt for SQUEEZE_PAGE. - procid for INIT_META_PAGE. - old_bucket_flag and new_bucket_flag for SPLIT_ALLOCATE_PAGE. The author has noted the first hole, and I have spotted the others while double-checking this area of the code. Note that the only data missing now are the offsets stored in VACUUM_ONE_PAGE. We could perhaps add them, if somebody sees value in this data, even if it makes the output larger. These are discarded here. 
Author: Kirill Reshke Discussion: https://postgr.es/m/CALdSSPjc-OVwtZH0Xrkvg7n=2ZwdbMJzqrm_ed_CfjiAzuKVGg@mail.gmail.com --- src/backend/access/rmgrdesc/hashdesc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/backend/access/rmgrdesc/hashdesc.c b/src/backend/access/rmgrdesc/hashdesc.c index 75f43a9152071..2ee5332452f39 100644 --- a/src/backend/access/rmgrdesc/hashdesc.c +++ b/src/backend/access/rmgrdesc/hashdesc.c @@ -28,8 +28,10 @@ hash_desc(StringInfo buf, XLogReaderState *record) { xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) rec; - appendStringInfo(buf, "num_tuples %g, fillfactor %d", - xlrec->num_tuples, xlrec->ffactor); + appendStringInfo(buf, "num_tuples %g, procid %u, fillfactor %d", + xlrec->num_tuples, + xlrec->procid, + xlrec->ffactor); break; } case XLOG_HASH_INIT_BITMAP_PAGE: @@ -58,8 +60,10 @@ hash_desc(StringInfo buf, XLogReaderState *record) { xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) rec; - appendStringInfo(buf, "new_bucket %u, meta_page_masks_updated %c, issplitpoint_changed %c", + appendStringInfo(buf, "new_bucket %u, old_bucket_flag %u, new_bucket_flag %u, meta_page_masks_updated %c, issplitpoint_changed %c", xlrec->new_bucket, + xlrec->old_bucket_flag, + xlrec->new_bucket_flag, (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) ? 'T' : 'F', (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) ? 'T' : 'F'); break; @@ -85,11 +89,12 @@ hash_desc(StringInfo buf, XLogReaderState *record) { xl_hash_squeeze_page *xlrec = (xl_hash_squeeze_page *) rec; - appendStringInfo(buf, "prevblkno %u, nextblkno %u, ntups %d, is_primary %c", + appendStringInfo(buf, "prevblkno %u, nextblkno %u, ntups %d, is_primary %c, is_previous %c", xlrec->prevblkno, xlrec->nextblkno, xlrec->ntups, - xlrec->is_prim_bucket_same_wrt ? 'T' : 'F'); + xlrec->is_prim_bucket_same_wrt ? 'T' : 'F', + xlrec->is_prev_bucket_same_wrt ? 'T' : 'F'); break; } case XLOG_HASH_DELETE: From 2d756ebbe857e3d395d18350bf232300ebd23981 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 12 Sep 2025 11:12:19 +0900 Subject: [PATCH 25/32] Fix misuse of Relids for storing attribute numbers The typedef Relids (Bitmapset *) is intended to represent set of relation identifiers, but was incorrectly used in several places to store sets of attribute numbers. This is my oversight in e2debb643. Fix that by replacing such usages with Bitmapset * to reflect the correct semantics. 
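Since Relids is only a typedef for Bitmapset *, the compiler accepts either
spelling; the change is for readers. An illustrative fragment, not taken from
the tree:

    #include "postgres.h"
    #include "nodes/bitmapset.h"
    #include "nodes/pathnodes.h"    /* for the Relids typedef */

    static void
    example(int relindex, int attno)
    {
        Relids      relids = NULL;             /* set of relation identifiers */
        Bitmapset  *notnullattnums = NULL;     /* set of attribute numbers */

        relids = bms_add_member(relids, relindex);
        notnullattnums = bms_add_member(notnullattnums, attno);

        /* Swapping the two declarations would still compile, hence this cleanup. */
        Assert(bms_is_member(attno, notnullattnums));
        (void) relids;
    }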
Author: Junwang Zhao Reviewed-by: Tender Wang Reviewed-by: Richard Guo Discussion: https://postgr.es/m/CAEG8a3LJhp_xriXf39iCz0TsK+M-2biuhDhpLC6Baxw8+ZYT3A@mail.gmail.com --- src/backend/optimizer/util/clauses.c | 2 +- src/backend/optimizer/util/plancat.c | 6 +++--- src/include/optimizer/plancat.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 6f0b338d2cdf1..ae0bd073ca917 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -4203,7 +4203,7 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod, bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info) { - Relids notnullattnums = NULL; + Bitmapset *notnullattnums = NULL; Assert(IsA(var, Var)); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 4536bdd6cb4d7..572d626b2c4d2 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -62,7 +62,7 @@ get_relation_info_hook_type get_relation_info_hook = NULL; typedef struct NotnullHashEntry { Oid relid; /* OID of the relation */ - Relids notnullattnums; /* attnums of NOT NULL columns */ + Bitmapset *notnullattnums; /* attnums of NOT NULL columns */ } NotnullHashEntry; @@ -683,7 +683,7 @@ get_relation_notnullatts(PlannerInfo *root, Relation relation) Oid relid = RelationGetRelid(relation); NotnullHashEntry *hentry; bool found; - Relids notnullattnums = NULL; + Bitmapset *notnullattnums = NULL; /* bail out if the relation has no not-null constraints */ if (relation->rd_att->constr == NULL || @@ -750,7 +750,7 @@ get_relation_notnullatts(PlannerInfo *root, Relation relation) * Searches the hash table and returns the column not-null constraint * information for a given relation. */ -Relids +Bitmapset * find_relation_notnullatts(PlannerInfo *root, Oid relid) { NotnullHashEntry *hentry; diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index dd8f2cd157f6f..9610707683235 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -30,7 +30,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, extern void get_relation_notnullatts(PlannerInfo *root, Relation relation); -extern Relids find_relation_notnullatts(PlannerInfo *root, Oid relid); +extern Bitmapset *find_relation_notnullatts(PlannerInfo *root, Oid relid); extern List *infer_arbiter_indexes(PlannerInfo *root); From e92677e86333562b8dd4972083c8a1abf985d90d Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 07:27:48 +0200 Subject: [PATCH 26/32] Silence compiler warnings on clang 21 Clang 21 shows some new compiler warnings, for example: warning: variable 'dstsize' is uninitialized when passed as a const pointer argument here [-Wuninitialized-const-pointer] The fix is to initialize the variables when they are defined. This is similar to, for example, the existing situation in gistKeyIsEQ(). 
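The pattern behind the warning is easy to reproduce in isolation (hypothetical
names; not code from the tree):

    /* clang 21 warns when an uninitialized variable is passed by const pointer */
    static void
    report_size(const int *size)    /* callee promises not to write through it */
    {
        (void) size;
    }

    void
    caller(void)
    {
        int     dstsize = 0;    /* initializing at the definition silences it */

        report_size(&dstsize);  /* without "= 0", -Wuninitialized-const-pointer fires here */
    }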
Discussion: https://www.postgresql.org/message-id/flat/6604ad6e-5934-43ac-8590-15113d6ae4b1%40eisentraut.org --- src/backend/access/common/toast_internals.c | 2 +- src/backend/access/gist/gistutil.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index a1d0eed8953ba..75e908c2e80a7 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -135,7 +135,7 @@ toast_save_datum(Relation rel, Datum value, char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ]; /* ensure union is aligned well enough: */ int32 align_it; - } chunk_data; + } chunk_data = {0}; /* silence compiler warning */ int32 chunk_size; int32 chunk_seq = 0; char *data_p; diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index c0aa7d0222f39..cdc4ab3151be1 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -157,7 +157,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, { int i; GistEntryVector *evec; - int attrsize; + int attrsize = 0; /* silence compiler warning */ evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ); @@ -242,7 +242,7 @@ gistMakeUnionKey(GISTSTATE *giststate, int attno, char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ]; } storage; GistEntryVector *evec = &storage.gev; - int dstsize; + int dstsize = 0; /* silence compiler warning */ evec->n = 2; From 25f36066dd2abde74faa12f08e5e498a95128cd0 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 11 Sep 2025 11:55:29 +0200 Subject: [PATCH 27/32] Remove traces of support for Sun Studio compiler Per discussion, this compiler suite is no longer maintained, and it has not been able to compile PostgreSQL since at least PostgreSQL 17. This removes all the remaining support code for this compiler. Note that the Solaris operating system continues to be supported, but using GCC as the compiler. 
Reviewed-by: Andres Freund Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/a0f817ee-fb86-483a-8a14-b6f7f5991b6e%40eisentraut.org --- config/c-compiler.m4 | 2 +- configure | 53 +--------- configure.ac | 31 +----- doc/src/sgml/dfunc.sgml | 9 +- doc/src/sgml/installation.sgml | 60 +----------- meson.build | 2 +- src/Makefile.global.in | 4 - src/backend/port/Makefile | 12 --- src/backend/port/tas/sunstudio_sparc.s | 53 ---------- src/backend/port/tas/sunstudio_x86.s | 43 -------- src/backend/storage/lmgr/Makefile | 6 +- src/include/c.h | 15 ++- src/include/port/atomics.h | 2 - src/include/port/atomics/arch-x86.h | 2 +- src/include/port/atomics/generic-sunpro.h | 113 ---------------------- src/include/port/solaris.h | 21 ---- src/include/storage/s_lock.h | 24 +---- src/makefiles/meson.build | 4 +- src/template/linux | 23 ----- src/template/solaris | 29 +----- src/tools/pginclude/headerscheck | 1 - 21 files changed, 24 insertions(+), 485 deletions(-) delete mode 100644 src/backend/port/tas/sunstudio_sparc.s delete mode 100644 src/backend/port/tas/sunstudio_x86.s delete mode 100644 src/include/port/atomics/generic-sunpro.h diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index da40bd6a64755..236a59e8536c2 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -83,7 +83,7 @@ if test x"$pgac_cv__128bit_int" = xyes ; then AC_CACHE_CHECK([for __int128 alignment bug], [pgac_cv__128bit_int_bug], [AC_RUN_IFELSE([AC_LANG_PROGRAM([ /* This must match the corresponding code in c.h: */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +#if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #elif defined(_MSC_VER) #define pg_attribute_aligned(a) __declspec(align(a)) diff --git a/configure b/configure index 39c68161ceced..22cd866147b96 100755 --- a/configure +++ b/configure @@ -739,7 +739,6 @@ PKG_CONFIG_LIBDIR PKG_CONFIG_PATH PKG_CONFIG DLSUFFIX -TAS GCC CPP CFLAGS_SL @@ -760,7 +759,6 @@ CLANG LLVM_CONFIG AWK with_llvm -SUN_STUDIO_CC ac_ct_CXX CXXFLAGS CXX @@ -3059,12 +3057,6 @@ $as_echo "$template" >&6; } PORTNAME=$template -# Initialize default assumption that we do not need separate assembly code -# for TAS (test-and-set). This can be overridden by the template file -# when it's executed. -need_tas=no -tas_file=dummy.s - # Default, works for most platforms, override in template file if needed DLSUFFIX=".so" @@ -4799,30 +4791,6 @@ else fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -# Check if it's Sun Studio compiler. We assume that -# __SUNPRO_C will be defined for Sun Studio compilers -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main () -{ -#ifndef __SUNPRO_C -choke me -#endif - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - SUN_STUDIO_CC=yes -else - SUN_STUDIO_CC=no -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - - # # LLVM @@ -6748,7 +6716,7 @@ fi # __attribute__((visibility("hidden"))) is supported, if we encounter a # compiler that supports one of the supported variants of -fvisibility=hidden # but uses a different syntax to mark a symbol as exported. -if test "$GCC" = yes -o "$SUN_STUDIO_CC" = yes ; then +if test "$GCC" = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -fvisibility=hidden, for CFLAGS_SL_MODULE" >&5 $as_echo_n "checking whether ${CC} supports -fvisibility=hidden, for CFLAGS_SL_MODULE... 
" >&6; } if ${pgac_cv_prog_CC_cflags__fvisibility_hidden+:} false; then : @@ -7731,20 +7699,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -# -# Set up TAS assembly code if needed; the template file has now had its -# chance to request this. -# -ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}" - - -if test "$need_tas" = yes ; then - TAS=tas.o -else - TAS="" -fi - - cat >>confdefs.h <<_ACEOF #define DLSUFFIX "$DLSUFFIX" @@ -17141,7 +17095,7 @@ else /* end confdefs.h. */ /* This must match the corresponding code in c.h: */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +#if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #elif defined(_MSC_VER) #define pg_attribute_aligned(a) __declspec(align(a)) @@ -19344,8 +19298,6 @@ fi if test x"$GCC" = x"yes" ; then cc_string=`${CC} --version | sed q` case $cc_string in [A-Za-z]*) ;; *) cc_string="GCC $cc_string";; esac -elif test x"$SUN_STUDIO_CC" = x"yes" ; then - cc_string=`${CC} -V 2>&1 | sed q` else cc_string=$CC fi @@ -20142,7 +20094,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 for ac_config_target in $ac_config_targets do case $ac_config_target in - "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;; "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;; "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;; "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;; diff --git a/configure.ac b/configure.ac index 066e3976c0aac..e44943aa6fe35 100644 --- a/configure.ac +++ b/configure.ac @@ -95,12 +95,6 @@ AC_MSG_RESULT([$template]) PORTNAME=$template AC_SUBST(PORTNAME) -# Initialize default assumption that we do not need separate assembly code -# for TAS (test-and-set). This can be overridden by the template file -# when it's executed. -need_tas=no -tas_file=dummy.s - # Default, works for most platforms, override in template file if needed DLSUFFIX=".so" @@ -400,14 +394,6 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [@%:@ifndef __INTEL_COMPILER choke me @%:@endif])], [ICC=yes], [ICC=no]) -# Check if it's Sun Studio compiler. We assume that -# __SUNPRO_C will be defined for Sun Studio compilers -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [@%:@ifndef __SUNPRO_C -choke me -@%:@endif])], [SUN_STUDIO_CC=yes], [SUN_STUDIO_CC=no]) - -AC_SUBST(SUN_STUDIO_CC) - # # LLVM @@ -618,7 +604,7 @@ fi # __attribute__((visibility("hidden"))) is supported, if we encounter a # compiler that supports one of the supported variants of -fvisibility=hidden # but uses a different syntax to mark a symbol as exported. -if test "$GCC" = yes -o "$SUN_STUDIO_CC" = yes ; then +if test "$GCC" = yes; then PGAC_PROG_CC_VAR_OPT(CFLAGS_SL_MODULE, [-fvisibility=hidden]) # For C++ we additionally want -fvisibility-inlines-hidden PGAC_PROG_VARCXX_VARFLAGS_OPT(CXX, CXXFLAGS_SL_MODULE, [-fvisibility=hidden]) @@ -774,19 +760,6 @@ AC_PROG_CPP AC_SUBST(GCC) -# -# Set up TAS assembly code if needed; the template file has now had its -# chance to request this. 
-# -AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}]) - -if test "$need_tas" = yes ; then - TAS=tas.o -else - TAS="" -fi -AC_SUBST(TAS) - AC_SUBST(DLSUFFIX)dnl AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"], [Define to the file name extension of dynamically-loadable modules.]) @@ -2478,8 +2451,6 @@ AC_SUBST(LDFLAGS_EX_BE) if test x"$GCC" = x"yes" ; then cc_string=`${CC} --version | sed q` case $cc_string in [[A-Za-z]]*) ;; *) cc_string="GCC $cc_string";; esac -elif test x"$SUN_STUDIO_CC" = x"yes" ; then - cc_string=`${CC} -V 2>&1 | sed q` else cc_string=$CC fi diff --git a/doc/src/sgml/dfunc.sgml b/doc/src/sgml/dfunc.sgml index b94aefcd0ca6c..3778efc83ebfa 100644 --- a/doc/src/sgml/dfunc.sgml +++ b/doc/src/sgml/dfunc.sgml @@ -157,19 +157,12 @@ ld -Bshareable -o foo.so foo.o The compiler flag to create PIC is - with the Sun compiler and with GCC. To link shared libraries, the compiler option is - with either compiler or alternatively with GCC. -cc -KPIC -c foo.c -cc -G -o foo.so foo.o - - or - gcc -fPIC -c foo.c -gcc -G -o foo.so foo.o +gcc -shared -o foo.so foo.o diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index a4ad80a678211..593202f4fb259 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1676,10 +1676,6 @@ build-postgresql: using the GCC compiler: ./configure CC='gcc -m64' --enable-dtrace DTRACEFLAGS='-64' ... - - Using Sun's compiler: - -./configure CC='/opt/SUNWspro/bin/cc -xtarget=native64' --enable-dtrace DTRACEFLAGS='-64' ... @@ -3713,24 +3709,13 @@ xcrun --show-sdk-path Required Tools - You can build with either GCC or Sun's compiler suite. For - better code optimization, Sun's compiler is strongly recommended - on the SPARC architecture. If - you are using Sun's compiler, be careful not to select - /usr/ucb/cc; - use /opt/SUNWspro/bin/cc. + Only GCC is supported as the compiler. Sun's compiler suite is no longer + supported. - You can download Sun Studio - from . - Many GNU tools are integrated into Solaris 10, or they are - present on the Solaris companion CD. If you need packages for - older versions of Solaris, you can find these tools - at . - If you prefer - sources, look - at . + Many additional dependencies can be installed via the package management + system. @@ -3753,27 +3738,6 @@ configure ... LDFLAGS="-R /usr/sfw/lib:/opt/sfw/lib:/usr/local/lib" - - Compiling for Optimal Performance - - - On the SPARC architecture, Sun Studio is strongly recommended for - compilation. Try using the optimization - flag to generate significantly faster binaries. Do not use any - flags that modify behavior of floating-point operations - and errno processing (e.g., - ). - - - - If you do not have a reason to use 64-bit binaries on SPARC, - prefer the 32-bit version. The 64-bit operations are slower and - 64-bit binaries are slower than the 32-bit variants. On the - other hand, 32-bit code on the AMD64 CPU family is not native, - so 32-bit code is significantly slower on that CPU family. - - - Using DTrace for Tracing PostgreSQL @@ -3781,22 +3745,6 @@ configure ... LDFLAGS="-R /usr/sfw/lib:/opt/sfw/lib:/usr/local/lib" Yes, using DTrace is possible. See for further information. - - - If you see the linking of the postgres executable abort with an - error message like: - -Undefined first referenced - symbol in file -AbortTransaction utils/probes.o -CommitTransaction utils/probes.o -ld: fatal: Symbol referencing errors. 
No output written to postgres -collect2: ld returned 1 exit status -make: *** [postgres] Error 1 - - your DTrace installation is too old to handle probes in static - functions. You need Solaris 10u4 or newer to use DTrace. - diff --git a/meson.build b/meson.build index ab8101d67b26d..d71c7c8267e79 100644 --- a/meson.build +++ b/meson.build @@ -1809,7 +1809,7 @@ if cc.links(''' if not meson.is_cross_build() r = cc.run(''' /* This must match the corresponding code in c.h: */ - #if defined(__GNUC__) || defined(__SUNPRO_C) + #if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #elif defined(_MSC_VER) #define pg_attribute_aligned(a) __declspec(align(a)) diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 8b1b357beaa04..0aa389bc71012 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -267,7 +267,6 @@ endif # not PGXS CC = @CC@ GCC = @GCC@ -SUN_STUDIO_CC = @SUN_STUDIO_CC@ CXX = @CXX@ CFLAGS = @CFLAGS@ CFLAGS_SL = @CFLAGS_SL@ @@ -796,9 +795,6 @@ ifeq ($(PORTNAME),win32) LIBS += -lws2_32 endif -# Not really standard libc functions, used by the backend. -TAS = @TAS@ - ########################################################################## # diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile index 47338d9922957..8613ac01aff6d 100644 --- a/src/backend/port/Makefile +++ b/src/backend/port/Makefile @@ -22,7 +22,6 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = \ - $(TAS) \ atomics.o \ pg_sema.o \ pg_shmem.o @@ -33,16 +32,5 @@ endif include $(top_srcdir)/src/backend/common.mk -tas.o: tas.s -ifeq ($(SUN_STUDIO_CC), yes) -# preprocess assembler file with cpp - $(CC) $(CFLAGS) -c -P $< - mv $*.i $*_cpp.s - $(CC) $(CFLAGS) -c $*_cpp.s -o $@ -else - $(CC) $(CFLAGS) -c $< -endif - clean: - rm -f tas_cpp.s $(MAKE) -C win32 clean diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s deleted file mode 100644 index 8e0a0965b64ea..0000000000000 --- a/src/backend/port/tas/sunstudio_sparc.s +++ /dev/null @@ -1,53 +0,0 @@ -!------------------------------------------------------------------------- -! -! sunstudio_sparc.s -! compare and swap for Sun Studio on Sparc -! -! Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group -! Portions Copyright (c) 1994, Regents of the University of California -! -! IDENTIFICATION -! src/backend/port/tas/sunstudio_sparc.s -! -!------------------------------------------------------------------------- - -! Fortunately the Sun compiler can process cpp conditionals with -P - -! '/' is the comment for x86, while '!' is the comment for Sparc - -#if defined(__sparcv9) || defined(__sparc) - - .section ".text" - .align 8 - .skip 24 - .align 4 - - .global pg_atomic_cas -pg_atomic_cas: - - ! "cas" only works on sparcv9 and sparcv8plus chips, and - ! requires a compiler targeting these CPUs. It will fail - ! on a compiler targeting sparcv8, and of course will not - ! be understood by a sparcv8 CPU. gcc continues to use - ! "ldstub" because it targets sparcv7. - ! - ! There is actually a trick for embedding "cas" in a - ! sparcv8-targeted compiler, but it can only be run - ! on a sparcv8plus/v9 cpus: - ! - ! http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il - ! - ! NB: We're assuming we're running on a TSO system here - solaris - ! userland luckily always has done so. 
- -#if defined(__sparcv9) || defined(__sparcv8plus) - cas [%o0],%o2,%o1 -#else - ldstub [%o0],%o1 -#endif - mov %o1,%o0 - retl - nop - .type pg_atomic_cas,2 - .size pg_atomic_cas,(.-pg_atomic_cas) -#endif diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s deleted file mode 100644 index 0111ffde45c29..0000000000000 --- a/src/backend/port/tas/sunstudio_x86.s +++ /dev/null @@ -1,43 +0,0 @@ -/------------------------------------------------------------------------- -/ -/ sunstudio_x86.s -/ compare and swap for Sun Studio on x86 -/ -/ Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group -/ Portions Copyright (c) 1994, Regents of the University of California -/ -/ IDENTIFICATION -/ src/backend/port/tas/sunstudio_x86.s -/ -/------------------------------------------------------------------------- - -/ Fortunately the Sun compiler can process cpp conditionals with -P - -/ '/' is the comment for x86, while '!' is the comment for Sparc - - .file "tas.s" - -#if defined(__amd64) - .code64 -#endif - - .globl pg_atomic_cas - .type pg_atomic_cas, @function - - .section .text, "ax" - .align 16 - -pg_atomic_cas: -#if defined(__amd64) - movl %edx,%eax - lock - cmpxchgl %esi,(%rdi) -#else - movl 4(%esp), %edx - movl 8(%esp), %ecx - movl 12(%esp), %eax - lock - cmpxchgl %ecx, (%edx) -#endif - ret - .size pg_atomic_cas, . - pg_atomic_cas diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile index 6cbaf23b855f6..a5fbc24ddad6e 100644 --- a/src/backend/storage/lmgr/Makefile +++ b/src/backend/storage/lmgr/Makefile @@ -24,13 +24,9 @@ OBJS = \ include $(top_srcdir)/src/backend/common.mk -ifdef TAS -TASPATH = $(top_builddir)/src/backend/port/tas.o -endif - s_lock_test: s_lock.c $(top_builddir)/src/common/libpgcommon.a $(top_builddir)/src/port/libpgport.a $(CC) $(CPPFLAGS) $(CFLAGS) -DS_LOCK_TEST=1 $(srcdir)/s_lock.c \ - $(TASPATH) -L $(top_builddir)/src/common -lpgcommon \ + -L $(top_builddir)/src/common -lpgcommon \ -L $(top_builddir)/src/port -lpgport -lm -o s_lock_test lwlocknames.h: ../../../include/storage/lwlocklist.h ../../utils/activity/wait_event_names.txt generate-lwlocknames.pl diff --git a/src/include/c.h b/src/include/c.h index b580cfa7d3178..f303ba0605a40 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -114,7 +114,6 @@ * GCC: https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html * GCC: https://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html * Clang: https://clang.llvm.org/docs/AttributeReference.html - * Sunpro: https://docs.oracle.com/cd/E18659_01/html/821-1384/gjzke.html */ /* @@ -157,7 +156,7 @@ */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define pg_noreturn _Noreturn -#elif defined(__GNUC__) || defined(__SUNPRO_C) +#elif defined(__GNUC__) #define pg_noreturn __attribute__((noreturn)) #elif defined(_MSC_VER) #define pg_noreturn __declspec(noreturn) @@ -233,8 +232,8 @@ #define pg_attribute_printf(f,a) #endif -/* GCC and Sunpro support aligned and packed */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +/* GCC supports aligned and packed */ +#if defined(__GNUC__) #define pg_attribute_aligned(a) __attribute__((aligned(a))) #define pg_attribute_packed() __attribute__((packed)) #elif defined(_MSC_VER) @@ -259,8 +258,8 @@ * choose not to. But, if possible, don't force inlining in unoptimized * debug builds. 
*/ -#if (defined(__GNUC__) && defined(__OPTIMIZE__)) || defined(__SUNPRO_C) -/* GCC and Sunpro support always_inline via __attribute__ */ +#if defined(__GNUC__) && defined(__OPTIMIZE__) +/* GCC supports always_inline via __attribute__ */ #define pg_attribute_always_inline __attribute__((always_inline)) inline #elif defined(_MSC_VER) /* MSVC has a special keyword for this */ @@ -276,8 +275,8 @@ * for proper cost attribution. Note that unlike the pg_attribute_XXX macros * above, this should be placed before the function's return type and name. */ -/* GCC and Sunpro support noinline via __attribute__ */ -#if defined(__GNUC__) || defined(__SUNPRO_C) +/* GCC supports noinline via __attribute__ */ +#if defined(__GNUC__) #define pg_noinline __attribute__((noinline)) /* msvc via declspec */ #elif defined(_MSC_VER) diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h index 074136fe4c4a8..96f1858da9722 100644 --- a/src/include/port/atomics.h +++ b/src/include/port/atomics.h @@ -88,8 +88,6 @@ #include "port/atomics/generic-gcc.h" #elif defined(_MSC_VER) #include "port/atomics/generic-msvc.h" -#elif defined(__SUNPRO_C) && !defined(__GNUC__) -#include "port/atomics/generic-sunpro.h" #else /* Unknown compiler. */ #endif diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h index 8983dd89d53d7..4ba2ccc05913d 100644 --- a/src/include/port/atomics/arch-x86.h +++ b/src/include/port/atomics/arch-x86.h @@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) */ #if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ - defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ + defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, msvc */ #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY #endif /* 8 byte single-copy atomicity */ diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h deleted file mode 100644 index 09bba0be2037d..0000000000000 --- a/src/include/port/atomics/generic-sunpro.h +++ /dev/null @@ -1,113 +0,0 @@ -/*------------------------------------------------------------------------- - * - * generic-sunpro.h - * Atomic operations for solaris' CC - * - * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group - * - * NOTES: - * - * Documentation: - * * manpage for atomic_cas(3C) - * http://www.unix.com/man-page/opensolaris/3c/atomic_cas/ - * http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html - * - * src/include/port/atomics/generic-sunpro.h - * - * ------------------------------------------------------------------------- - */ - -#ifdef HAVE_MBARRIER_H -#include - -#define pg_compiler_barrier_impl() __compiler_barrier() - -#ifndef pg_memory_barrier_impl -/* - * Despite the name this is actually a full barrier. Expanding to mfence/ - * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc - * respectively. - */ -# define pg_memory_barrier_impl() __machine_rw_barrier() -#endif -#ifndef pg_read_barrier_impl -# define pg_read_barrier_impl() __machine_r_barrier() -#endif -#ifndef pg_write_barrier_impl -# define pg_write_barrier_impl() __machine_w_barrier() -#endif - -#endif /* HAVE_MBARRIER_H */ - -/* Older versions of the compiler don't have atomic.h... 
*/ -#ifdef HAVE_ATOMIC_H - -#include - -#define PG_HAVE_ATOMIC_U32_SUPPORT -typedef struct pg_atomic_uint32 -{ - volatile uint32 value; -} pg_atomic_uint32; - -#define PG_HAVE_ATOMIC_U64_SUPPORT -typedef struct pg_atomic_uint64 -{ - /* - * Syntax to enforce variable alignment should be supported by versions - * supporting atomic.h, but it's hard to find accurate documentation. If - * it proves to be a problem, we'll have to add more version checks for 64 - * bit support. - */ - volatile uint64 value pg_attribute_aligned(8); -} pg_atomic_uint64; - -#endif /* HAVE_ATOMIC_H */ - - -#ifdef HAVE_ATOMIC_H - -#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 -static inline bool -pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, - uint32 *expected, uint32 newval) -{ - bool ret; - uint32 current; - - current = atomic_cas_32(&ptr->value, *expected, newval); - ret = current == *expected; - *expected = current; - return ret; -} - -#define PG_HAVE_ATOMIC_EXCHANGE_U32 -static inline uint32 -pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 newval) -{ - return atomic_swap_32(&ptr->value, newval); -} - -#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 -static inline bool -pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, - uint64 *expected, uint64 newval) -{ - bool ret; - uint64 current; - - AssertPointerAlignment(expected, 8); - current = atomic_cas_64(&ptr->value, *expected, newval); - ret = current == *expected; - *expected = current; - return ret; -} - -#define PG_HAVE_ATOMIC_EXCHANGE_U64 -static inline uint64 -pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 newval) -{ - return atomic_swap_64(&ptr->value, newval); -} - -#endif /* HAVE_ATOMIC_H */ diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h index 8ff40007c7f6a..c352361c81d83 100644 --- a/src/include/port/solaris.h +++ b/src/include/port/solaris.h @@ -1,26 +1,5 @@ /* src/include/port/solaris.h */ -/* - * Sort this out for all operating systems some time. The __xxx - * symbols are defined on both GCC and Solaris CC, although GCC - * doesn't document them. The __xxx__ symbols are only on GCC. - */ -#if defined(__i386) && !defined(__i386__) -#define __i386__ -#endif - -#if defined(__amd64) && !defined(__amd64__) -#define __amd64__ -#endif - -#if defined(__x86_64) && !defined(__x86_64__) -#define __x86_64__ -#endif - -#if defined(__sparc) && !defined(__sparc__) -#define __sparc__ -#endif - #if defined(__i386__) #include #endif diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index 2f73f9fcf57a2..7f8f566bd407f 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -333,9 +333,9 @@ tas(volatile slock_t *lock) slock_t _res; /* - * See comment in src/backend/port/tas/sunstudio_sparc.s for why this - * uses "ldstub", and that file uses "cas". gcc currently generates - * sparcv7-targeted binaries, so "cas" use isn't possible. + * "cas" would be better than "ldstub", but it is only present on + * sparcv8plus and later, while some platforms still support sparcv7 or + * sparcv8. Also, "cas" requires that the system be running in TSO mode. 
*/ __asm__ __volatile__( " ldstub [%2], %0 \n" @@ -594,24 +594,6 @@ tas(volatile slock_t *lock) #if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */ -/* These are in sunstudio_(sparc|x86).s */ - -#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc)) -#define HAS_TEST_AND_SET - -#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus) -typedef unsigned int slock_t; -#else -typedef unsigned char slock_t; -#endif - -extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with, - slock_t cmp); - -#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0) -#endif - - #ifdef _MSC_VER typedef LONG slock_t; diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 54dbc059adac7..0def244c9011d 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -63,8 +63,6 @@ pgxs_kv = { 'DLSUFFIX': dlsuffix, 'EXEEXT': exesuffix, - 'SUN_STUDIO_CC': 'no', # not supported so far - # want the chosen option, rather than the library 'with_ssl' : ssl_library, 'with_uuid': uuidopt, @@ -179,7 +177,7 @@ pgxs_empty = [ 'WANTED_LANGUAGES', # Not needed because we don't build the server / PLs with the generated makefile - 'LIBOBJS', 'PG_CRC32C_OBJS', 'TAS', + 'LIBOBJS', 'PG_CRC32C_OBJS', 'PG_TEST_EXTRA', 'DTRACEFLAGS', # only server has dtrace probes diff --git a/src/template/linux b/src/template/linux index ec3302c4a223f..faefe64254a90 100644 --- a/src/template/linux +++ b/src/template/linux @@ -14,26 +14,3 @@ CFLAGS_SL="-fPIC" # If --enable-profiling is specified, we need -DLINUX_PROFILE PLATFORM_PROFILE_FLAGS="-DLINUX_PROFILE" - -if test "$SUN_STUDIO_CC" = "yes" ; then - CC="$CC -Xa" # relaxed ISO C mode - CFLAGS="-v" # -v is like gcc -Wall - if test "$enable_debug" != yes; then - CFLAGS="$CFLAGS -O" # any optimization breaks debug - fi - - # Pick the right test-and-set (TAS) code for the Sun compiler. - # We would like to use in-line assembler, but the compiler - # requires *.il files to be on every compile line, making - # the build system too fragile. - case $host_cpu in - sparc) - need_tas=yes - tas_file=sunstudio_sparc.s - ;; - i?86|x86_64) - need_tas=yes - tas_file=sunstudio_x86.s - ;; - esac -fi diff --git a/src/template/solaris b/src/template/solaris index f88b1cdad37f8..a4d8d38a8f852 100644 --- a/src/template/solaris +++ b/src/template/solaris @@ -1,31 +1,4 @@ # src/template/solaris # Extra CFLAGS for code that will go into a shared library -if test "$GCC" = yes ; then - CFLAGS_SL="-fPIC" -else - CFLAGS_SL="-KPIC" -fi - -if test "$SUN_STUDIO_CC" = yes ; then - CC="$CC -Xa" # relaxed ISO C mode - CFLAGS="-v" # -v is like gcc -Wall - if test "$enable_debug" != yes; then - CFLAGS="$CFLAGS -O" # any optimization breaks debug - fi - - # Pick the right test-and-set (TAS) code for the Sun compiler. - # We would like to use in-line assembler, but the compiler - # requires *.il files to be on every compile line, making - # the build system too fragile. 
- case $host_cpu in - sparc) - need_tas=yes - tas_file=sunstudio_sparc.s - ;; - i?86|x86_64) - need_tas=yes - tas_file=sunstudio_x86.s - ;; - esac -fi +CFLAGS_SL="-fPIC" diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck index 17138a7569e4f..d017490a5386a 100755 --- a/src/tools/pginclude/headerscheck +++ b/src/tools/pginclude/headerscheck @@ -114,7 +114,6 @@ do test "$f" = src/include/port/atomics/generic.h && continue test "$f" = src/include/port/atomics/generic-gcc.h && continue test "$f" = src/include/port/atomics/generic-msvc.h && continue - test "$f" = src/include/port/atomics/generic-sunpro.h && continue # sepgsql.h depends on headers that aren't there on most platforms. test "$f" = contrib/sepgsql/sepgsql.h && continue From 2aac62be8cbb870ccf8c5b3fbb8a4e4aa8a14a73 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 07:57:06 +0200 Subject: [PATCH 28/32] Default to log_lock_waits=on If someone is stuck behind a lock for more than a second, that is almost always a problem that is worth a log entry. Author: Laurenz Albe Reviewed-By: Michael Banck Reviewed-By: Robert Haas Reviewed-By: Christoph Berg Reviewed-By: Stephen Frost Discussion: https://postgr.es/m/b8b8502915e50f44deb111bc0b43a99e2733e117.camel%40cybertec.at --- doc/src/sgml/config.sgml | 2 +- src/backend/storage/lmgr/proc.c | 2 +- src/backend/utils/misc/guc_parameters.dat | 2 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2a3685f474a96..3c33d5d0fbcae 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7929,7 +7929,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' Controls whether a log message is produced when a session waits longer than to acquire a lock. This is useful in determining if lock waits are causing - poor performance. The default is off. + poor performance. The default is on. Only superusers and users with the appropriate SET privilege can change this setting. diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index e9ef0fbfe32cb..96f29aafc391e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -60,7 +60,7 @@ int LockTimeout = 0; int IdleInTransactionSessionTimeout = 0; int TransactionTimeout = 0; int IdleSessionTimeout = 0; -bool log_lock_waits = false; +bool log_lock_waits = true; /* Pointer to this process's PGPROC struct, if any */ PGPROC *MyProc = NULL; diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 0da01627cfec1..6bc6be13d2ad2 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -566,7 +566,7 @@ { name => 'log_lock_waits', type => 'bool', context => 'PGC_SUSET', group => 'LOGGING_WHAT', short_desc => 'Logs long lock waits.', variable => 'log_lock_waits', - boot_val => 'false', + boot_val => 'true', }, { name => 'log_lock_failures', type => 'bool', context => 'PGC_SUSET', group => 'LOGGING_WHAT', diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 26c0869356485..c36fcb9ab6105 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -624,7 +624,7 @@ # processes # %% = '%' # e.g. 
'<%u%%%d> ' -#log_lock_waits = off # log lock waits >= deadlock_timeout +#log_lock_waits = on # log lock waits >= deadlock_timeout #log_lock_failures = off # log lock failures #log_recovery_conflict_waits = off # log standby recovery conflict waits # >= deadlock_timeout From 675ddc4d704f4cde0bc72244263a9efbb0d32cb8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 08:13:05 +0200 Subject: [PATCH 29/32] Improve pgbench definition of yyscan_t It was defining yyscan_t as a macro while the rest of the code uses a typedef with #ifdef guards around it. The latter is also what the flex generated code uses. So it seems best to make it look like those other places for consistency. The old way also had a potential for conflict if some code included multiple headers providing yyscan_t. exprscan.l includes #include "fe_utils/psqlscan_int.h" #include "pgbench.h" and fe_utils/psqlscan_int.h contains #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; #endif which was then followed by pgbench.h #define yyscan_t void * and then the generated code in exprscan.c #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T typedef void* yyscan_t; #endif This works, but if the #ifdef guard in psqlscan_int.h is removed, this fails. We want to move toward allowing repeat typedefs, per C11, but for that we need to make sure they are all the same. Reviewed-by: Chao Li Discussion: https://www.postgresql.org/message-id/flat/10d32190-f31b-40a5-b177-11db55597355@eisentraut.org --- src/bin/pgbench/pgbench.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index e053c9e2eb63d..f8b7b497d1ee0 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -16,11 +16,14 @@ /* * This file is included outside exprscan.l, in places where we can't see * flex's definition of typedef yyscan_t. Fortunately, it's documented as - * being "void *", so we can use a macro to keep the function declarations + * being "void *", so we can use typedef to keep the function declarations * here looking like the definitions in exprscan.l. exprparse.y and * pgbench.c also use this to be able to declare things as "yyscan_t". */ -#define yyscan_t void * +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif /* * Likewise, we can't see exprparse.y's definition of union YYSTYPE here, From ae0e1be9f2a20f6b64072dcee5b8dd7b9027a8fa Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 12 Sep 2025 08:13:05 +0200 Subject: [PATCH 30/32] Allow redeclaration of typedef yyscan_t This is allowed in C11, so we don't need the workaround guards against it anymore. This effectively reverts commit 382092a0cd2 that put these guards in place. 
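As a minimal illustration (an editor's sketch, not text from the commit itself):
C11 permits redeclaring a typedef name as long as every declaration names the
same type, so repeated identical declarations of yyscan_t are valid and the
#ifdef guards become unnecessary:

    /* Both declarations may appear in the same translation unit under C11;
     * redeclaring a typedef with the identical type is permitted. */
    typedef void *yyscan_t;
    typedef void *yyscan_t;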
Reviewed-by: Chao Li Discussion: https://www.postgresql.org/message-id/flat/10d32190-f31b-40a5-b177-11db55597355@eisentraut.org --- contrib/cube/cubedata.h | 3 --- contrib/seg/segdata.h | 3 --- src/backend/utils/adt/jsonpath_internal.h | 3 --- src/bin/pgbench/pgbench.h | 3 --- src/include/bootstrap/bootstrap.h | 3 --- src/include/fe_utils/psqlscan_int.h | 6 ------ src/include/replication/syncrep.h | 3 --- src/include/replication/walsender_private.h | 3 --- src/pl/plpgsql/src/plpgsql.h | 3 --- 9 files changed, 30 deletions(-) diff --git a/contrib/cube/cubedata.h b/contrib/cube/cubedata.h index ad1e2bd699810..8bfcc6e99a27d 100644 --- a/contrib/cube/cubedata.h +++ b/contrib/cube/cubedata.h @@ -62,10 +62,7 @@ typedef struct NDBOX /* for cubescan.l and cubeparse.y */ /* All grammar constructs return strings */ #define YYSTYPE char * -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif /* in cubescan.l */ extern int cube_yylex(YYSTYPE *yylval_param, yyscan_t yyscanner); diff --git a/contrib/seg/segdata.h b/contrib/seg/segdata.h index 4347c31c28e94..7bc7c83dca309 100644 --- a/contrib/seg/segdata.h +++ b/contrib/seg/segdata.h @@ -16,10 +16,7 @@ extern int significant_digits(const char *s); /* for segscan.l and segparse.y */ union YYSTYPE; -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif /* in segscan.l */ extern int seg_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner); diff --git a/src/backend/utils/adt/jsonpath_internal.h b/src/backend/utils/adt/jsonpath_internal.h index f78069857d02b..19567aca6f775 100644 --- a/src/backend/utils/adt/jsonpath_internal.h +++ b/src/backend/utils/adt/jsonpath_internal.h @@ -22,10 +22,7 @@ typedef struct JsonPathString int total; } JsonPathString; -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif #include "utils/jsonpath.h" #include "jsonpath_gram.h" diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index f8b7b497d1ee0..d55d30e0ef954 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -20,10 +20,7 @@ * here looking like the definitions in exprscan.l. exprparse.y and * pgbench.c also use this to be able to declare things as "yyscan_t". */ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif /* * Likewise, we can't see exprparse.y's definition of union YYSTYPE here, diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index befc4fa1b3d87..5ad347ec290fa 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -56,10 +56,7 @@ extern void boot_get_type_io_data(Oid typid, Oid *typoutput); union YYSTYPE; -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T typedef void *yyscan_t; -#endif extern int boot_yyparse(yyscan_t yyscanner); extern int boot_yylex_init(yyscan_t *yyscannerp); diff --git a/src/include/fe_utils/psqlscan_int.h b/src/include/fe_utils/psqlscan_int.h index 2a3a9d7c82aaa..a1ebf226cf499 100644 --- a/src/include/fe_utils/psqlscan_int.h +++ b/src/include/fe_utils/psqlscan_int.h @@ -51,14 +51,8 @@ * validity checking; in actual use, this file should always be included * from the body of a flex file, where these symbols are already defined. 
 */
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
 typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 
 /*
  * We use a stack of flex buffers to handle substitution of psql variables.
diff --git a/src/include/replication/syncrep.h b/src/include/replication/syncrep.h
index 675669a79f7d3..dc2b118b16629 100644
--- a/src/include/replication/syncrep.h
+++ b/src/include/replication/syncrep.h
@@ -97,10 +97,7 @@ extern void SyncRepUpdateSyncStandbysDefined(void);
  * in syncrep_gram.y and syncrep_scanner.l
  */
 union YYSTYPE;
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 extern int syncrep_yyparse(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner);
 extern int syncrep_yylex(union YYSTYPE *yylval_param, char **syncrep_parse_error_msg_p, yyscan_t yyscanner);
 extern void syncrep_yyerror(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse_error_msg_p, yyscan_t yyscanner, const char *str);
diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h
index e98701038f506..384b8a78b9462 100644
--- a/src/include/replication/walsender_private.h
+++ b/src/include/replication/walsender_private.h
@@ -141,10 +141,7 @@ extern void WalSndSetState(WalSndState state);
  * repl_scanner.l
  */
 union YYSTYPE;
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 extern int replication_yyparse(Node **replication_parse_result_p, yyscan_t yyscanner);
 extern int replication_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner);
 pg_noreturn extern void replication_yyerror(Node **replication_parse_result_p, yyscan_t yyscanner, const char *message);
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index 41e52b8ce7183..5f193a3718399 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -1307,10 +1307,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func);
  */
 union YYSTYPE;
 #define YYLTYPE int
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
 typedef void *yyscan_t;
-#endif
 extern int plpgsql_yylex(union YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_t yyscanner);
 extern int plpgsql_token_length(yyscan_t yyscanner);
 extern void plpgsql_push_back_token(int token, union YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_t yyscanner);

From 20d541a200e9dfed8affef9e798ff35ca0f30b8e Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Fri, 12 Sep 2025 10:18:31 -0400
Subject: [PATCH 31/32] ci: openbsd: Increase RAM disk's size

Its size was ~3.8GB before, which sometimes was not enough. OpenBSD CI
tasks were often failing due to no space left on device. Increase the
RAM disk size to ~4.6 GB.
Author: Nazir Bilal Yavuz
Discussion: https://postgr.es/m/CAN55FZ2XVVPJRJmGB2DsL3gOrOinWh=HWvj6GO1cHzJ=6LwTag@mail.gmail.com
Backpatch-through: 18, where openbsd was added to CI
---
 src/tools/ci/gcp_ram_disk.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/tools/ci/gcp_ram_disk.sh b/src/tools/ci/gcp_ram_disk.sh
index d48634512ac28..18dbb2037f5dc 100755
--- a/src/tools/ci/gcp_ram_disk.sh
+++ b/src/tools/ci/gcp_ram_disk.sh
@@ -15,7 +15,12 @@ case "`uname`" in
     umount /dev/sd0j # unused /usr/obj partition
     printf "m j\n\n\nswap\nw\nq\n" | disklabel -E sd0
     swapon /dev/sd0j
-    mount -t mfs -o rw,noatime,nodev,-s=8000000 swap $CIRRUS_WORKING_DIR
+    # Remove the per-process data segment limit so that mount_mfs can allocate
+    # large memory filesystems. Without this, mount_mfs mmap() may fail with
+    # "Cannot allocate memory" if the requested size exceeds the current
+    # datasize limit.
+    ulimit -d unlimited
+    mount -t mfs -o rw,noatime,nodev,-s=10000000 swap $CIRRUS_WORKING_DIR
     ;;
 esac

From 7dcea51c2a4dcf7c512bbd4f618d1d3620f9d3d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?=
Date: Fri, 12 Sep 2025 18:47:25 +0200
Subject: [PATCH 32/32] Avoid unexpected changes of CurrentResourceOwner and
 CurrentMemoryContext

Users of logical decoding can encounter an unexpected change of
CurrentResourceOwner and CurrentMemoryContext. The problem is that,
unlike other call sites of RollbackAndReleaseCurrentSubTransaction(), in
reorderbuffer.c we fail to restore the original values of these global
variables after being clobbered by subtransaction abort. This patch
saves the values prior to the call and restores them afterwards.

In addition, logical.c and logicalfuncs.c had a hack to restore the
resource owner, presumably because of the lack of this restore. Remove
that. Instead, because the test coverage here is not very consistent,
add an Assert() to ensure that the resowner is kept identical; this
would make it easy to detect other cases of bugs where we fail to
restore resowner properly. This could be removed later.

This is arguably an old bug, but there appears to be no reason to
backpatch it and it's risky to do so, so refrain for now.

Author: Antonin Houska
Reported-by: Mihail Nikalayeu
Reviewed-by: Euler Taveira
Discussion: https://postgr.es/m/119497.1756892972@localhost
---
 src/backend/replication/logical/logical.c | 19 +++++++++++--------
 .../replication/logical/logicalfuncs.c | 19 +++++++++++--------
 .../replication/logical/reorderbuffer.c | 15 +++++++++++++++
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 7e363a7c05b4f..c68c0481f427a 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -2082,7 +2082,7 @@ LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
 						bool *found_consistent_snapshot)
 {
 	LogicalDecodingContext *ctx;
-	ResourceOwner old_resowner = CurrentResourceOwner;
+	ResourceOwner old_resowner PG_USED_FOR_ASSERTS_ONLY = CurrentResourceOwner;
 	XLogRecPtr retlsn;
 
 	Assert(moveto != InvalidXLogRecPtr);
@@ -2141,21 +2141,24 @@ LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
 			 * might still have critical updates to do.
 			 */
 			if (record)
+			{
 				LogicalDecodingProcessRecord(ctx, ctx->reader);
 
+				/*
+				 * We used to have bugs where logical decoding would fail to
+				 * preserve the resource owner. That's important here, so
+				 * verify that that doesn't happen anymore.
XXX this could be + * removed once it's been battle-tested. + */ + Assert(CurrentResourceOwner == old_resowner); + } + CHECK_FOR_INTERRUPTS(); } if (found_consistent_snapshot && DecodingContextReady(ctx)) *found_consistent_snapshot = true; - /* - * Logical decoding could have clobbered CurrentResourceOwner during - * transaction management, so restore the executor's value. (This is - * a kluge, but it's not worth cleaning up right now.) - */ - CurrentResourceOwner = old_resowner; - if (ctx->reader->EndRecPtr != InvalidXLogRecPtr) { LogicalConfirmReceivedLocation(moveto); diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c index ca53caac2f2f5..25f890ddeedac 100644 --- a/src/backend/replication/logical/logicalfuncs.c +++ b/src/backend/replication/logical/logicalfuncs.c @@ -107,7 +107,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin XLogRecPtr end_of_wal; XLogRecPtr wait_for_wal_lsn; LogicalDecodingContext *ctx; - ResourceOwner old_resowner = CurrentResourceOwner; + ResourceOwner old_resowner PG_USED_FOR_ASSERTS_ONLY = CurrentResourceOwner; ArrayType *arr; Size ndim; List *options = NIL; @@ -263,8 +263,18 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin * store the description into our tuplestore. */ if (record != NULL) + { LogicalDecodingProcessRecord(ctx, ctx->reader); + /* + * We used to have bugs where logical decoding would fail to + * preserve the resource owner. Verify that that doesn't + * happen anymore. XXX this could be removed once it's been + * battle-tested. + */ + Assert(CurrentResourceOwner == old_resowner); + } + /* check limits */ if (upto_lsn != InvalidXLogRecPtr && upto_lsn <= ctx->reader->EndRecPtr) @@ -275,13 +285,6 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin CHECK_FOR_INTERRUPTS(); } - /* - * Logical decoding could have clobbered CurrentResourceOwner during - * transaction management, so restore the executor's value. (This is - * a kluge, but it's not worth cleaning up right now.) - */ - CurrentResourceOwner = old_resowner; - /* * Next time, start where we left off. (Hunting things, the family * business..) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 34cf05668ae84..4736f993c3743 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2215,6 +2215,7 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, { bool using_subtxn; MemoryContext ccxt = CurrentMemoryContext; + ResourceOwner cowner = CurrentResourceOwner; ReorderBufferIterTXNState *volatile iterstate = NULL; volatile XLogRecPtr prev_lsn = InvalidXLogRecPtr; ReorderBufferChange *volatile specinsert = NULL; @@ -2692,7 +2693,11 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, } if (using_subtxn) + { RollbackAndReleaseCurrentSubTransaction(); + MemoryContextSwitchTo(ccxt); + CurrentResourceOwner = cowner; + } /* * We are here due to one of the four reasons: 1. 
Decoding an @@ -2751,7 +2756,11 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, } if (using_subtxn) + { RollbackAndReleaseCurrentSubTransaction(); + MemoryContextSwitchTo(ccxt); + CurrentResourceOwner = cowner; + } /* * The error code ERRCODE_TRANSACTION_ROLLBACK indicates a concurrent @@ -3244,6 +3253,8 @@ ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations) { bool use_subtxn = IsTransactionOrTransactionBlock(); + MemoryContext ccxt = CurrentMemoryContext; + ResourceOwner cowner = CurrentResourceOwner; int i; if (use_subtxn) @@ -3262,7 +3273,11 @@ ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, LocalExecuteInvalidationMessage(&invalidations[i]); if (use_subtxn) + { RollbackAndReleaseCurrentSubTransaction(); + MemoryContextSwitchTo(ccxt); + CurrentResourceOwner = cowner; + } } /*