From 47d90b741d097f2881b258de3aedcef2876b3f17 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Mon, 26 May 2025 12:47:33 +0900 Subject: [PATCH 001/602] doc: Fix documentation for snapshot export in logical decoding. The documentation for exported snapshots in logical decoding previously stated that snapshot creation may fail on a hot standby. This is no longer accurate, as snapshot exporting on standbys has been supported since PostgreSQL 10. This commit removes the outdated description. Additionally, the docs referred to the NOEXPORT_SNAPSHOT option to suppress snapshot exporting in CREATE_REPLICATION_SLOT. However, since PostgreSQL 15, NOEXPORT_SNAPSHOT is considered legacy syntax and retained only for backward compatibility. This commit updates the documentation for v15 and later to use the modern equivalent: SNAPSHOT 'nothing'. The older syntax is preserved in documentation for v14 and earlier. Back-patched to all supported branches. Reported-by: Kevin K Biju Author: Fujii Masao Reviewed-by: Kevin K Biju Discussion: https://postgr.es/m/174791480466.798.17122832105389395178@wrigleys.postgresql.org Backpatch-through: 13 --- doc/src/sgml/logicaldecoding.sgml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 3f2bcd45a1ea7..dd9e83b08eaf1 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -455,9 +455,8 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU using the slot's contents without losing any changes. - Creation of a snapshot is not always possible. In particular, it will - fail when connected to a hot standby. Applications that do not require - snapshot export may suppress it with the NOEXPORT_SNAPSHOT + Applications that do not require + snapshot export may suppress it with the SNAPSHOT 'nothing' option. From 3bcb554fd2154d2d9b25e942970163e581d0d5fc Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Mon, 26 May 2025 11:05:05 +0530 Subject: [PATCH 002/602] Doc: Make logical replication examples executable in bulk. To improve the usability of logical replication examples, we need to enable bulk copy-pasting of DML/DDL series. Currently, output command tags and prompts disrupt this workflow. While prompts are typically removed, converting them to comments is acceptable here, given the multi-server context. Additionally, ensure all examples containing operators like < and > are wrapped in CDATA blocks to guarantee correct rendering and consistency with other places. Author: David G. Johnston Reviewed-by: Peter Smith Discussion: https://postgr.es/m/CAKFQuwbhbL1uaDTuo9shmo1rA-fX6XGotR7qZQ7rd-ia5ZDoQA@mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 449 ++++++++++---------------- 1 file changed, 177 insertions(+), 272 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index f288c049a5c9a..686dd441d0223 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -363,34 +363,25 @@ Create some test tables on the publisher. 
-test_pub=# CREATE TABLE t1(a int, b text, PRIMARY KEY(a)); -CREATE TABLE -test_pub=# CREATE TABLE t2(c int, d text, PRIMARY KEY(c)); -CREATE TABLE -test_pub=# CREATE TABLE t3(e int, f text, PRIMARY KEY(e)); -CREATE TABLE +/* pub # */ CREATE TABLE t1(a int, b text, PRIMARY KEY(a)); +/* pub # */ CREATE TABLE t2(c int, d text, PRIMARY KEY(c)); +/* pub # */ CREATE TABLE t3(e int, f text, PRIMARY KEY(e)); Create the same tables on the subscriber. -test_sub=# CREATE TABLE t1(a int, b text, PRIMARY KEY(a)); -CREATE TABLE -test_sub=# CREATE TABLE t2(c int, d text, PRIMARY KEY(c)); -CREATE TABLE -test_sub=# CREATE TABLE t3(e int, f text, PRIMARY KEY(e)); -CREATE TABLE +/* sub # */ CREATE TABLE t1(a int, b text, PRIMARY KEY(a)); +/* sub # */ CREATE TABLE t2(c int, d text, PRIMARY KEY(c)); +/* sub # */ CREATE TABLE t3(e int, f text, PRIMARY KEY(e)); Insert data to the tables at the publisher side. -test_pub=# INSERT INTO t1 VALUES (1, 'one'), (2, 'two'), (3, 'three'); -INSERT 0 3 -test_pub=# INSERT INTO t2 VALUES (1, 'A'), (2, 'B'), (3, 'C'); -INSERT 0 3 -test_pub=# INSERT INTO t3 VALUES (1, 'i'), (2, 'ii'), (3, 'iii'); -INSERT 0 3 +/* pub # */ INSERT INTO t1 VALUES (1, 'one'), (2, 'two'), (3, 'three'); +/* pub # */ INSERT INTO t2 VALUES (1, 'A'), (2, 'B'), (3, 'C'); +/* pub # */ INSERT INTO t3 VALUES (1, 'i'), (2, 'ii'), (3, 'iii'); @@ -399,41 +390,34 @@ INSERT 0 3 publish operations. The publication pub3b has a row filter (see ). - -test_pub=# CREATE PUBLICATION pub1 FOR TABLE t1; -CREATE PUBLICATION -test_pub=# CREATE PUBLICATION pub2 FOR TABLE t2 WITH (publish = 'truncate'); -CREATE PUBLICATION -test_pub=# CREATE PUBLICATION pub3a FOR TABLE t3 WITH (publish = 'truncate'); -CREATE PUBLICATION -test_pub=# CREATE PUBLICATION pub3b FOR TABLE t3 WHERE (e > 5); -CREATE PUBLICATION - + 5); +]]> Create subscriptions for the publications. The subscription sub3 subscribes to both pub3a and pub3b. All subscriptions will copy initial data by default. -test_sub=# CREATE SUBSCRIPTION sub1 -test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub1' -test_sub-# PUBLICATION pub1; -CREATE SUBSCRIPTION -test_sub=# CREATE SUBSCRIPTION sub2 -test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub2' -test_sub-# PUBLICATION pub2; -CREATE SUBSCRIPTION -test_sub=# CREATE SUBSCRIPTION sub3 -test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub3' -test_sub-# PUBLICATION pub3a, pub3b; -CREATE SUBSCRIPTION +/* sub # */ CREATE SUBSCRIPTION sub1 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=sub1' +/* sub - */ PUBLICATION pub1; +/* sub # */ CREATE SUBSCRIPTION sub2 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=sub2' +/* sub - */ PUBLICATION pub2; +/* sub # */ CREATE SUBSCRIPTION sub3 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=sub3' +/* sub - */ PUBLICATION pub3a, pub3b; Observe that initial table data is copied, regardless of the publish operation of the publication. -test_sub=# SELECT * FROM t1; +/* sub # */ SELECT * FROM t1; a | b ---+------- 1 | one @@ -441,7 +425,7 @@ test_sub=# SELECT * FROM t1; 3 | three (3 rows) -test_sub=# SELECT * FROM t2; +/* sub # */ SELECT * FROM t2; c | d ---+--- 1 | A @@ -456,7 +440,7 @@ test_sub=# SELECT * FROM t2; it means the copied table t3 contains all rows even when they do not match the row filter of publication pub3b. 
-test_sub=# SELECT * FROM t3; +/* sub # */ SELECT * FROM t3; e | f ---+----- 1 | i @@ -468,18 +452,15 @@ test_sub=# SELECT * FROM t3; Insert more data to the tables at the publisher side. -test_pub=# INSERT INTO t1 VALUES (4, 'four'), (5, 'five'), (6, 'six'); -INSERT 0 3 -test_pub=# INSERT INTO t2 VALUES (4, 'D'), (5, 'E'), (6, 'F'); -INSERT 0 3 -test_pub=# INSERT INTO t3 VALUES (4, 'iv'), (5, 'v'), (6, 'vi'); -INSERT 0 3 +/* pub # */ INSERT INTO t1 VALUES (4, 'four'), (5, 'five'), (6, 'six'); +/* pub # */ INSERT INTO t2 VALUES (4, 'D'), (5, 'E'), (6, 'F'); +/* pub # */ INSERT INTO t3 VALUES (4, 'iv'), (5, 'v'), (6, 'vi'); Now the publisher side data looks like: -test_pub=# SELECT * FROM t1; +/* pub # */ SELECT * FROM t1; a | b ---+------- 1 | one @@ -490,7 +471,7 @@ test_pub=# SELECT * FROM t1; 6 | six (6 rows) -test_pub=# SELECT * FROM t2; +/* pub # */ SELECT * FROM t2; c | d ---+--- 1 | A @@ -501,7 +482,7 @@ test_pub=# SELECT * FROM t2; 6 | F (6 rows) -test_pub=# SELECT * FROM t3; +/* pub # */ SELECT * FROM t3; e | f ---+----- 1 | i @@ -521,7 +502,7 @@ test_pub=# SELECT * FROM t3; only replicate data that matches the row filter of pub3b. Now the subscriber side data looks like: -test_sub=# SELECT * FROM t1; +/* sub # */ SELECT * FROM t1; a | b ---+------- 1 | one @@ -532,7 +513,7 @@ test_sub=# SELECT * FROM t1; 6 | six (6 rows) -test_sub=# SELECT * FROM t2; +/* sub # */ SELECT * FROM t2; c | d ---+--- 1 | A @@ -540,7 +521,7 @@ test_sub=# SELECT * FROM t2; 3 | C (3 rows) -test_sub=# SELECT * FROM t3; +/* sub # */ SELECT * FROM t3; e | f ---+----- 1 | i @@ -567,8 +548,7 @@ test_sub=# SELECT * FROM t3; First, create a publication for the examples to use. -test_pub=# CREATE PUBLICATION pub1 FOR ALL TABLES; -CREATE PUBLICATION +/* pub # */ CREATE PUBLICATION pub1 FOR ALL TABLES; Example 1: Where the subscription says connect = false @@ -579,13 +559,12 @@ CREATE PUBLICATION Create the subscription. -test_sub=# CREATE SUBSCRIPTION sub1 -test_sub-# CONNECTION 'host=localhost dbname=test_pub' -test_sub-# PUBLICATION pub1 -test_sub-# WITH (connect=false); +/* sub # */ CREATE SUBSCRIPTION sub1 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub' +/* sub - */ PUBLICATION pub1 +/* sub - */ WITH (connect=false); WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. -CREATE SUBSCRIPTION @@ -594,7 +573,7 @@ CREATE SUBSCRIPTION specified during CREATE SUBSCRIPTION, the name of the slot to create is same as the subscription name, e.g. "sub1". -test_pub=# SELECT * FROM pg_create_logical_replication_slot('sub1', 'pgoutput'); +/* pub # */ SELECT * FROM pg_create_logical_replication_slot('sub1', 'pgoutput'); slot_name | lsn -----------+----------- sub1 | 0/19404D0 @@ -606,10 +585,8 @@ test_pub=# SELECT * FROM pg_create_logical_replication_slot('sub1', 'pgoutput'); On the subscriber, complete the activation of the subscription. After this the tables of pub1 will start replicating. -test_sub=# ALTER SUBSCRIPTION sub1 ENABLE; -ALTER SUBSCRIPTION -test_sub=# ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* sub # */ ALTER SUBSCRIPTION sub1 ENABLE; +/* sub # */ ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION; @@ -625,13 +602,12 @@ ALTER SUBSCRIPTION Create the subscription. 
-test_sub=# CREATE SUBSCRIPTION sub1 -test_sub-# CONNECTION 'host=localhost dbname=test_pub' -test_sub-# PUBLICATION pub1 -test_sub-# WITH (connect=false, slot_name='myslot'); +/* sub # */ CREATE SUBSCRIPTION sub1 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub' +/* sub - */ PUBLICATION pub1 +/* sub - */ WITH (connect=false, slot_name='myslot'); WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. -CREATE SUBSCRIPTION @@ -639,7 +615,7 @@ CREATE SUBSCRIPTION On the publisher, manually create a slot using the same name that was specified during CREATE SUBSCRIPTION, e.g. "myslot". -test_pub=# SELECT * FROM pg_create_logical_replication_slot('myslot', 'pgoutput'); +/* pub # */ SELECT * FROM pg_create_logical_replication_slot('myslot', 'pgoutput'); slot_name | lsn -----------+----------- myslot | 0/19059A0 @@ -651,10 +627,8 @@ test_pub=# SELECT * FROM pg_create_logical_replication_slot('myslot', 'pgoutput' On the subscriber, the remaining subscription activation steps are the same as before. -test_sub=# ALTER SUBSCRIPTION sub1 ENABLE; -ALTER SUBSCRIPTION -test_sub=# ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* sub # */ ALTER SUBSCRIPTION sub1 ENABLE; +/* sub # */ ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION; @@ -669,18 +643,17 @@ ALTER SUBSCRIPTION enabled = false, and create_slot = false are also needed. -test_sub=# CREATE SUBSCRIPTION sub1 -test_sub-# CONNECTION 'host=localhost dbname=test_pub' -test_sub-# PUBLICATION pub1 -test_sub-# WITH (slot_name=NONE, enabled=false, create_slot=false); -CREATE SUBSCRIPTION +/* sub # */ CREATE SUBSCRIPTION sub1 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub' +/* sub - */ PUBLICATION pub1 +/* sub - */ WITH (slot_name=NONE, enabled=false, create_slot=false); On the publisher, manually create a slot using any name, e.g. "myslot". -test_pub=# SELECT * FROM pg_create_logical_replication_slot('myslot', 'pgoutput'); +/* pub # */ SELECT * FROM pg_create_logical_replication_slot('myslot', 'pgoutput'); slot_name | lsn -----------+----------- myslot | 0/1905930 @@ -692,18 +665,15 @@ test_pub=# SELECT * FROM pg_create_logical_replication_slot('myslot', 'pgoutput' On the subscriber, associate the subscription with the slot name just created. -test_sub=# ALTER SUBSCRIPTION sub1 SET (slot_name='myslot'); -ALTER SUBSCRIPTION +/* sub # */ ALTER SUBSCRIPTION sub1 SET (slot_name='myslot'); The remaining subscription activation steps are same as before. -test_sub=# ALTER SUBSCRIPTION sub1 ENABLE; -ALTER SUBSCRIPTION -test_sub=# ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* sub # */ ALTER SUBSCRIPTION sub1 ENABLE; +/* sub # */ ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION; @@ -752,7 +722,7 @@ ALTER SUBSCRIPTION will return the relevant replication slots associated with the failover-enabled subscriptions. -test_sub=# SELECT +/* sub # */ SELECT array_agg(quote_literal(s.subslotname)) AS slots FROM pg_subscription s WHERE s.subfailover AND @@ -775,7 +745,7 @@ test_sub=# SELECT as they will either be dropped or re-created on the new primary server in those cases. -test_sub=# SELECT +/* sub # */ SELECT array_agg(quote_literal(slot_name)) AS slots FROM ( @@ -794,7 +764,7 @@ test_sub=# SELECT Check that the logical replication slots identified above exist on the standby server and are ready for failover. 
-test_standby=# SELECT slot_name, (synced AND NOT temporary AND NOT conflicting) AS failover_ready +/* standby # */ SELECT slot_name, (synced AND NOT temporary AND NOT conflicting) AS failover_ready FROM pg_replication_slots WHERE slot_name IN ('sub1','sub2','sub3', 'pg_16394_sync_16385_7394666715149055164'); @@ -1024,12 +994,9 @@ test_standby=# SELECT slot_name, (synced AND NOT temporary AND NOT conflicting) Create some tables to be used in the following examples. -test_pub=# CREATE TABLE t1(a int, b int, c text, PRIMARY KEY(a,c)); -CREATE TABLE -test_pub=# CREATE TABLE t2(d int, e int, f int, PRIMARY KEY(d)); -CREATE TABLE -test_pub=# CREATE TABLE t3(g int, h int, i int, PRIMARY KEY(g)); -CREATE TABLE +/* pub # */ CREATE TABLE t1(a int, b int, c text, PRIMARY KEY(a,c)); +/* pub # */ CREATE TABLE t2(d int, e int, f int, PRIMARY KEY(d)); +/* pub # */ CREATE TABLE t3(g int, h int, i int, PRIMARY KEY(g)); @@ -1038,43 +1005,40 @@ CREATE TABLE p2 has two tables. Table t1 has no row filter, and table t2 has a row filter. Publication p3 has two tables, and both of them have a row filter. - -test_pub=# CREATE PUBLICATION p1 FOR TABLE t1 WHERE (a > 5 AND c = 'NSW'); -CREATE PUBLICATION -test_pub=# CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99); -CREATE PUBLICATION -test_pub=# CREATE PUBLICATION p3 FOR TABLE t2 WHERE (d = 10), t3 WHERE (g = 10); -CREATE PUBLICATION - + 5 AND c = 'NSW'); +/* pub # */ CREATE PUBLICATION p2 FOR TABLE t1, t2 WHERE (e = 99); +/* pub # */ CREATE PUBLICATION p3 FOR TABLE t2 WHERE (d = 10), t3 WHERE (g = 10); +]]> psql can be used to show the row filter expressions (if defined) for each publication. - -test_pub=# \dRp+ - Publication p1 + 5) AND (c = 'NSW'::text)) + "public.t1" WHERE ((a > 5) AND (c = 'NSW'::text)) - Publication p2 + Publication p2 Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root ----------+------------+---------+---------+---------+-----------+---------- postgres | f | t | t | t | t | f Tables: - "public.t1" - "public.t2" WHERE (e = 99) + "public.t1" + "public.t2" WHERE (e = 99) - Publication p3 + Publication p3 Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root ----------+------------+---------+---------+---------+-----------+---------- postgres | f | t | t | t | t | f Tables: - "public.t2" WHERE (d = 10) - "public.t3" WHERE (g = 10) - + "public.t2" WHERE (d = 10) + "public.t3" WHERE (g = 10) +]]> psql can be used to show the row filter expressions (if @@ -1082,8 +1046,8 @@ Tables: of two publications, but has a row filter only in p1. See that table t2 is a member of two publications, and has a different row filter in each of them. - -test_pub=# \d t1 + 5) AND (c = 'NSW'::text)) "p2" -test_pub=# \d t2 +/* pub # */ \d t2 Table "public.t2" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- @@ -1109,7 +1073,7 @@ Publications: "p2" WHERE (e = 99) "p3" WHERE (d = 10) -test_pub=# \d t3 +/* pub # */ \d t3 Table "public.t3" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- @@ -1120,43 +1084,33 @@ Indexes: "t3_pkey" PRIMARY KEY, btree (g) Publications: "p3" WHERE (g = 10) - +]]> On the subscriber node, create a table t1 with the same definition as the one on the publisher, and also create the subscription s1 that subscribes to the publication p1. 
-test_sub=# CREATE TABLE t1(a int, b int, c text, PRIMARY KEY(a,c)); -CREATE TABLE -test_sub=# CREATE SUBSCRIPTION s1 -test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=s1' -test_sub-# PUBLICATION p1; -CREATE SUBSCRIPTION +/* sub # */ CREATE TABLE t1(a int, b int, c text, PRIMARY KEY(a,c)); +/* sub # */ CREATE SUBSCRIPTION s1 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=s1' +/* sub - */ PUBLICATION p1; Insert some rows. Only the rows satisfying the t1 WHERE clause of publication p1 are replicated. -test_pub=# INSERT INTO t1 VALUES (2, 102, 'NSW'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (3, 103, 'QLD'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (4, 104, 'VIC'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (5, 105, 'ACT'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (6, 106, 'NSW'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (7, 107, 'NT'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (8, 108, 'QLD'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES (9, 109, 'NSW'); -INSERT 0 1 - -test_pub=# SELECT * FROM t1; +/* pub # */ INSERT INTO t1 VALUES (2, 102, 'NSW'); +/* pub # */ INSERT INTO t1 VALUES (3, 103, 'QLD'); +/* pub # */ INSERT INTO t1 VALUES (4, 104, 'VIC'); +/* pub # */ INSERT INTO t1 VALUES (5, 105, 'ACT'); +/* pub # */ INSERT INTO t1 VALUES (6, 106, 'NSW'); +/* pub # */ INSERT INTO t1 VALUES (7, 107, 'NT'); +/* pub # */ INSERT INTO t1 VALUES (8, 108, 'QLD'); +/* pub # */ INSERT INTO t1 VALUES (9, 109, 'NSW'); + +/* pub # */ SELECT * FROM t1; a | b | c ---+-----+----- 2 | 102 | NSW @@ -1170,7 +1124,7 @@ test_pub=# SELECT * FROM t1; (8 rows) -test_sub=# SELECT * FROM t1; +/* sub # */ SELECT * FROM t1; a | b | c ---+-----+----- 6 | 106 | NSW @@ -1184,10 +1138,9 @@ test_sub=# SELECT * FROM t1; p1. The UPDATE replicates the change as normal. -test_pub=# UPDATE t1 SET b = 999 WHERE a = 6; -UPDATE 1 +/* pub # */ UPDATE t1 SET b = 999 WHERE a = 6; -test_pub=# SELECT * FROM t1; +/* pub # */ SELECT * FROM t1; a | b | c ---+-----+----- 2 | 102 | NSW @@ -1201,7 +1154,7 @@ test_pub=# SELECT * FROM t1; (8 rows) -test_sub=# SELECT * FROM t1; +/* sub # */ SELECT * FROM t1; a | b | c ---+-----+----- 9 | 109 | NSW @@ -1216,10 +1169,9 @@ test_sub=# SELECT * FROM t1; transformed into an INSERT and the change is replicated. See the new row on the subscriber. -test_pub=# UPDATE t1 SET a = 555 WHERE a = 2; -UPDATE 1 +/* pub # */ UPDATE t1 SET a = 555 WHERE a = 2; -test_pub=# SELECT * FROM t1; +/* pub # */ SELECT * FROM t1; a | b | c -----+-----+----- 3 | 103 | QLD @@ -1233,7 +1185,7 @@ test_pub=# SELECT * FROM t1; (8 rows) -test_sub=# SELECT * FROM t1; +/* sub # */ SELECT * FROM t1; a | b | c -----+-----+----- 9 | 109 | NSW @@ -1249,10 +1201,9 @@ test_sub=# SELECT * FROM t1; transformed into a DELETE and the change is replicated. See that the row is removed from the subscriber. -test_pub=# UPDATE t1 SET c = 'VIC' WHERE a = 9; -UPDATE 1 +/* pub # */ UPDATE t1 SET c = 'VIC' WHERE a = 9; -test_pub=# SELECT * FROM t1; +/* pub # */ SELECT * FROM t1; a | b | c -----+-----+----- 3 | 103 | QLD @@ -1266,7 +1217,7 @@ test_pub=# SELECT * FROM t1; (8 rows) -test_sub=# SELECT * FROM t1; +/* sub # */ SELECT * FROM t1; a | b | c -----+-----+----- 6 | 999 | NSW @@ -1284,17 +1235,13 @@ test_sub=# SELECT * FROM t1; Create a partitioned table on the publisher. 
-test_pub=# CREATE TABLE parent(a int PRIMARY KEY) PARTITION BY RANGE(a); -CREATE TABLE -test_pub=# CREATE TABLE child PARTITION OF parent DEFAULT; -CREATE TABLE +/* pub # */ CREATE TABLE parent(a int PRIMARY KEY) PARTITION BY RANGE(a); +/* pub # */ CREATE TABLE child PARTITION OF parent DEFAULT; Create the same tables on the subscriber. -test_sub=# CREATE TABLE parent(a int PRIMARY KEY) PARTITION BY RANGE(a); -CREATE TABLE -test_sub=# CREATE TABLE child PARTITION OF parent DEFAULT; -CREATE TABLE +/* sub # */ CREATE TABLE parent(a int PRIMARY KEY) PARTITION BY RANGE(a); +/* sub # */ CREATE TABLE child PARTITION OF parent DEFAULT; @@ -1302,16 +1249,14 @@ CREATE TABLE publication parameter publish_via_partition_root is set as true. There are row filters defined on both the partitioned table (parent), and on the partition (child). - -test_pub=# CREATE PUBLICATION p4 FOR TABLE parent WHERE (a < 5), child WHERE (a >= 5) -test_pub-# WITH (publish_via_partition_root=true); -CREATE PUBLICATION - - -test_sub=# CREATE SUBSCRIPTION s4 -test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=s4' -test_sub-# PUBLICATION p4; -CREATE SUBSCRIPTION += 5) +/* pub - */ WITH (publish_via_partition_root=true); +]]> + +/* sub # */ CREATE SUBSCRIPTION s4 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=s4' +/* sub - */ PUBLICATION p4; @@ -1320,12 +1265,10 @@ CREATE SUBSCRIPTION parent (because publish_via_partition_root is true). -test_pub=# INSERT INTO parent VALUES (2), (4), (6); -INSERT 0 3 -test_pub=# INSERT INTO child VALUES (3), (5), (7); -INSERT 0 3 +/* pub # */ INSERT INTO parent VALUES (2), (4), (6); +/* pub # */ INSERT INTO child VALUES (3), (5), (7); -test_pub=# SELECT * FROM parent ORDER BY a; +/* pub # */ SELECT * FROM parent ORDER BY a; a --- 2 @@ -1337,7 +1280,7 @@ test_pub=# SELECT * FROM parent ORDER BY a; (6 rows) -test_sub=# SELECT * FROM parent ORDER BY a; +/* sub # */ SELECT * FROM parent ORDER BY a; a --- 2 @@ -1350,16 +1293,13 @@ test_sub=# SELECT * FROM parent ORDER BY a; Repeat the same test, but with a different value for publish_via_partition_root. The publication parameter publish_via_partition_root is set as false. A row filter is defined on the partition (child). += 5) +/* pub - */ WITH (publish_via_partition_root=false); +]]> -test_pub=# DROP PUBLICATION p4; -DROP PUBLICATION -test_pub=# CREATE PUBLICATION p4 FOR TABLE parent, child WHERE (a >= 5) -test_pub-# WITH (publish_via_partition_root=false); -CREATE PUBLICATION - - -test_sub=# ALTER SUBSCRIPTION s4 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* sub # */ ALTER SUBSCRIPTION s4 REFRESH PUBLICATION; @@ -1367,14 +1307,11 @@ ALTER SUBSCRIPTION row filter of child (because publish_via_partition_root is false). -test_pub=# TRUNCATE parent; -TRUNCATE TABLE -test_pub=# INSERT INTO parent VALUES (2), (4), (6); -INSERT 0 3 -test_pub=# INSERT INTO child VALUES (3), (5), (7); -INSERT 0 3 +/* pub # */ TRUNCATE parent; +/* pub # */ INSERT INTO parent VALUES (2), (4), (6); +/* pub # */ INSERT INTO child VALUES (3), (5), (7); -test_pub=# SELECT * FROM parent ORDER BY a; +/* pub # */ SELECT * FROM parent ORDER BY a; a --- 2 @@ -1386,7 +1323,7 @@ test_pub=# SELECT * FROM parent ORDER BY a; (6 rows) -test_sub=# SELECT * FROM child ORDER BY a; +/* sub # */ SELECT * FROM child ORDER BY a; a --- 5 @@ -1505,8 +1442,7 @@ test_sub=# SELECT * FROM child ORDER BY a; Create a table t1 to be used in the following example. 
-test_pub=# CREATE TABLE t1(id int, a text, b text, c text, d text, e text, PRIMARY KEY(id)); -CREATE TABLE +/* pub # */ CREATE TABLE t1(id int, a text, b text, c text, d text, e text, PRIMARY KEY(id)); @@ -1515,15 +1451,14 @@ CREATE TABLE replicated. Notice that the order of column names in the column list does not matter. -test_pub=# CREATE PUBLICATION p1 FOR TABLE t1 (id, b, a, d); -CREATE PUBLICATION +/* pub # */ CREATE PUBLICATION p1 FOR TABLE t1 (id, b, a, d); psql can be used to show the column lists (if defined) for each publication. -test_pub=# \dRp+ +/* pub # */ \dRp+ Publication p1 Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root ----------+------------+---------+---------+---------+-----------+---------- @@ -1536,7 +1471,7 @@ Tables: psql can be used to show the column lists (if defined) for each table. -test_pub=# \d t1 +/* pub # */ \d t1 Table "public.t1" Column | Type | Collation | Nullable | Default --------+---------+-----------+----------+--------- @@ -1559,24 +1494,19 @@ Publications: s1 that subscribes to the publication p1. -test_sub=# CREATE TABLE t1(id int, b text, a text, d text, PRIMARY KEY(id)); -CREATE TABLE -test_sub=# CREATE SUBSCRIPTION s1 -test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=s1' -test_sub-# PUBLICATION p1; -CREATE SUBSCRIPTION +/* sub # */ CREATE TABLE t1(id int, b text, a text, d text, PRIMARY KEY(id)); +/* sub # */ CREATE SUBSCRIPTION s1 +/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=s1' +/* sub - */ PUBLICATION p1; On the publisher node, insert some rows to table t1. -test_pub=# INSERT INTO t1 VALUES(1, 'a-1', 'b-1', 'c-1', 'd-1', 'e-1'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES(2, 'a-2', 'b-2', 'c-2', 'd-2', 'e-2'); -INSERT 0 1 -test_pub=# INSERT INTO t1 VALUES(3, 'a-3', 'b-3', 'c-3', 'd-3', 'e-3'); -INSERT 0 1 -test_pub=# SELECT * FROM t1 ORDER BY id; +/* pub # */ INSERT INTO t1 VALUES(1, 'a-1', 'b-1', 'c-1', 'd-1', 'e-1'); +/* pub # */ INSERT INTO t1 VALUES(2, 'a-2', 'b-2', 'c-2', 'd-2', 'e-2'); +/* pub # */ INSERT INTO t1 VALUES(3, 'a-3', 'b-3', 'c-3', 'd-3', 'e-3'); +/* pub # */ SELECT * FROM t1 ORDER BY id; id | a | b | c | d | e ----+-----+-----+-----+-----+----- 1 | a-1 | b-1 | c-1 | d-1 | e-1 @@ -1589,7 +1519,7 @@ test_pub=# SELECT * FROM t1 ORDER BY id; Only data from the column list of publication p1 is replicated. -test_sub=# SELECT * FROM t1 ORDER BY id; +/* sub # */ SELECT * FROM t1 ORDER BY id; id | b | a | d ----+-----+-----+----- 1 | b-1 | a-1 | d-1 @@ -1617,13 +1547,10 @@ test_sub=# SELECT * FROM t1 ORDER BY id; For example, note below that subscriber table generated column value comes from the subscriber column's calculation. 
-test_pub=# CREATE TABLE tab_gen_to_gen (a int, b int GENERATED ALWAYS AS (a + 1) STORED); -CREATE TABLE -test_pub=# INSERT INTO tab_gen_to_gen VALUES (1),(2),(3); -INSERT 0 3 -test_pub=# CREATE PUBLICATION pub1 FOR TABLE tab_gen_to_gen; -CREATE PUBLICATION -test_pub=# SELECT * FROM tab_gen_to_gen; +/* pub # */ CREATE TABLE tab_gen_to_gen (a int, b int GENERATED ALWAYS AS (a + 1) STORED); +/* pub # */ INSERT INTO tab_gen_to_gen VALUES (1),(2),(3); +/* pub # */ CREATE PUBLICATION pub1 FOR TABLE tab_gen_to_gen; +/* pub # */ SELECT * FROM tab_gen_to_gen; a | b ---+--- 1 | 2 @@ -1631,11 +1558,9 @@ test_pub=# SELECT * FROM tab_gen_to_gen; 3 | 4 (3 rows) -test_sub=# CREATE TABLE tab_gen_to_gen (a int, b int GENERATED ALWAYS AS (a * 100) STORED); -CREATE TABLE -test_sub=# CREATE SUBSCRIPTION sub1 CONNECTION 'dbname=test_pub' PUBLICATION pub1; -CREATE SUBSCRIPTION -test_sub=# SELECT * from tab_gen_to_gen; +/* sub # */ CREATE TABLE tab_gen_to_gen (a int, b int GENERATED ALWAYS AS (a * 100) STORED); +/* sub # */ CREATE SUBSCRIPTION sub1 CONNECTION 'dbname=test_pub' PUBLICATION pub1; +/* sub # */ SELECT * from tab_gen_to_gen; a | b ---+---- 1 | 100 @@ -2690,8 +2615,7 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER ALTER SUBSCRIPTION ... DISABLE, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 DISABLE; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 DISABLE; @@ -2780,8 +2704,7 @@ pg_ctl -D /opt/PostgreSQL/data2_upgraded start -l logfile and now, e.g.: -node2=# CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); -CREATE TABLE +/* node2 # */ CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); @@ -2793,8 +2716,7 @@ CREATE TABLE ALTER SUBSCRIPTION ... ENABLE, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 ENABLE; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 ENABLE; @@ -2805,8 +2727,7 @@ ALTER SUBSCRIPTION ALTER SUBSCRIPTION ... REFRESH PUBLICATION, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 REFRESH PUBLICATION; @@ -2844,8 +2765,7 @@ ALTER SUBSCRIPTION ALTER SUBSCRIPTION ... DISABLE, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 DISABLE; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 DISABLE; @@ -2896,8 +2816,7 @@ pg_ctl -D /opt/PostgreSQL/data1_upgraded start -l logfile ALTER SUBSCRIPTION ... DISABLE, e.g.: -node3=# ALTER SUBSCRIPTION sub1_node2_node3 DISABLE; -ALTER SUBSCRIPTION +/* node3 # */ ALTER SUBSCRIPTION sub1_node2_node3 DISABLE; @@ -2948,8 +2867,7 @@ pg_ctl -D /opt/PostgreSQL/data2_upgraded start -l logfile and now, e.g.: -node2=# CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); -CREATE TABLE +/* node2 # */ CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); @@ -2961,8 +2879,7 @@ CREATE TABLE ALTER SUBSCRIPTION ... ENABLE, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 ENABLE; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 ENABLE; @@ -2973,8 +2890,7 @@ ALTER SUBSCRIPTION ALTER SUBSCRIPTION ... 
REFRESH PUBLICATION, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 REFRESH PUBLICATION; @@ -3025,8 +2941,7 @@ pg_ctl -D /opt/PostgreSQL/data3_upgraded start -l logfile and now, e.g.: -node3=# CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); -CREATE TABLE +/* node3 # */ CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); @@ -3038,8 +2953,7 @@ CREATE TABLE ALTER SUBSCRIPTION ... ENABLE, e.g.: -node3=# ALTER SUBSCRIPTION sub1_node2_node3 ENABLE; -ALTER SUBSCRIPTION +/* node3 # */ ALTER SUBSCRIPTION sub1_node2_node3 ENABLE; @@ -3050,8 +2964,7 @@ ALTER SUBSCRIPTION ALTER SUBSCRIPTION ... REFRESH PUBLICATION, e.g.: -node3=# ALTER SUBSCRIPTION sub1_node2_node3 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* node3 # */ ALTER SUBSCRIPTION sub1_node2_node3 REFRESH PUBLICATION; @@ -3082,8 +2995,7 @@ ALTER SUBSCRIPTION ALTER SUBSCRIPTION ... DISABLE, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 DISABLE; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 DISABLE; @@ -3134,8 +3046,7 @@ pg_ctl -D /opt/PostgreSQL/data1_upgraded start -l logfile ALTER SUBSCRIPTION ... ENABLE, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 ENABLE; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 ENABLE; @@ -3146,8 +3057,7 @@ ALTER SUBSCRIPTION node2 between and now, e.g.: -node1=# CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); -CREATE TABLE +/* node1 # */ CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); @@ -3160,8 +3070,7 @@ CREATE TABLE ALTER SUBSCRIPTION ... REFRESH PUBLICATION, e.g.: -node1=# ALTER SUBSCRIPTION sub1_node2_node1 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* node1 # */ ALTER SUBSCRIPTION sub1_node2_node1 REFRESH PUBLICATION; @@ -3173,8 +3082,7 @@ ALTER SUBSCRIPTION ALTER SUBSCRIPTION ... DISABLE, e.g.: -node1=# ALTER SUBSCRIPTION sub1_node2_node1 DISABLE; -ALTER SUBSCRIPTION +/* node1 # */ ALTER SUBSCRIPTION sub1_node2_node1 DISABLE; @@ -3225,8 +3133,7 @@ pg_ctl -D /opt/PostgreSQL/data2_upgraded start -l logfile ALTER SUBSCRIPTION ... ENABLE, e.g.: -node1=# ALTER SUBSCRIPTION sub1_node2_node1 ENABLE; -ALTER SUBSCRIPTION +/* node1 # */ ALTER SUBSCRIPTION sub1_node2_node1 ENABLE; @@ -3237,8 +3144,7 @@ ALTER SUBSCRIPTION the upgraded node1 between and now, e.g.: -node2=# CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); -CREATE TABLE +/* node2 # */ CREATE TABLE distributors (did integer PRIMARY KEY, name varchar(40)); @@ -3250,8 +3156,7 @@ CREATE TABLE ALTER SUBSCRIPTION ... REFRESH PUBLICATION, e.g.: -node2=# ALTER SUBSCRIPTION sub1_node1_node2 REFRESH PUBLICATION; -ALTER SUBSCRIPTION +/* node2 # */ ALTER SUBSCRIPTION sub1_node1_node2 REFRESH PUBLICATION; From 52a1df85f29b7955e33e5ce69bc8b46ee499dbfd Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 26 May 2025 17:28:37 +0900 Subject: [PATCH 003/602] Fix race condition in subscription TAP test 021_twophase The test did not wait for all the subscriptions to have caught up when dropping the subscription "tab_copy". In a slow environment, it could be possible for the replay of the COMMIT PREPARED transaction "mygid" to not be confirmed yet, causing one prepared transaction to be left around before moving to the next steps of the test. 
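As a hedged illustration (not part of the test itself; "mygid" is the transaction described above), such a leftover can be observed on the subscriber with a query like:

    -- hypothetical check against the pg_prepared_xacts system view;
    -- a row with gid 'mygid' would indicate the COMMIT PREPARED
    -- whose replay has not been confirmed yet
    SELECT gid, prepared, owner FROM pg_prepared_xacts;
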
One failure noticed is a transaction found in pg_prepared_xacts for the cases where copy_data = false and two_phase = true, but there should be none after dropping the subscription. As an extra safety measure, a check is added before dropping the subscription, scanning pg_prepared_xacts to make sure that no prepared transactions are left once both subscriptions have caught up. Issue introduced by a8fd13cab0ba, fixing a problem similar to eaf5321c3524. Per buildfarm member kestrel. Author: Vignesh C Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/CALDaNm329QaZ+bwU--bW6GjbNSZ8-38cDE8QWofafub7NV67oA@mail.gmail.com Backpatch-through: 15 --- src/test/subscription/t/021_twophase.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/test/subscription/t/021_twophase.pl b/src/test/subscription/t/021_twophase.pl index 61c427aed2100..b8e4242d1f121 100644 --- a/src/test/subscription/t/021_twophase.pl +++ b/src/test/subscription/t/021_twophase.pl @@ -373,7 +373,14 @@ $node_publisher->safe_psql('postgres', "SELECT count(*) FROM tab_copy;"); is($result, qq(6), 'publisher inserted data'); +# Wait for both subscribers to catchup $node_publisher->wait_for_catchup($appname_copy); +$node_publisher->wait_for_catchup($appname); + +# Make sure there are no prepared transactions on the subscriber +$result = $node_subscriber->safe_psql('postgres', + "SELECT count(*) FROM pg_prepared_xacts;"); +is($result, qq(0), 'should be no prepared transactions on subscriber'); $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_copy;"); From 1f62dbf5f0ae375320b11362e0fccf8ec2766288 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Mon, 26 May 2025 13:30:01 +0200 Subject: [PATCH 004/602] doc: Fix wording in JIT README Remove superfluous 'is' from sentence. Author: Yugo Nagata Reviewed-by: Daniel Gustafsson Discussion: https://postgr.es/m/20250526154412.5f77dfead87af9afc089cc48@sraoss.co.jp --- src/backend/jit/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/jit/README b/src/backend/jit/README index 5427bdf2153ff..a40950dfb039e 100644 --- a/src/backend/jit/README +++ b/src/backend/jit/README @@ -205,7 +205,7 @@ The ability to do so allows us to get the LLVM IR for all operators bitcode files get installed into the server's $pkglibdir/bitcode/postgres/ Using existing LLVM functionality (for parallel LTO compilation), -additionally an index is over these is stored to +additionally an index over these is stored to $pkglibdir/bitcode/postgres.index.bc Similarly extensions can install code into From c53f3b9cc8b1ece539291860a2def55ce1abf64a Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 26 May 2025 14:52:00 +0200 Subject: [PATCH 005/602] Improve file_copy_method entry in postgresql.conf.sample Improve the wording of the comment a bit, fix whitespace. Also move the entry so that the section order is consistent with config.sgml. 
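As a hedged sketch of what this setting controls (parameter name and values as in the sample file below; "clone" is available only where the OS supports it, and the database names are hypothetical):

    -- choose the file copy method, then create a database with the
    -- file-copy strategy so that the setting takes effect
    SET file_copy_method = clone;
    CREATE DATABASE clonedb TEMPLATE template0 STRATEGY file_copy;
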
--- src/backend/utils/misc/postgresql.conf.sample | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 63f991c4f9305..87ce76b18f41c 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -178,13 +178,11 @@ #temp_file_limit = -1 # limits per-process temp file space # in kilobytes, or -1 for no limit +#file_copy_method = copy # copy, clone (if supported by OS) + #max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated # for NOTIFY / LISTEN queue -#file_copy_method = copy # the default is the first option - # copy - # clone (if system support is available) - # - Kernel Resources - #max_files_per_process = 1000 # min 64 From 4c08ecd1618e3c5da664ba24a4aa7052772c4616 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Tue, 27 May 2025 11:42:36 -0700 Subject: [PATCH 006/602] Fix assertion when decrementing eager scanning success and failure counters. Previously, we asserted that the eager scan's success and failure counters were positive before decrementing them. However, this assumption was incorrect, as it's possible that some blocks have already been eagerly scanned by the time eager scanning is disabled. This commit replaces the assertions with guards to handle this scenario gracefully. With this change, we continue to allow read-ahead operations by the read stream that exceed the success and failure caps. While there is a possibility that overruns will trigger eager scans of additional pages, this does not pose a practical concern as the overruns will not be substantial and remain within an acceptable range. Reviewed-by: Melanie Plageman Discussion: https://postgr.es/m/CAD21AoConf6tkVCv-=JhQJj56kYsDwo4jG5+WqgT+ukSkYomSQ@mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 29 ++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f28326bad0951..708674d8fcf4a 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1413,11 +1413,25 @@ lazy_scan_heap(LVRelState *vacrel) if (vm_page_frozen) { - Assert(vacrel->eager_scan_remaining_successes > 0); - vacrel->eager_scan_remaining_successes--; + if (vacrel->eager_scan_remaining_successes > 0) + vacrel->eager_scan_remaining_successes--; if (vacrel->eager_scan_remaining_successes == 0) { + /* + * Report only once that we disabled eager scanning. We + * may eagerly read ahead blocks in excess of the success + * or failure caps before attempting to freeze them, so we + * could reach here even after disabling additional eager + * scanning. + */ + if (vacrel->eager_scan_max_fails_per_region > 0) + ereport(vacrel->verbose ? INFO : DEBUG2, + (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"", + orig_eager_scan_success_limit, + vacrel->dbname, vacrel->relnamespace, + vacrel->relname))); + /* * If we hit our success cap, permanently disable eager * scanning by setting the other eager scan management @@ -1426,19 +1440,10 @@ lazy_scan_heap(LVRelState *vacrel) vacrel->eager_scan_remaining_fails = 0; vacrel->next_eager_scan_region_start = InvalidBlockNumber; vacrel->eager_scan_max_fails_per_region = 0; - - ereport(vacrel->verbose ? 
INFO : DEBUG2, - (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"", - orig_eager_scan_success_limit, - vacrel->dbname, vacrel->relnamespace, - vacrel->relname))); } } - else - { - Assert(vacrel->eager_scan_remaining_fails > 0); + else if (vacrel->eager_scan_remaining_fails > 0) vacrel->eager_scan_remaining_fails--; - } } /* From 34eb2a80d5a3ea1392e4a9f16ebac54e96d0c6ed Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 27 May 2025 12:53:10 -0700 Subject: [PATCH 007/602] Change pg_dump default for statistics export. Set the default behavior of pg_dump and pg_dumpall to be --no-statistics. Leave the default for pg_restore and pg_upgrade to be --with-statistics. Discussion: https://postgr.es/m/CA+TgmoZ9=RnWcCOZiKYYjZs_AW1P4QXCw--h4dOLLHuf1Omung@mail.gmail.com Reviewed-by: Greg Sabino Mullane Reviewed-by: Nathan Bossart Reviewed-by: Robert Haas Reviewed-by: Tom Lane --- doc/src/sgml/ref/pg_dump.sgml | 20 +++++----- doc/src/sgml/ref/pg_dumpall.sgml | 20 +++++----- src/bin/pg_dump/pg_backup_archiver.c | 2 +- src/bin/pg_dump/t/002_pg_dump.pl | 59 ++++++++++++++++++++++++++++ src/bin/pg_upgrade/dump.c | 2 +- 5 files changed, 81 insertions(+), 22 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index c10bca63e55c5..d7595a7e5468d 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1134,7 +1134,7 @@ PostgreSQL documentation - Do not dump statistics. + Do not dump statistics. This is the default. @@ -1461,7 +1461,7 @@ PostgreSQL documentation - Dump statistics. This is the default. + Dump statistics. @@ -1681,14 +1681,14 @@ CREATE DATABASE foo WITH TEMPLATE template0; - By default, pg_dump will include most optimizer - statistics in the resulting dump file. However, some statistics may not be - included, such as those created explicitly with or custom statistics added by an - extension. Therefore, it may be useful to run ANALYZE - after restoring from a dump file to ensure optimal performance; see and for more - information. + If is specified, + pg_dump will include most optimizer statistics in the + resulting dump file. However, some statistics may not be included, such as + those created explicitly with or + custom statistics added by an extension. Therefore, it may be useful to + run ANALYZE after restoring from a dump file to ensure + optimal performance; see and for more information. diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 8c5141d036c76..723a466cfaad6 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -567,7 +567,7 @@ exclude database PATTERN - Do not dump statistics. + Do not dump statistics. This is the default. @@ -741,7 +741,7 @@ exclude database PATTERN - Dump statistics. This is the default. + Dump statistics. @@ -957,14 +957,14 @@ exclude database PATTERN - By default, pg_dumpall will include most optimizer - statistics in the resulting dump file. However, some statistics may not be - included, such as those created explicitly with or custom statistics added by an - extension. Therefore, it may be useful to run ANALYZE - on each database after restoring from a dump file to ensure optimal - performance. You can also run vacuumdb -a -z to analyze - all databases. + If is specified, + pg_dumpall will include most optimizer statistics in the + resulting dump file. However, some statistics may not be included, such as + those created explicitly with or + custom statistics added by an extension. 
Therefore, it may be useful to + run ANALYZE on each database after restoring from a dump + file to ensure optimal performance. You can also run vacuumdb -a + -z to analyze all databases. diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index afa42337b110f..175fe9c427395 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -152,7 +152,7 @@ InitDumpOptions(DumpOptions *opts) opts->dumpSections = DUMP_UNSECTIONED; opts->dumpSchema = true; opts->dumpData = true; - opts->dumpStatistics = true; + opts->dumpStatistics = false; } /* diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index cf34f71ea1196..386e21e0c596a 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -68,6 +68,7 @@ '--no-data', '--sequence-data', '--binary-upgrade', + '--with-statistics', '--dbname' => 'postgres', # alternative way to specify database ], restore_cmd => [ @@ -75,6 +76,7 @@ '--format' => 'custom', '--verbose', '--file' => "$tempdir/binary_upgrade.sql", + '--with-statistics', "$tempdir/binary_upgrade.dump", ], }, @@ -88,11 +90,13 @@ '--format' => 'custom', '--compress' => '1', '--file' => "$tempdir/compression_gzip_custom.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_gzip_custom.sql", + '--with-statistics', "$tempdir/compression_gzip_custom.dump", ], command_like => { @@ -115,6 +119,7 @@ '--format' => 'directory', '--compress' => 'gzip:1', '--file' => "$tempdir/compression_gzip_dir", + '--with-statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -132,6 +137,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_gzip_dir.sql", + '--with-statistics', "$tempdir/compression_gzip_dir", ], }, @@ -144,6 +150,7 @@ '--format' => 'plain', '--compress' => '1', '--file' => "$tempdir/compression_gzip_plain.sql.gz", + '--with-statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -162,11 +169,13 @@ '--format' => 'custom', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_custom.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_lz4_custom.sql", + '--with-statistics', "$tempdir/compression_lz4_custom.dump", ], command_like => { @@ -189,6 +198,7 @@ '--format' => 'directory', '--compress' => 'lz4:1', '--file' => "$tempdir/compression_lz4_dir", + '--with-statistics', 'postgres', ], # Verify that data files were compressed @@ -200,6 +210,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_lz4_dir.sql", + '--with-statistics', "$tempdir/compression_lz4_dir", ], }, @@ -212,6 +223,7 @@ '--format' => 'plain', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_plain.sql.lz4", + '--with-statistics', 'postgres', ], # Decompress the generated file to run through the tests. 
@@ -233,11 +245,13 @@ '--format' => 'custom', '--compress' => 'zstd', '--file' => "$tempdir/compression_zstd_custom.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_zstd_custom.sql", + '--with-statistics', "$tempdir/compression_zstd_custom.dump", ], command_like => { @@ -259,6 +273,7 @@ '--format' => 'directory', '--compress' => 'zstd:1', '--file' => "$tempdir/compression_zstd_dir", + '--with-statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -279,6 +294,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_zstd_dir.sql", + '--with-statistics', "$tempdir/compression_zstd_dir", ], }, @@ -292,6 +308,7 @@ '--format' => 'plain', '--compress' => 'zstd:long', '--file' => "$tempdir/compression_zstd_plain.sql.zst", + '--with-statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -310,6 +327,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/clean.sql", '--clean', + '--with-statistics', '--dbname' => 'postgres', # alternative way to specify database ], }, @@ -320,6 +338,7 @@ '--clean', '--if-exists', '--encoding' => 'UTF8', # no-op, just for testing + '--with-statistics', 'postgres', ], }, @@ -338,6 +357,7 @@ '--create', '--no-reconnect', # no-op, just for testing '--verbose', + '--with-statistics', 'postgres', ], }, @@ -356,6 +376,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults.sql", + '--with-statistics', 'postgres', ], }, @@ -364,6 +385,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_no_public.sql", + '--with-statistics', 'regress_pg_dump_test', ], }, @@ -373,6 +395,7 @@ 'pg_dump', '--no-sync', '--clean', '--file' => "$tempdir/defaults_no_public_clean.sql", + '--with-statistics', 'regress_pg_dump_test', ], }, @@ -381,6 +404,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_public_owner.sql", + '--with-statistics', 'regress_public_owner', ], }, @@ -395,12 +419,14 @@ 'pg_dump', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.sql", + '--with-statistics', "$tempdir/defaults_custom_format.dump", ], command_like => { @@ -425,12 +451,14 @@ 'pg_dump', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format.sql", + '--with-statistics', "$tempdir/defaults_dir_format", ], command_like => { @@ -456,11 +484,13 @@ '--format' => 'directory', '--jobs' => 2, '--file' => "$tempdir/defaults_parallel", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/defaults_parallel.sql", + '--with-statistics', "$tempdir/defaults_parallel", ], }, @@ -472,12 +502,14 @@ 'pg_dump', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.tar", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.sql", + '--with-statistics', "$tempdir/defaults_tar_format.tar", ], }, @@ -486,6 +518,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_dump_test_schema.sql", '--exclude-schema' => 'dump_test', + '--with-statistics', 'postgres', ], }, @@ -494,6 +527,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_test_table.sql", '--exclude-table' => 'dump_test.test_table', + 
'--with-statistics', 'postgres', ], }, @@ -502,6 +536,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_measurement.sql", '--exclude-table-and-children' => 'dump_test.measurement', + '--with-statistics', 'postgres', ], }, @@ -511,6 +546,7 @@ '--file' => "$tempdir/exclude_measurement_data.sql", '--exclude-table-data-and-children' => 'dump_test.measurement', '--no-unlogged-table-data', + '--with-statistics', 'postgres', ], }, @@ -520,6 +556,7 @@ '--file' => "$tempdir/exclude_test_table_data.sql", '--exclude-table-data' => 'dump_test.test_table', '--no-unlogged-table-data', + '--with-statistics', 'postgres', ], }, @@ -538,6 +575,7 @@ '--file' => "$tempdir/pg_dumpall_globals.sql", '--globals-only', '--no-sync', + '--with-statistics', ], }, pg_dumpall_globals_clean => { @@ -547,12 +585,14 @@ '--globals-only', '--clean', '--no-sync', + '--with-statistics', ], }, pg_dumpall_dbprivs => { dump_cmd => [ 'pg_dumpall', '--no-sync', '--file' => "$tempdir/pg_dumpall_dbprivs.sql", + '--with-statistics', ], }, pg_dumpall_exclude => { @@ -562,6 +602,7 @@ '--file' => "$tempdir/pg_dumpall_exclude.sql", '--exclude-database' => '*dump_test*', '--no-sync', + '--with-statistics', ], }, no_toast_compression => { @@ -569,6 +610,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_toast_compression.sql", '--no-toast-compression', + '--with-statistics', 'postgres', ], }, @@ -577,6 +619,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_large_objects.sql", '--no-large-objects', + '--with-statistics', 'postgres', ], }, @@ -585,6 +628,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_policies.sql", '--no-policies', + '--with-statistics', 'postgres', ], }, @@ -593,6 +637,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_privs.sql", '--no-privileges', + '--with-statistics', 'postgres', ], }, @@ -601,6 +646,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_owner.sql", '--no-owner', + '--with-statistics', 'postgres', ], }, @@ -609,6 +655,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_table_access_method.sql", '--no-table-access-method', + '--with-statistics', 'postgres', ], }, @@ -617,6 +664,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/only_dump_test_schema.sql", '--schema' => 'dump_test', + '--with-statistics', 'postgres', ], }, @@ -627,6 +675,7 @@ '--table' => 'dump_test.test_table', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), + '--with-statistics', 'postgres', ], }, @@ -637,6 +686,7 @@ '--table-and-children' => 'dump_test.measurement', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), + '--with-statistics', 'postgres', ], }, @@ -646,6 +696,7 @@ '--file' => "$tempdir/role.sql", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', + '--with-statistics', 'postgres', ], }, @@ -658,11 +709,13 @@ '--file' => "$tempdir/role_parallel", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/role_parallel.sql", + '--with-statistics', "$tempdir/role_parallel", ], }, @@ -691,6 +744,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_pre_data.sql", '--section' => 'pre-data', + '--with-statistics', 'postgres', ], }, @@ -699,6 +753,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_data.sql", '--section' => 'data', + '--with-statistics', 'postgres', ], }, @@ -707,6 +762,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_post_data.sql", '--section' => 'post-data', + 
'--with-statistics', 'postgres', ], }, @@ -717,6 +773,7 @@ '--schema' => 'dump_test', '--large-objects', '--no-large-objects', + '--with-statistics', 'postgres', ], }, @@ -732,6 +789,7 @@ 'pg_dump', '--no-sync', "--file=$tempdir/no_data_no_schema.sql", '--no-data', '--no-schema', 'postgres', + '--with-statistics', ], }, statistics_only => { @@ -752,6 +810,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', "--file=$tempdir/no_schema.sql", '--no-schema', + '--with-statistics', 'postgres', ], },); diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 23cb08e83476e..183f08ce1e86f 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -58,7 +58,7 @@ generate_old_dump(void) (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? "" : "--sequence-data", log_opts.verbose ? "--verbose" : "", - user_opts.do_statistics ? "" : "--no-statistics", + user_opts.do_statistics ? "--with-statistics" : "--no-statistics", log_opts.dumpdir, sql_file_name, escaped_connstr.data); From 08b8aa1748400ca7dcecb289ffca5e90ac33d274 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Tue, 27 May 2025 17:34:45 -0400 Subject: [PATCH 008/602] doc PG 18 relnotes: fix markup Reported-by: Peter Smith Discussion: https://postgr.es/m/CAHut+PswZ7wFtpNgv3bdtYK5D0eGMpvz4CcnAxvj7gR_acazGQ@mail.gmail.com --- doc/src/sgml/release-18.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 2ae03065f9451..67fde0c1baa51 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -2745,7 +2745,7 @@ This is to handle cases where a pre-Postgres 18 cluster's default CPU signedness - Logical Replication Applications> + Logical Replication Applications From 3e782ca32225e5d5219251a5a3c06698c1e824f8 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Tue, 27 May 2025 17:50:52 -0400 Subject: [PATCH 009/602] doc PG 18 relnotes: add removal details to MD5 item Reported-by: Nathan Bossart Author: Nathan Bossart Discussion: https://postgr.es/m/aDXLoTcBYjfyqeTA@nathan --- doc/src/sgml/release-18.sgml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 67fde0c1baa51..619592bd882d1 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -82,7 +82,8 @@ Deprecate MD5 password authentication (Nathan Bossart) -Warnings generated by their use can be disabled by the server variable md5_password_warnings. +Support for MD5 passwords will be removed in a future major version release. CREATE ROLE and ALTER ROLE now emit deprecation warnings when setting MD5 passwords. +These warnings can be disabled by setting the md5_password_warnings parameter to "off". From d46911e584d48ee01d5bf75699a77fbf635c865d Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 28 May 2025 08:58:40 +0900 Subject: [PATCH 010/602] Fix conversion of SIMILAR TO regexes for character classes The code that translates SIMILAR TO pattern matching expressions to POSIX-style regular expressions did not consider that square brackets can be nested. For example, in an expression like [[:alpha:]%_], the logic replaced the placeholders '_' and '%' but it should not. This commit fixes the conversion logic by tracking the nesting level of square brackets marking character class areas, while considering that in expressions like []] or [^]] the first closing square bracket is a regular character. 
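As a hedged illustration of the fixed behavior (hypothetical examples, not taken from the patch): with a nested class such as [[:alpha:]%_], the '%' and '_' now stay literal class members instead of being rewritten:

    SELECT 'a' SIMILAR TO '[[:alpha:]%_]';   -- true: one alphabetic character
    SELECT '%' SIMILAR TO '[[:alpha:]%_]';   -- true: the literal percent sign
    SELECT 'ab' SIMILAR TO '[[:alpha:]%_]';  -- false: the class matches a single character
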
Multiple tests are added to show how the conversions should or should not be applied while in a character class area, with specific cases added for all the characters converted outside character classes like an opening parenthesis '(', dollar sign '$', etc. Author: Laurenz Albe Reviewed-by: Tom Lane Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/16ab039d1af455652bdf4173402ddda145f2c73b.camel@cybertec.at Backpatch-through: 13 --- src/backend/utils/adt/regexp.c | 38 +++++++++++++--- src/test/regress/expected/strings.out | 62 +++++++++++++++++++++++++++ src/test/regress/sql/strings.sql | 20 +++++++++ 3 files changed, 114 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index edee1f7880bde..6e2864cbbda8c 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -773,8 +773,11 @@ similar_escape_internal(text *pat_text, text *esc_text) int plen, elen; bool afterescape = false; - bool incharclass = false; int nquotes = 0; + int charclass_depth = 0; /* Nesting level of character classes, + * encompassed by square brackets */ + int charclass_start = 0; /* State of the character class start, + * for carets */ p = VARDATA_ANY(pat_text); plen = VARSIZE_ANY_EXHDR(pat_text); @@ -904,7 +907,7 @@ similar_escape_internal(text *pat_text, text *esc_text) /* fast path */ if (afterescape) { - if (pchar == '"' && !incharclass) /* escape-double-quote? */ + if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */ { /* emit appropriate part separator, per notes above */ if (nquotes == 0) @@ -953,18 +956,41 @@ similar_escape_internal(text *pat_text, text *esc_text) /* SQL escape character; do not send to output */ afterescape = true; } - else if (incharclass) + else if (charclass_depth > 0) { if (pchar == '\\') *r++ = '\\'; *r++ = pchar; - if (pchar == ']') - incharclass = false; + + /* + * Ignore a closing bracket at the start of a character class. + * Such a bracket is taken literally rather than closing the + * class. "charclass_start" is 1 right at the beginning of a + * class and 2 after an initial caret. + */ + if (pchar == ']' && charclass_start > 2) + charclass_depth--; + else if (pchar == '[') + charclass_depth++; + + /* + * If there is a caret right after the opening bracket, it negates + * the character class, but a following closing bracket should + * still be treated as a normal character. That holds only for + * the first caret, so only the values 1 and 2 mean that closing + * brackets should be taken literally. + */ + if (pchar == '^') + charclass_start++; + else + charclass_start = 3; /* definitely past the start */ } else if (pchar == '[') { + /* start of a character class */ *r++ = pchar; - incharclass = true; + charclass_depth++; + charclass_start = 1; } else if (pchar == '%') { diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 174f0a68331bd..9c0aba5c02996 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -614,6 +614,68 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null; SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error; ERROR: invalid escape string HINT: Escape string must be empty or one character. +-- Characters that should be left alone in character classes when a +-- SIMILAR TO regexp pattern is converted to POSIX style. 
+-- Underscore "_" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_'; + QUERY PLAN +------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:.[_[:alpha:]_].)$'::text) +(2 rows) + +-- Percentage "%" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:.*[%[:alnum:]%].*)$'::text) +(2 rows) + +-- Dot "." +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\.[.[:alnum:].]\.)$'::text) +(2 rows) + +-- Dollar "$" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\$[$[:alnum:]$]\$)$'::text) +(2 rows) + +-- Opening parenthesis "(" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](]('; +ERROR: invalid regular expression: parentheses () not balanced +-- Caret "^" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]\^)$'::text) +(2 rows) + +-- Closing square bracket "]" at the beginning of character class +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%'; + QUERY PLAN +------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:[]%][^]%][^%].*)$'::text) +(2 rows) + +-- Closing square bracket effective after two carets at the beginning +-- of character class. +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^'; + QUERY PLAN +--------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:[^^]\^)$'::text) +(2 rows) + -- Test backslash escapes in regexp_replace's replacement string SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); regexp_replace diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index f7b325baadf4d..2a4fa53499746 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -197,6 +197,26 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true; SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null; SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error; +-- Characters that should be left alone in character classes when a +-- SIMILAR TO regexp pattern is converted to POSIX style. +-- Underscore "_" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_'; +-- Percentage "%" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%'; +-- Dot "." 
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].'; +-- Dollar "$" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; +-- Opening parenthesis "(" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](]('; +-- Caret "^" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; +-- Closing square bracket "]" at the beginning of character class +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%'; +-- Closing square bracket effective after two carets at the beginning +-- of character class. +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^'; + -- Test backslash escapes in regexp_replace's replacement string SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y', 'g'); From 4fbb46f61271f4b7f46ecad3de608fc2f4d7d80f Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 28 May 2025 09:43:31 +0900 Subject: [PATCH 011/602] Adjust regex for test with opening parenthesis in character classes As written, the test was throwing an error because of an unbalanced parenthesis. The regex used in the test is adjusted to not fail and to test the case of an opening parenthesis in a character class after some nested square brackets. Oversight in d46911e584d4. Discussion: https://postgr.es/m/16ab039d1af455652bdf4173402ddda145f2c73b.camel@cybertec.at --- src/test/regress/expected/strings.out | 9 +++++++-- src/test/regress/sql/strings.sql | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 9c0aba5c02996..788844abd20e3 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -649,8 +649,13 @@ EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$' (2 rows) -- Opening parenthesis "(" -EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](]('; -ERROR: invalid regular expression: parentheses () not balanced +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '()[([:alnum:](]()'; + QUERY PLAN +------------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:(?:)[([:alnum:](](?:))$'::text) +(2 rows) + -- Caret "^" EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; QUERY PLAN diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 2a4fa53499746..2577a42987de7 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -208,7 +208,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].' 
-- Dollar "$" EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; -- Opening parenthesis "(" -EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](]('; +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '()[([:alnum:](]()'; -- Caret "^" EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; -- Closing square bracket "]" at the beginning of character class From c861092b0e0fe2393bc5910843254714121f9e8f Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 28 May 2025 12:34:11 -0400 Subject: [PATCH 012/602] doc PG 18 relnotes: clarify multiplication item Reported-by: Dean Rasheed Author: Dean Rasheed Discussion: https://postgr.es/m/CAEZATCXZGU3LLMZHobYys1MLpyNMAus7+UUpWeeFYwSaPNC2CA@mail.gmail.com --- doc/src/sgml/release-18.sgml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 619592bd882d1..536d7c049789c 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -2905,13 +2905,22 @@ Add ARM Neon and SVE CPU intrinsics for popcount (integer bit counting) (Chiranm -Improve the speed of multiplication (Joel Jacobson, Dean Rasheed) +Improve the speed of numeric multiplication and division (Joel Jacobson, Dean Rasheed) +§ +§ § +§ From be86ca103a41224e091a0d9aaf30605a935546ec Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 28 May 2025 13:29:32 -0400 Subject: [PATCH 013/602] Fix memory leakage when function compilation fails. In pl_comp.c, initially create the plpgsql function's cache context under the assumed-short-lived caller's context, and reparent it under CacheMemoryContext only upon success. This avoids a process-lifespan leak of 8kB or more if the function contains syntax errors. (This leakage has existed for a long time without many complaints, but as we move towards a possibly multi-threaded future, getting rid of process-lifespan leaks grows more important.) In funccache.c, arrange to reclaim the CachedFunction struct in case the language-specific compile callback function throws an error; previously, that resulted in an independent process-lifespan leak. This is arguably a new bug in v18, since the leakage now occurred for SQL-language functions as well as plpgsql. Also, don't fill fn_xmin/fn_tid/dcallback until after successful completion of the compile callback. This avoids a scenario where a partially-built function cache might appear already valid upon later inspection, and another scenario where dcallback might fail upon being presented with an incomplete cache entry. We would have to reach such a faulty cache entry via a pre-existing fn_extra pointer, so I'm not sure these scenarios correspond to any live bug. (The predecessor code in pl_comp.c never took any care about this, and we've heard no complaints about that.) Still, it's better to be careful. Given the lack of field complaints, I'm not very excited about back-patching any of this; but it seems still in-scope for v18. 
Discussion: https://postgr.es/m/999171.1748300004@sss.pgh.pa.us --- src/backend/utils/cache/funccache.c | 38 +++++++++++++++++++++++------ src/pl/plpgsql/src/pl_comp.c | 12 ++++++++- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/cache/funccache.c b/src/backend/utils/cache/funccache.c index 150c502a6121b..afc048a051ead 100644 --- a/src/backend/utils/cache/funccache.c +++ b/src/backend/utils/cache/funccache.c @@ -491,6 +491,7 @@ cached_function_compile(FunctionCallInfo fcinfo, CachedFunctionHashKey hashkey; bool function_valid = false; bool hashkey_valid = false; + bool new_function = false; /* * Lookup the pg_proc tuple by Oid; we'll need it in any case @@ -570,13 +571,15 @@ cached_function_compile(FunctionCallInfo fcinfo, /* * Create the new function struct, if not done already. The function - * structs are never thrown away, so keep them in TopMemoryContext. + * cache entry will be kept for the life of the backend, so put it in + * TopMemoryContext. */ Assert(cacheEntrySize >= sizeof(CachedFunction)); if (function == NULL) { function = (CachedFunction *) MemoryContextAllocZero(TopMemoryContext, cacheEntrySize); + new_function = true; } else { @@ -585,17 +588,36 @@ cached_function_compile(FunctionCallInfo fcinfo, } /* - * Fill in the CachedFunction part. fn_hashkey and use_count remain - * zeroes for now. + * However, if function compilation fails, we'd like not to leak the + * function struct, so use a PG_TRY block to prevent that. (It's up + * to the compile callback function to avoid its own internal leakage + * in such cases.) Unfortunately, freeing the struct is only safe if + * we just allocated it: otherwise there are probably fn_extra + * pointers to it. */ - function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); - function->fn_tid = procTup->t_self; - function->dcallback = dcallback; + PG_TRY(); + { + /* + * Do the hard, language-specific part. + */ + ccallback(fcinfo, procTup, &hashkey, function, forValidator); + } + PG_CATCH(); + { + if (new_function) + pfree(function); + PG_RE_THROW(); + } + PG_END_TRY(); /* - * Do the hard, language-specific part. + * Fill in the CachedFunction part. (We do this last to prevent the + * function from looking valid before it's fully built.) fn_hashkey + * will be set by cfunc_hashtable_insert; use_count remains zero. */ - ccallback(fcinfo, procTup, &hashkey, function, forValidator); + function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + function->fn_tid = procTup->t_self; + function->dcallback = dcallback; /* * Add the completed struct to the hash table. diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 519f7695d7c14..b80c59447fb57 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -226,8 +226,13 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, /* * All the permanent output of compilation (e.g. parse tree) is kept in a * per-function memory context, so it can be reclaimed easily. + * + * While the func_cxt needs to be long-lived, we initially make it a child + * of the assumed-short-lived caller's context, and reparent it under + * CacheMemoryContext only upon success. This arrangement avoids memory + * leakage during compilation of a faulty function. 
*/ - func_cxt = AllocSetContextCreate(TopMemoryContext, + func_cxt = AllocSetContextCreate(CurrentMemoryContext, "PL/pgSQL function", ALLOCSET_DEFAULT_SIZES); plpgsql_compile_tmp_cxt = MemoryContextSwitchTo(func_cxt); @@ -703,6 +708,11 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, if (plpgsql_DumpExecTree) plpgsql_dumptree(function); + /* + * All is well, so make the func_cxt long-lived + */ + MemoryContextSetParent(func_cxt, CacheMemoryContext); + /* * Pop the error context stack */ From e5d64fd6545d1339b58e604b812f1a1200b48839 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 28 May 2025 15:10:48 -0400 Subject: [PATCH 014/602] Tighten parsing of datetime input. ParseFraction only expects to deal with fields that contain a decimal point and digit(s). However it's possible in some edge cases for it to be passed input that doesn't look like that. In particular the input could look like a valid floating-point number, such as ".123e6". strtod() will happily eat that, possibly producing a result that is not within the expected range 0..1, which can result in integer overflow in the callers. That doesn't have any security consequences, but it's still not very desirable. Fix by checking that the input has the expected form. Similarly, DecodeNumberField only expects to deal with fields that contain a decimal point and digit(s), but it's sometimes abused to parse strings that might not look like that. This could result in failure to reject bogus input, yielding silly results. Again, fix by rejecting input that doesn't look as-expected. That decision also means that we can affirmatively answer the very old comment questioning whether we couldn't save some duplicative code by using ParseFractionalSecond here. While these changes should only reject input that nobody would consider valid, it still doesn't seem like a change to make in stable branches. Apply to HEAD only. Reported-by: Evgeniy Gorbanev Author: Tom Lane Discussion: https://postgr.es/m/1328335.1748371099@sss.pgh.pa.us --- src/backend/utils/adt/datetime.c | 44 +++++++++++++++----------- src/test/regress/expected/horology.out | 9 ++++++ src/test/regress/sql/horology.sql | 4 +++ 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 793d8a9adccdc..680fee2a8447e 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac) } else { + /* + * On the other hand, let's reject anything that's not digits after + * the ".". strtod is happy with input like ".123e9", but that'd + * break callers' expectation that the result is in 0..1. (It's quite + * difficult to get here with such input, but not impossible.) + */ + if (strspn(cp + 1, "0123456789") != strlen(cp + 1)) + return DTERR_BAD_FORMAT; + errno = 0; *frac = strtod(cp, &cp); - /* check for parse failure */ + /* check for parse failure (probably redundant given prior check) */ if (*cp != '\0' || errno != 0) return DTERR_BAD_FORMAT; } @@ -2958,31 +2967,28 @@ DecodeNumberField(int len, char *str, int fmask, { char *cp; + /* + * This function was originally meant to cope only with DTK_NUMBER fields, + * but we now sometimes abuse it to parse (parts of) DTK_DATE fields, + * which can contain letters and other punctuation. Reject if it's not a + * valid DTK_NUMBER, that is digits and decimal point(s). (ParseFraction + * will reject if there's more than one decimal point.) 
+ */ + if (strspn(str, "0123456789.") != len) + return DTERR_BAD_FORMAT; + /* * Have a decimal point? Then this is a date or something with a seconds * field... */ if ((cp = strchr(str, '.')) != NULL) { - /* - * Can we use ParseFractionalSecond here? Not clear whether trailing - * junk should be rejected ... - */ - if (cp[1] == '\0') - { - /* avoid assuming that strtod will accept "." */ - *fsec = 0; - } - else - { - double frac; + int dterr; - errno = 0; - frac = strtod(cp, NULL); - if (errno != 0) - return DTERR_BAD_FORMAT; - *fsec = rint(frac * 1000000); - } + /* Convert the fraction and store at *fsec */ + dterr = ParseFractionalSecond(cp, fsec); + if (dterr) + return dterr; /* Now truncate off the fraction for further processing */ *cp = '\0'; len = strlen(str); diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index b90bfcd794f45..5ae93d8e8a515 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -467,6 +467,15 @@ SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; ERROR: invalid input syntax for type timestamp with time zone: "Y2001M12D27H04MM05S06.789-08" LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-0... ^ +-- More examples we used to accept and should not +SELECT timestamp with time zone 'J2452271 T X03456-08'; +ERROR: invalid input syntax for type timestamp with time zone: "J2452271 T X03456-08" +LINE 1: SELECT timestamp with time zone 'J2452271 T X03456-08'; + ^ +SELECT timestamp with time zone 'J2452271 T X03456.001e6-08'; +ERROR: invalid input syntax for type timestamp with time zone: "J2452271 T X03456.001e6-08" +LINE 1: SELECT timestamp with time zone 'J2452271 T X03456.001e6-08'... + ^ -- conflicting fields should throw errors SELECT date '1995-08-06 epoch'; ERROR: invalid input syntax for type date: "1995-08-06 epoch" diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index 1310b43277380..8978249a5dc1e 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -102,6 +102,10 @@ SELECT date 'J J 1520447'; SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08'; SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; +-- More examples we used to accept and should not +SELECT timestamp with time zone 'J2452271 T X03456-08'; +SELECT timestamp with time zone 'J2452271 T X03456.001e6-08'; + -- conflicting fields should throw errors SELECT date '1995-08-06 epoch'; SELECT date '1995-08-06 infinity'; From bf6034d00dd4c7e82fe281df5a22c8e49ad76dde Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 28 May 2025 18:43:31 -0400 Subject: [PATCH 015/602] doc PG 18 relnotes: move ANALYZE item,split ANALYZE/EXPLAIN item Reported-by: Yugo Nagata Author: Yugo Nagata Discussion: https://postgr.es/m/20250528232503.7db770f651c2c821c0e3c1df@sraoss.co.jp --- doc/src/sgml/release-18.sgml | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 536d7c049789c..83ff2225396b6 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -762,6 +762,33 @@ mode; tracking must be enabled with the server variable track_cost_delay_timing. 
+ + + + +Add WAL, CPU, and average read statistics output to ANALYZE VERBOSE (Anthonin Bonnefoy) +§ +§ + + + + + + + +Add full WAL buffer count to VACUUM/ANALYZE (VERBOSE) and autovacuum log output (Bertrand Drouvot) +§ + + + - - - -Add WAL, CPU, and average read statistics output to EXPLAIN ANALYZE VERBOSE (Anthonin Bonnefoy) -§ -§ - - - -Add full WAL buffer count to EXPLAIN (WAL), VACUUM/ANALYZE (VERBOSE), and autovacuum log output (Bertrand Drouvot) +Add full WAL buffer count to EXPLAIN (WAL) output (Bertrand Drouvot) § -§ From 089f27cf8aa4716acc5601abce4b14a372c8569e Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 28 May 2025 21:42:34 -0400 Subject: [PATCH 016/602] doc: clarify log_connections new "setup_durations" output --- doc/src/sgml/config.sgml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index ca2a567b2b19f..f4a0191c55b8e 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7527,12 +7527,12 @@ local0.* /var/log/postgresql setup_durations Logs the time spent establishing the connection and setting up the - backend at the time the connection is ready to execute its first - query. The log message includes the total setup duration, starting - from the postmaster accepting the incoming connection and ending - when the connection is ready for query. It also includes the time - it took to fork the new backend and the time it took to - authenticate the user. + backend until the connection is ready to execute its first + query. The log message includes three durations: the total + setup duration (starting from the postmaster accepting the + incoming connection and ending when the connection is ready + for query), the time it took to fork the new backend, and + the time it took to authenticate the user. From 35a428f30b15a3ab0c9a0cc26ade3b4cc3e47d8e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 29 May 2025 11:26:03 +0900 Subject: [PATCH 017/602] pg_stat_statements: Fix parameter number gaps in normalized queries pg_stat_statements anticipates that certain constant locations may be recorded multiple times and attempts to avoid calculating a length for these locations in fill_in_constant_lengths(). However, during generate_normalized_query() where normalized query strings are generated, these locations are not excluded from consideration. This could increment the parameter number counter for every recorded occurrence at such a location, leading to an incorrect normalization in certain cases with gaps in the numbers reported. For example, take this query: SELECT WHERE '1' IN ('2'::int, '3'::int::text) Before this commit, it would be normalized like that, with gaps in the parameter numbers: SELECT WHERE $1 IN ($3::int, $4::int::text) However the correct, less confusing one should be like that: SELECT WHERE $1 IN ($2::int, $3::int::text) This commit fixes the computation of the parameter numbers to track the number of constants replaced with an $n by a separate counter instead of the iterator used to loop through the list of locations. The underlying query IDs are not changed, neither are the normalized strings for existing PGSS hash entries. New entries with fresh normalized queries would automatically get reshaped based on the new parameter numbering. Issue discovered while discussing a separate problem for HEAD, but this affects all the stable branches. 
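
For illustration (this is not one of the new regression tests), the
effect on normalization is visible by inspecting the stored entries
after running such queries:

    SELECT query, calls FROM pg_stat_statements
    ORDER BY query COLLATE "C";
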
Author: Sami Imseih Discussion: https://postgr.es/m/CAA5RZ0tzxvWXsacGyxrixdhy3tTTDfJQqxyFBRFh31nNHBQ5qA@mail.gmail.com Backpatch-through: 13 --- .../pg_stat_statements/expected/extended.out | 58 +++++++++++++++++++ .../pg_stat_statements/expected/select.out | 29 ++++++++++ .../pg_stat_statements/pg_stat_statements.c | 10 +--- contrib/pg_stat_statements/sql/extended.sql | 16 +++++ contrib/pg_stat_statements/sql/select.sql | 8 +++ 5 files changed, 114 insertions(+), 7 deletions(-) diff --git a/contrib/pg_stat_statements/expected/extended.out b/contrib/pg_stat_statements/expected/extended.out index 04a05943372b9..7da308ba84f4f 100644 --- a/contrib/pg_stat_statements/expected/extended.out +++ b/contrib/pg_stat_statements/expected/extended.out @@ -68,3 +68,61 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; 1 | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t (4 rows) +-- Various parameter numbering patterns +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- Unique query IDs with parameter numbers switched. +SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g +-- +(0 rows) + +SELECT WHERE ($2::int, 10) IN ((11, $3::int), ($1::int, 12)) \bind '1' '2' '3' \g +-- +(0 rows) + +SELECT WHERE $1::int IN ($2::int, $3::int) \bind '1' '2' '3' \g +-- +(0 rows) + +SELECT WHERE $2::int IN ($3::int, $1::int) \bind '1' '2' '3' \g +-- +(0 rows) + +SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g +-- +(0 rows) + +-- Two groups of two queries with the same query ID. +SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g +-- +(1 row) + +SELECT WHERE '4'::int IN ($1::int, '5'::int) \bind '2' \g +-- +(0 rows) + +SELECT WHERE $2::int IN ($1::int, '1'::int) \bind '1' '2' \g +-- +(0 rows) + +SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g +-- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------+------- + SELECT WHERE $1::int IN ($2::int, $3::int) | 1 + SELECT WHERE $2::int IN ($1::int, $3::int) | 2 + SELECT WHERE $2::int IN ($1::int, $3::int) | 2 + SELECT WHERE $2::int IN ($3::int, $1::int) | 1 + SELECT WHERE $3::int IN ($1::int, $2::int) | 1 + SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 + SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(8 rows) + diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out index 09476a7b699e9..038ae1103645e 100644 --- a/contrib/pg_stat_statements/expected/select.out +++ b/contrib/pg_stat_statements/expected/select.out @@ -238,6 +238,35 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t; t (1 row) +-- normalization of constants and parameters, with constant locations +-- recorded one or more times. 
+SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT WHERE '1' IN ('1'::int, '3'::int::text); +-- +(1 row) + +SELECT WHERE (1, 2) IN ((1, 2), (2, 3)); +-- +(1 row) + +SELECT WHERE (3, 4) IN ((5, 6), (8, 7)); +-- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT WHERE $1 IN ($2::int, $3::int::text) | 1 + SELECT WHERE ($1, $2) IN (($3, $4), ($5, $6)) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(4 rows) + -- -- queries with locking clauses -- diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index d8fdf42df7935..c58f34e9f3046 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2818,9 +2818,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ bool in_squashed = false; /* in a run of squashed consts? */ - int skipped_constants = 0; /* Position adjustment of later - * constants after squashed ones */ - + int num_constants_replaced = 0; /* * Get constants' lengths (core system only gives us locations). Note @@ -2878,7 +2876,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, /* ... and then a param symbol replacing the constant itself */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id - skipped_constants); + num_constants_replaced++ + 1 + jstate->highest_extern_param_id); /* In case previous constants were merged away, stop doing that */ in_squashed = false; @@ -2902,12 +2900,10 @@ generate_normalized_query(JumbleState *jstate, const char *query, /* ... and then start a run of squashed constants */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", - i + 1 + jstate->highest_extern_param_id - skipped_constants); + num_constants_replaced++ + 1 + jstate->highest_extern_param_id); /* The next location will match the block below, to end the run */ in_squashed = true; - - skipped_constants++; } else { diff --git a/contrib/pg_stat_statements/sql/extended.sql b/contrib/pg_stat_statements/sql/extended.sql index 1af0711020c41..a366658a53a72 100644 --- a/contrib/pg_stat_statements/sql/extended.sql +++ b/contrib/pg_stat_statements/sql/extended.sql @@ -19,3 +19,19 @@ SELECT $1 \bind 'unnamed_val1' \g \bind_named stmt1 'stmt1_val1' \g SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Various parameter numbering patterns +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +-- Unique query IDs with parameter numbers switched. +SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g +SELECT WHERE ($2::int, 10) IN ((11, $3::int), ($1::int, 12)) \bind '1' '2' '3' \g +SELECT WHERE $1::int IN ($2::int, $3::int) \bind '1' '2' '3' \g +SELECT WHERE $2::int IN ($3::int, $1::int) \bind '1' '2' '3' \g +SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g +-- Two groups of two queries with the same query ID. 
+SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g +SELECT WHERE '4'::int IN ($1::int, '5'::int) \bind '2' \g +SELECT WHERE $2::int IN ($1::int, '1'::int) \bind '1' '2' \g +SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql index c5e0b84ee5bf5..189d405512fcd 100644 --- a/contrib/pg_stat_statements/sql/select.sql +++ b/contrib/pg_stat_statements/sql/select.sql @@ -79,6 +79,14 @@ DEALLOCATE pgss_test; SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t; +-- normalization of constants and parameters, with constant locations +-- recorded one or more times. +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT WHERE '1' IN ('1'::int, '3'::int::text); +SELECT WHERE (1, 2) IN ((1, 2), (2, 3)); +SELECT WHERE (3, 4) IN ((5, 6), (8, 7)); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + -- -- queries with locking clauses -- From a1de1b0833b8e940f33fb2c7ab011e841a4d9063 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 28 May 2025 22:43:13 -0400 Subject: [PATCH 018/602] doc PG 18 relnotes: split apart log_connections item Also add details to asynchronous I/O item. Reported-by: Melanie Plageman Discussion: https://postgr.es/m/CAAKRu_YsVvyantS0X0Y_-vp_97=yGaoYJMXXyCEkR7pumAH3Jg@mail.gmail.com --- doc/src/sgml/release-18.sgml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 83ff2225396b6..718f7b20bf120 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -569,6 +569,7 @@ Add an asynchronous I/O subsystem (Andres Freund, Thomas Munro, Nazir Bilal Yavu +This feature allows backends to queue multiple read requests, which allows for more efficient sequential scans, bitmap heap scans, and vacuums. This is enabled by server variable io_method, with server variables io_combine_limit and io_max_combine_limit added to control it. This also enables effective_io_concurrency and maintenance_io_concurrency values greater than zero for systems without fadvise() support. The new system view pg_aios shows the file handles being used for asynchronous I/O. @@ -682,15 +683,12 @@ This more accurately reflects modern hardware. Increase the logging granularity of server variable log_connections (Melanie Plageman) § -§ @@ -698,6 +696,18 @@ This server variable was previously only boolean; these options are still suppo + + + + +Add log_connections option to report the duration of connection stages (Melanie Plageman) +§ + + + + + + +initdb defaults to enabling data checksums +§ + + + +The previous default behavior (checksums disabled) can be obtained using the +new option --no-data-checksums. Note that pg_upgrade will reject upgrading +between clusters with different checksum settings, so if the old cluster does +not have checksums enabled (the previous default), then the new cluster will +need to be initialized with --no-data-checksums in order to allow pg_upgrade +to succeed. 
+ From 30c15987d9b7ecc93674cf8ade0ad53ac265ec3b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 4 Jun 2025 15:27:44 +0200 Subject: [PATCH 045/602] doc: Update description of pg_constraint.convalidated The previous description listed the constraint types that this column was used for, but that was outdated, since not-valid not-null constraints are now possible. So just remove that qualification, rather than trying to keep it updated. Author: jian he Reviewed-by: Robert Treat Discussion: https://www.postgresql.org/message-id/flat/CACJufxFo4yTwzbSZrP%2BzQiR6_M00skoZMFaUnNJCdY6he%3DuQfA%40mail.gmail.com --- doc/src/sgml/catalogs.sgml | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index cbd4e40a320b3..018f6e3e08bae 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2629,7 +2629,6 @@ SCRAM-SHA-256$<iteration count>:&l Has the constraint been validated? - Currently, can be false only for foreign keys and CHECK constraints From f9b1192190aac56a319a1e3e93cf3c29d9f722dd Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 4 Jun 2025 09:47:25 -0500 Subject: [PATCH 046/602] doc: Remove notes about "unencrypted" passwords. The documentation for the pg_authid system catalog and the pg_shadow system view indicates that passwords might be stored in cleartext, but that hasn't been possible for some time. Oversight in commit eb61136dc7. Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/aD2yKkZro4nbl5ol%40nathan Backpatch-through: 13 --- doc/src/sgml/catalogs.sgml | 7 +------ doc/src/sgml/system-views.sgml | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 018f6e3e08bae..fa86c569dc497 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1582,7 +1582,7 @@ rolpassword text - Password (possibly encrypted); null if none. The format depends + Encrypted password; null if none. The format depends on the form of encryption used. @@ -1627,11 +1627,6 @@ SCRAM-SHA-256$<iteration count>:&l ServerKey are in Base64 encoded format. This format is the same as that specified by RFC 5803. - - - A password that does not follow either of those formats is assumed to be - unencrypted. - diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index b58c52ea50f5a..986ae1f543dbd 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -3932,7 +3932,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx passwd text - Password (possibly encrypted); null if none. See + Encrypted password; null if none. See pg_authid for details of how encrypted passwords are stored. From b87163e5f3847730ee5f59718d215c6e63e13bff Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 5 Jun 2025 09:39:24 +0900 Subject: [PATCH 047/602] Fix copy-pasto with process count calculation in method_io_uring.c This commit replaces the formula used for "TotalProcs" with a call to pgaio_uring_procs() in pgaio_uring_shmem_init() for the shared memory initialization, which is exactly the same, removing a duplication. pgaio_uring_procs() is used for shared memory sizing and a sanity check, and it has some documentation explaining some reasoning behind the formula. 
Author: Japin Li Discussion: https://postgr.es/m/ME0P300MB044521067A1EDDA9EDEC3793B66DA@ME0P300MB0445.AUSP300.PROD.OUTLOOK.COM --- src/backend/storage/aio/method_io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index c719ba2727a81..cc312b641ca6f 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -126,7 +126,7 @@ pgaio_uring_shmem_size(void) static void pgaio_uring_shmem_init(bool first_time) { - int TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS; + int TotalProcs = pgaio_uring_procs(); bool found; pgaio_uring_contexts = (PgAioUringContext *) From 112e40b867b3351ecc49b86a4fa07ec182b82e65 Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Thu, 5 Jun 2025 09:54:16 +0200 Subject: [PATCH 048/602] psql: fix order of join clauses when listing extensions Commit d696406a9b2 added a new join to the query for extensions, but did so in the wrong place, causing the AND clause to be applied to the wrong join. Author: Suraj Kharage Reviewed-By: Dilip Kumar Discussion: https://postgr.es/m/CAF1DzPVBrN-cmPB2zb7ZU=2J4vEF2fNdArGCG9w+9fnKq4v8tg@mail.gmail.com --- src/bin/psql/describe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 1d08268393e3c..24e0100c9f0a8 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -6188,8 +6188,8 @@ listExtensions(const char *pattern) "FROM pg_catalog.pg_extension e " "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace " "LEFT JOIN pg_catalog.pg_description d ON d.objoid = e.oid " - "LEFT JOIN pg_catalog.pg_available_extensions() ae(name, default_version, comment) ON ae.name = e.extname " - "AND d.classoid = 'pg_catalog.pg_extension'::pg_catalog.regclass\n", + "AND d.classoid = 'pg_catalog.pg_extension'::pg_catalog.regclass " + "LEFT JOIN pg_catalog.pg_available_extensions() ae(name, default_version, comment) ON ae.name = e.extname\n", gettext_noop("Name"), gettext_noop("Version"), gettext_noop("Default version"), From 4b05ebf0957bc796b9ea8f4d2ab5985714ef3252 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 5 Jun 2025 11:05:53 -0400 Subject: [PATCH 049/602] Change role names used in trigger test. The choices made in commit 01463e1cc might pose copyright hazards, and are more cutesy than informative anyway. 
Reported-by: Noah Misch Author: Tom Lane Discussion: https://postgr.es/m/20250415155850.9b.nmisch@google.com --- src/test/regress/expected/triggers.out | 26 +++++++++++++------------- src/test/regress/sql/triggers.sql | 18 +++++++++--------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out index f245d7f154924..2bf0e77d61ecb 100644 --- a/src/test/regress/expected/triggers.out +++ b/src/test/regress/expected/triggers.out @@ -3535,8 +3535,8 @@ drop table parent, child; drop function f(); -- Test who runs deferred trigger functions -- setup -create role regress_groot; -create role regress_outis; +create role regress_caller; +create role regress_fn_owner; create function whoami() returns trigger language plpgsql as $$ begin @@ -3544,7 +3544,7 @@ begin return null; end; $$; -alter function whoami() owner to regress_outis; +alter function whoami() owner to regress_fn_owner; create table defer_trig (id integer); grant insert on defer_trig to public; create constraint trigger whoami after insert on defer_trig @@ -3553,23 +3553,23 @@ create constraint trigger whoami after insert on defer_trig execute function whoami(); -- deferred triggers must run as the user that queued the trigger begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (1); reset role; -set role regress_outis; +set role regress_fn_owner; insert into defer_trig values (2); reset role; commit; -NOTICE: I am regress_groot -NOTICE: I am regress_outis +NOTICE: I am regress_caller +NOTICE: I am regress_fn_owner -- security definer functions override the user who queued the trigger alter function whoami() security definer; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (3); reset role; commit; -NOTICE: I am regress_outis +NOTICE: I am regress_fn_owner alter function whoami() security invoker; -- make sure the current user is restored after error create or replace function whoami() returns trigger language plpgsql @@ -3581,11 +3581,11 @@ begin end; $$; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (4); reset role; commit; -- error expected -NOTICE: I am regress_groot +NOTICE: I am regress_caller ERROR: division by zero CONTEXT: SQL statement "SELECT 1 / 0" PL/pgSQL function whoami() line 4 at PERFORM @@ -3598,5 +3598,5 @@ select current_user = session_user; -- clean up drop table defer_trig; drop function whoami(); -drop role regress_outis; -drop role regress_groot; +drop role regress_fn_owner; +drop role regress_caller; diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql index d3d242dd29b74..9ffd318385ff6 100644 --- a/src/test/regress/sql/triggers.sql +++ b/src/test/regress/sql/triggers.sql @@ -2701,8 +2701,8 @@ drop function f(); -- Test who runs deferred trigger functions -- setup -create role regress_groot; -create role regress_outis; +create role regress_caller; +create role regress_fn_owner; create function whoami() returns trigger language plpgsql as $$ begin @@ -2710,7 +2710,7 @@ begin return null; end; $$; -alter function whoami() owner to regress_outis; +alter function whoami() owner to regress_fn_owner; create table defer_trig (id integer); grant insert on defer_trig to public; @@ -2721,10 +2721,10 @@ create constraint trigger whoami after insert on defer_trig -- deferred triggers must run as the user that queued the trigger begin; -set role regress_groot; +set role regress_caller; insert 
into defer_trig values (1); reset role; -set role regress_outis; +set role regress_fn_owner; insert into defer_trig values (2); reset role; commit; @@ -2732,7 +2732,7 @@ commit; -- security definer functions override the user who queued the trigger alter function whoami() security definer; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (3); reset role; commit; @@ -2749,7 +2749,7 @@ end; $$; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (4); reset role; commit; -- error expected @@ -2758,5 +2758,5 @@ select current_user = session_user; -- clean up drop table defer_trig; drop function whoami(); -drop role regress_outis; -drop role regress_groot; +drop role regress_fn_owner; +drop role regress_caller; From cc733ed164c5b57fdf34d16e4cc8e9bbdc171699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 5 Jun 2025 17:17:13 +0200 Subject: [PATCH 050/602] Avoid bogus scans of partitions when validating FKs to partitioned tables Validating an unvalidated foreign key that references a partitioned table would try to queue validations for each individual partition of the referenced table, but this is wrong: each individual partition would not necessarily have all the referenced rows, so errors would be raised. Avoid doing that. The pg_constraint rows that cause this to happen are only there to support the action triggers that implement the DELETE/ UPDATE actions of the FK, so no validating scan is necessary. This was an oversight in commit b663b9436e75. An equivalent oversight exists for NOT ENFORCED constraints, which is not fixed in this commit. Author: Amul Sul Reported-by: Antonin Houska Reviewed-by: jian he Reviewed-by: Tender Wang Discussion: https://postgr.es/m/26983.1748418675@localhost --- src/backend/commands/tablecmds.c | 51 ++++++++++++----- src/test/regress/expected/foreign_key.out | 68 ++++++++++++++++++----- src/test/regress/sql/foreign_key.sql | 29 ++++++++-- 3 files changed, 114 insertions(+), 34 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index acf11e83c04e3..164ec56a5929b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -430,8 +430,8 @@ static void AlterConstrUpdateConstraintEntry(ATAlterConstraint *cmdcon, Relation static ObjectAddress ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, bool recurse, bool recursing, LOCKMODE lockmode); -static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, - HeapTuple contuple, LOCKMODE lockmode); +static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel, + Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode); static void QueueCheckConstraintValidation(List **wqueue, Relation conrel, Relation rel, char *constrName, HeapTuple contuple, bool recurse, bool recursing, LOCKMODE lockmode); @@ -11858,6 +11858,7 @@ AttachPartitionForeignKey(List **wqueue, if (queueValidation) { Relation conrel; + Oid confrelid; conrel = table_open(ConstraintRelationId, RowExclusiveLock); @@ -11865,9 +11866,11 @@ AttachPartitionForeignKey(List **wqueue, if (!HeapTupleIsValid(partcontup)) elog(ERROR, "cache lookup failed for constraint %u", partConstrOid); + confrelid = ((Form_pg_constraint) GETSTRUCT(partcontup))->confrelid; + /* Use the same lock as for AT_ValidateConstraint */ - QueueFKConstraintValidation(wqueue, conrel, partition, partcontup, - ShareUpdateExclusiveLock); + 
QueueFKConstraintValidation(wqueue, conrel, partition, confrelid,
+											partcontup, ShareUpdateExclusiveLock);
 		ReleaseSysCache(partcontup);
 		table_close(conrel, RowExclusiveLock);
 	}
@@ -12919,7 +12922,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
 	{
 		if (con->contype == CONSTRAINT_FOREIGN)
 		{
-			QueueFKConstraintValidation(wqueue, conrel, rel, tuple, lockmode);
+			QueueFKConstraintValidation(wqueue, conrel, rel, con->confrelid,
+										tuple, lockmode);
 		}
 		else if (con->contype == CONSTRAINT_CHECK)
 		{
@@ -12952,8 +12956,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
  * for the specified relation and all its children.
  */
 static void
-QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
-							HeapTuple contuple, LOCKMODE lockmode)
+QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+							Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode)
 {
 	Form_pg_constraint con;
 	AlteredTableInfo *tab;
@@ -12964,7 +12968,17 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 	Assert(con->contype == CONSTRAINT_FOREIGN);
 	Assert(!con->convalidated);
 
-	if (rel->rd_rel->relkind == RELKIND_RELATION)
+	/*
+	 * Add the validation to phase 3's queue; not needed for partitioned
+	 * tables themselves, only for their partitions.
+	 *
+	 * When the referenced table (pkrelid) is partitioned, the referencing
+	 * table (fkrel) has one pg_constraint row pointing to each partition
+	 * thereof.  These rows are there only to support action triggers and no
+	 * table scan is needed, therefore skip this for them as well.
+	 */
+	if (fkrel->rd_rel->relkind == RELKIND_RELATION &&
+		con->confrelid == pkrelid)
 	{
 		NewConstraint *newcon;
 		Constraint *fkconstraint;
@@ -12983,15 +12997,16 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 		newcon->qual = (Node *) fkconstraint;
 
 		/* Find or create work queue entry for this table */
-		tab = ATGetQueueEntry(wqueue, rel);
+		tab = ATGetQueueEntry(wqueue, fkrel);
 		tab->constraints = lappend(tab->constraints, newcon);
 	}
 
 	/*
 	 * If the table at either end of the constraint is partitioned, we need to
-	 * recurse and handle every constraint that is a child of this constraint.
+	 * recurse and handle every unvalidated constraint that is a child of this
+	 * constraint.
 	 */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+	if (fkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
 		get_rel_relkind(con->confrelid) == RELKIND_PARTITIONED_TABLE)
 	{
 		ScanKeyData pkey;
@@ -13023,8 +13038,12 @@
 
 			childrel = table_open(childcon->conrelid, lockmode);
 
-			QueueFKConstraintValidation(wqueue, conrel, childrel, childtup,
-										lockmode);
+			/*
+			 * NB: Note that pkrelid should be passed as-is during recursion,
+			 * as it is required to identify the root referenced table.
+			 */
+			QueueFKConstraintValidation(wqueue, conrel, childrel, pkrelid,
+										childtup, lockmode);
 			table_close(childrel, NoLock);
 		}
 
@@ -13032,7 +13051,11 @@
 	}
 
 	/*
-	 * Now update the catalog, while we have the door open.
+	 * Now mark the pg_constraint row as validated (even if we didn't check,
+	 * notably the ones for partitions on the referenced side).
+	 *
+	 * We rely on transaction abort to roll back this change if phase 3
+	 * ultimately finds violating rows.  This is a bit ugly.
+	 
*/ copyTuple = heap_copytuple(contuple); copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index 4f3f280a439bc..cfb47b8cafc8b 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -1895,29 +1895,67 @@ WHERE conrelid::regclass::text like 'fk_partitioned_fk%' ORDER BY oid::regclass: (5 rows) DROP TABLE fk_partitioned_fk, fk_notpartitioned_pk; --- NOT VALID foreign key on a non-partitioned table referencing a partitioned table +-- NOT VALID foreign key on a non-partitioned table referencing a partitioned +-- table CREATE TABLE fk_partitioned_pk (a int, b int, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b); CREATE TABLE fk_partitioned_pk_1 PARTITION OF fk_partitioned_pk FOR VALUES FROM (0,0) TO (1000,1000); +CREATE TABLE fk_partitioned_pk_2 PARTITION OF fk_partitioned_pk FOR VALUES FROM (1000,1000) TO (2000,2000); CREATE TABLE fk_notpartitioned_fk (b int, a int); -ALTER TABLE fk_notpartitioned_fk ADD FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; --- Constraint will be invalid. -SELECT conname, convalidated FROM pg_constraint +INSERT INTO fk_partitioned_pk VALUES(100,100), (1000,1000); +INSERT INTO fk_notpartitioned_fk VALUES(100,100), (1000,1000); +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +-- All constraints will be invalid. +SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; - conname | convalidated ----------------------------------+-------------- - fk_notpartitioned_fk_a_b_fkey | f - fk_notpartitioned_fk_a_b_fkey_1 | f -(2 rows) + conname | conenforced | convalidated +---------------------------------+-------------+-------------- + fk_notpartitioned_fk_a_b_fkey | t | f + fk_notpartitioned_fk_a_b_fkey_1 | t | f + fk_notpartitioned_fk_a_b_fkey_2 | t | f +(3 rows) ALTER TABLE fk_notpartitioned_fk VALIDATE CONSTRAINT fk_notpartitioned_fk_a_b_fkey; -- All constraints are now valid. 
-SELECT conname, convalidated FROM pg_constraint +SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; - conname | convalidated ----------------------------------+-------------- - fk_notpartitioned_fk_a_b_fkey | t - fk_notpartitioned_fk_a_b_fkey_1 | t -(2 rows) + conname | conenforced | convalidated +---------------------------------+-------------+-------------- + fk_notpartitioned_fk_a_b_fkey | t | t + fk_notpartitioned_fk_a_b_fkey_1 | t | t + fk_notpartitioned_fk_a_b_fkey_2 | t | t +(3 rows) + +-- test a self-referential FK +ALTER TABLE fk_partitioned_pk ADD CONSTRAINT selffk FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +CREATE TABLE fk_partitioned_pk_3 PARTITION OF fk_partitioned_pk FOR VALUES FROM (2000,2000) TO (3000,3000) + PARTITION BY RANGE (a); +CREATE TABLE fk_partitioned_pk_3_1 PARTITION OF fk_partitioned_pk_3 FOR VALUES FROM (2000) TO (2100); +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; + conname | conenforced | convalidated +------------+-------------+-------------- + selffk | t | f + selffk_1 | t | f + selffk_2 | t | f + selffk_3 | t | f + selffk_3_1 | t | f +(5 rows) + +ALTER TABLE fk_partitioned_pk_2 VALIDATE CONSTRAINT selffk; +ALTER TABLE fk_partitioned_pk VALIDATE CONSTRAINT selffk; +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; + conname | conenforced | convalidated +------------+-------------+-------------- + selffk | t | t + selffk_1 | t | t + selffk_2 | t | t + selffk_3 | t | t + selffk_3_1 | t | t +(5 rows) DROP TABLE fk_notpartitioned_fk, fk_partitioned_pk; -- Test some other exotic foreign key features: MATCH SIMPLE, ON UPDATE/DELETE diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index 8159e36302242..6126868e368ad 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1389,22 +1389,41 @@ WHERE conrelid::regclass::text like 'fk_partitioned_fk%' ORDER BY oid::regclass: DROP TABLE fk_partitioned_fk, fk_notpartitioned_pk; --- NOT VALID foreign key on a non-partitioned table referencing a partitioned table +-- NOT VALID foreign key on a non-partitioned table referencing a partitioned +-- table CREATE TABLE fk_partitioned_pk (a int, b int, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b); CREATE TABLE fk_partitioned_pk_1 PARTITION OF fk_partitioned_pk FOR VALUES FROM (0,0) TO (1000,1000); +CREATE TABLE fk_partitioned_pk_2 PARTITION OF fk_partitioned_pk FOR VALUES FROM (1000,1000) TO (2000,2000); CREATE TABLE fk_notpartitioned_fk (b int, a int); -ALTER TABLE fk_notpartitioned_fk ADD FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +INSERT INTO fk_partitioned_pk VALUES(100,100), (1000,1000); +INSERT INTO fk_notpartitioned_fk VALUES(100,100), (1000,1000); +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; --- Constraint will be invalid. -SELECT conname, convalidated FROM pg_constraint +-- All constraints will be invalid. 
+SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; ALTER TABLE fk_notpartitioned_fk VALIDATE CONSTRAINT fk_notpartitioned_fk_a_b_fkey; -- All constraints are now valid. -SELECT conname, convalidated FROM pg_constraint +SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; +-- test a self-referential FK +ALTER TABLE fk_partitioned_pk ADD CONSTRAINT selffk FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +CREATE TABLE fk_partitioned_pk_3 PARTITION OF fk_partitioned_pk FOR VALUES FROM (2000,2000) TO (3000,3000) + PARTITION BY RANGE (a); +CREATE TABLE fk_partitioned_pk_3_1 PARTITION OF fk_partitioned_pk_3 FOR VALUES FROM (2000) TO (2100); +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; +ALTER TABLE fk_partitioned_pk_2 VALIDATE CONSTRAINT selffk; +ALTER TABLE fk_partitioned_pk VALIDATE CONSTRAINT selffk; +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; + DROP TABLE fk_notpartitioned_fk, fk_partitioned_pk; -- Test some other exotic foreign key features: MATCH SIMPLE, ON UPDATE/DELETE From 04acad82b0f912e779795a4661f7d63f5e35b9da Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 5 Jun 2025 11:29:24 -0400 Subject: [PATCH 051/602] Doc: you must own the target object to use SECURITY LABEL. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason this wasn't mentioned before. Author: Patrick Stählin Reviewed-by: Tom Lane Discussion: https://postgr.es/m/931e012a-57ba-41ba-9b88-24323a46dec5@packi.ch Backpatch-through: 13 --- doc/src/sgml/ref/security_label.sgml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/src/sgml/ref/security_label.sgml b/doc/src/sgml/ref/security_label.sgml index e5e5fb483e94e..aa45c0af2487b 100644 --- a/doc/src/sgml/ref/security_label.sgml +++ b/doc/src/sgml/ref/security_label.sgml @@ -84,6 +84,10 @@ SECURITY LABEL [ FOR provider ] ON based on object labels, rather than traditional discretionary access control (DAC) concepts such as users and groups. + + + You must own the database object to use SECURITY LABEL. + From e6f98d8848f1803fda32011998c786a1bf4eb87c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 5 Jun 2025 18:39:06 +0200 Subject: [PATCH 052/602] Avoid bogus scans of partitions when marking FKs enforced Similar to commit cc733ed164c5: when an unenforced foreign key that references a partitioned table is altered to be enforced, we scan the constrained table based on each partition on the referenced partitioned table. This is bogus and likely to cause the ALTER TABLE to fail: we must only scan the constrained table as pointing to the top-level partitioned table. Oversight in commit eec0040c4bcd. Fix by eliding those scans. 
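
As a minimal sketch of the operation being fixed (the table and
constraint names below are hypothetical):

    -- A foreign key on a plain table referencing a partitioned table,
    -- created as NOT ENFORCED:
    ALTER TABLE fk_tab ADD CONSTRAINT fk FOREIGN KEY (a, b)
        REFERENCES pk_parted NOT ENFORCED;
    -- Switching it to ENFORCED must scan fk_tab against the top-level
    -- referenced table only, not once per leaf partition:
    ALTER TABLE fk_tab ALTER CONSTRAINT fk ENFORCED;
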
Author: Amul Sul Reported-by: jian he Discussion: https://postgr.es/m/CACJufxF1e_gPOLtsDoaE4VCgQPC8KZW_kPAjPR5Rvv4Ew=fb2A@mail.gmail.com --- src/backend/commands/tablecmds.c | 7 ++-- src/test/regress/expected/foreign_key.out | 41 ++++++++++++++--------- src/test/regress/sql/foreign_key.sql | 11 +++--- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 164ec56a5929b..ea96947d81305 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -12466,9 +12466,12 @@ ATExecAlterConstrEnforceability(List **wqueue, ATAlterConstraint *cmdcon, /* * Tell Phase 3 to check that the constraint is satisfied by existing - * rows. + * rows. Only applies to leaf partitions, and (for constraints that + * reference a partitioned table) only if this is not one of the + * pg_constraint rows that exist solely to support action triggers. */ - if (rel->rd_rel->relkind == RELKIND_RELATION) + if (rel->rd_rel->relkind == RELKIND_RELATION && + currcon->confrelid == pkrelid) { AlteredTableInfo *tab; NewConstraint *newcon; diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index cfb47b8cafc8b..6a8f395934520 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -1895,8 +1895,8 @@ WHERE conrelid::regclass::text like 'fk_partitioned_fk%' ORDER BY oid::regclass: (5 rows) DROP TABLE fk_partitioned_fk, fk_notpartitioned_pk; --- NOT VALID foreign key on a non-partitioned table referencing a partitioned --- table +-- NOT VALID and NOT ENFORCED foreign key on a non-partitioned table +-- referencing a partitioned table CREATE TABLE fk_partitioned_pk (a int, b int, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b); CREATE TABLE fk_partitioned_pk_1 PARTITION OF fk_partitioned_pk FOR VALUES FROM (0,0) TO (1000,1000); CREATE TABLE fk_partitioned_pk_2 PARTITION OF fk_partitioned_pk FOR VALUES FROM (1000,1000) TO (2000,2000); @@ -1905,26 +1905,35 @@ INSERT INTO fk_partitioned_pk VALUES(100,100), (1000,1000); INSERT INTO fk_notpartitioned_fk VALUES(100,100), (1000,1000); ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; --- All constraints will be invalid. +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT ENFORCED; +-- All constraints will be invalid, and _fkey2 constraints will not be enforced. SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; - conname | conenforced | convalidated ----------------------------------+-------------+-------------- - fk_notpartitioned_fk_a_b_fkey | t | f - fk_notpartitioned_fk_a_b_fkey_1 | t | f - fk_notpartitioned_fk_a_b_fkey_2 | t | f -(3 rows) + conname | conenforced | convalidated +----------------------------------+-------------+-------------- + fk_notpartitioned_fk_a_b_fkey | t | f + fk_notpartitioned_fk_a_b_fkey_1 | t | f + fk_notpartitioned_fk_a_b_fkey_2 | t | f + fk_notpartitioned_fk_a_b_fkey2 | f | f + fk_notpartitioned_fk_a_b_fkey2_1 | f | f + fk_notpartitioned_fk_a_b_fkey2_2 | f | f +(6 rows) ALTER TABLE fk_notpartitioned_fk VALIDATE CONSTRAINT fk_notpartitioned_fk_a_b_fkey; --- All constraints are now valid. 
+ALTER TABLE fk_notpartitioned_fk ALTER CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 ENFORCED; +-- All constraints are now valid and enforced. SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; - conname | conenforced | convalidated ----------------------------------+-------------+-------------- - fk_notpartitioned_fk_a_b_fkey | t | t - fk_notpartitioned_fk_a_b_fkey_1 | t | t - fk_notpartitioned_fk_a_b_fkey_2 | t | t -(3 rows) + conname | conenforced | convalidated +----------------------------------+-------------+-------------- + fk_notpartitioned_fk_a_b_fkey | t | t + fk_notpartitioned_fk_a_b_fkey_1 | t | t + fk_notpartitioned_fk_a_b_fkey_2 | t | t + fk_notpartitioned_fk_a_b_fkey2 | t | t + fk_notpartitioned_fk_a_b_fkey2_1 | t | t + fk_notpartitioned_fk_a_b_fkey2_2 | t | t +(6 rows) -- test a self-referential FK ALTER TABLE fk_partitioned_pk ADD CONSTRAINT selffk FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index 6126868e368ad..cfcecb4e911ad 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1389,8 +1389,8 @@ WHERE conrelid::regclass::text like 'fk_partitioned_fk%' ORDER BY oid::regclass: DROP TABLE fk_partitioned_fk, fk_notpartitioned_pk; --- NOT VALID foreign key on a non-partitioned table referencing a partitioned --- table +-- NOT VALID and NOT ENFORCED foreign key on a non-partitioned table +-- referencing a partitioned table CREATE TABLE fk_partitioned_pk (a int, b int, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b); CREATE TABLE fk_partitioned_pk_1 PARTITION OF fk_partitioned_pk FOR VALUES FROM (0,0) TO (1000,1000); CREATE TABLE fk_partitioned_pk_2 PARTITION OF fk_partitioned_pk FOR VALUES FROM (1000,1000) TO (2000,2000); @@ -1399,14 +1399,17 @@ INSERT INTO fk_partitioned_pk VALUES(100,100), (1000,1000); INSERT INTO fk_notpartitioned_fk VALUES(100,100), (1000,1000); ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT ENFORCED; --- All constraints will be invalid. +-- All constraints will be invalid, and _fkey2 constraints will not be enforced. SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; ALTER TABLE fk_notpartitioned_fk VALIDATE CONSTRAINT fk_notpartitioned_fk_a_b_fkey; +ALTER TABLE fk_notpartitioned_fk ALTER CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 ENFORCED; --- All constraints are now valid. +-- All constraints are now valid and enforced. SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; From 54c6ea8c81db718508eeea50991d3c1c5dff54a5 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Thu, 5 Jun 2025 14:50:43 -0400 Subject: [PATCH 053/602] nbtree: Remove useless row compare arg. Use of a RowCompare key makes nbtree index scans ineligible to use pstate.forcenonrequired following recent bugfix commit 5f4d98d4. There's no longer any need for _bt_check_rowcompare to accept a forcenonrequired argument, so remove it. 
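As a hedged illustration (hypothetical table and index), this is the kind of
qual involved: a row-constructor comparison matched to a multicolumn btree
index becomes a RowCompare (SK_ROW_HEADER) scan key, which since commit
5f4d98d4 disqualifies the scan from using pstate.forcenonrequired.

CREATE TABLE rc (a int, b int);
CREATE INDEX rc_a_b_idx ON rc (a, b);
-- the row comparison below can be used as a btree index condition,
-- carried in the scan as a single RowCompare key
SELECT * FROM rc WHERE (a, b) > (5, 10) ORDER BY a, b;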
--- src/backend/access/nbtree/nbtutils.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 1a15dfcb7d357..e0aa83fc8897b 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -63,7 +63,7 @@ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir, bool *continuescan, int *ikey); static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, - ScanDirection dir, bool forcenonrequired, bool *continuescan); + ScanDirection dir, bool *continuescan); static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, int tupnatts, TupleDesc tupdesc); static int _bt_keep_natts(Relation rel, IndexTuple lastleft, @@ -2902,8 +2902,10 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { + Assert(!forcenonrequired); /* forbidden by _bt_set_startikey */ + if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir, - forcenonrequired, continuescan)) + continuescan)) continue; return false; } @@ -3060,8 +3062,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir, */ static bool _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, - TupleDesc tupdesc, ScanDirection dir, - bool forcenonrequired, bool *continuescan) + TupleDesc tupdesc, ScanDirection dir, bool *continuescan) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); int32 cmpresult = 0; @@ -3101,11 +3102,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, if (isNull) { - if (forcenonrequired) - { - /* treating scan's keys as non-required */ - } - else if (subkey->sk_flags & SK_BT_NULLS_FIRST) + if (subkey->sk_flags & SK_BT_NULLS_FIRST) { /* * Since NULLs are sorted before non-NULLs, we know we have @@ -3159,12 +3156,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, */ Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); subkey--; - if (forcenonrequired) - { - /* treating scan's keys as non-required */ - } - else if ((subkey->sk_flags & SK_BT_REQFWD) && - ScanDirectionIsForward(dir)) + if ((subkey->sk_flags & SK_BT_REQFWD) && + ScanDirectionIsForward(dir)) *continuescan = false; else if ((subkey->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)) @@ -3216,7 +3209,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, break; } - if (!result && !forcenonrequired) + if (!result) { /* * Tuple fails this qual. If it's a required qual for the current From c37be39a74b25cc9c7b052ddae43061a6a2c6019 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 5 Jun 2025 15:24:15 -0400 Subject: [PATCH 054/602] Doc: improve description of which role runs a trigger. Refine wording from commit 01463e1cc. Author: Noah Misch Reviewed-by: Tom Lane Discussion: https://postgr.es/m/20250605163441.2f.nmisch@google.com --- doc/src/sgml/trigger.sgml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml index e9214dcf1b1bd..bb1b5faf34e31 100644 --- a/doc/src/sgml/trigger.sgml +++ b/doc/src/sgml/trigger.sgml @@ -129,10 +129,9 @@ In all cases, a trigger is executed as part of the same transaction as the statement that triggered it, so if either the statement or the trigger causes an error, the effects of both will be rolled back. 
- Also, the trigger will always run in the security context of the role - that executed the statement that caused the trigger to fire, unless - the trigger function is defined as SECURITY DEFINER, - in which case it will run as the function owner. + Also, the trigger will always run as the role that queued the trigger + event, unless the trigger function is marked as SECURITY + DEFINER, in which case it will run as the function owner. From 016e407f4ba10b230f5094c9ba36a1df3d34fb22 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 6 Jun 2025 08:18:27 -0400 Subject: [PATCH 055/602] pg_prewarm: Allow autoprewarm to use more than 1GB to dump blocks. Reported-by: Daria Shanina Author: Daria Shanina Author: Robert Haas Backpatch-through: 13 --- contrib/pg_prewarm/autoprewarm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index c52f4d4dc9ea2..c01b9c7e6a4d6 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -693,8 +693,15 @@ apw_dump_now(bool is_bgworker, bool dump_unlogged) return 0; } - block_info_array = - (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers); + /* + * With sufficiently large shared_buffers, allocation will exceed 1GB, so + * allow for a huge allocation to prevent outright failure. + * + * (In the future, it might be a good idea to redesign this to use a more + * memory-efficient data structure.) + */ + block_info_array = (BlockInfoRecord *) + palloc_extended((sizeof(BlockInfoRecord) * NBuffers), MCXT_ALLOC_HUGE); for (num_blocks = 0, i = 0; i < NBuffers; i++) { From e6eed40e44419e3268d01fe0d2daec08a7df68f7 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Fri, 6 Jun 2025 10:19:44 -0400 Subject: [PATCH 056/602] Avoid BufferGetLSNAtomic() calls during nbtree scans. Delay calling BufferGetLSNAtomic() until we finish reading a page that actually contains items that btgettuple will return to the executor. This reduces the number of calls during plain index scans (we'll only call BufferGetLSNAtomic() when _bt_readpage returns true), and totally eliminates calls during index-only scans, bitmap index scans, and plain index scans of an unlogged relation. Currently, when checksums (or wal_log_hints) are enabled, acquiring a page's LSN in BufferGetLSNAtomic() involves locking the buffer header (which involves the use of spinlocks). Testing has shown that enabling page-level checksums causes large regressions with certain workloads, especially on larger multi-socket systems. The regression isn't tied to any Postgres 18 commit. However, Postgres 18 commit 04bec894 made initdb use checksums by default, so it seems prudent to address the problem now. 
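Returning briefly to the pg_prewarm change above, a small usage sketch
(assuming the extension is installed; the autoprewarm worker itself is enabled
via shared_preload_libraries): with sufficiently large shared_buffers, the
per-buffer array built by apw_dump_now() exceeds 1GB, which the switch to
palloc_extended(..., MCXT_ALLOC_HUGE) now tolerates.

CREATE EXTENSION pg_prewarm;
-- performs an immediate dump and returns the number of block records
-- written to the autoprewarm.blocks file in the data directory
SELECT autoprewarm_dump_now();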
Author: Peter Geoghegan Reviewed-By: Tomas Vondra Discussion: https://postgr.es/m/941f0190-e3c6-4622-9ac7-c04e936e5fdb@vondra.me Discussion: https://postgr.es/m/CAH2-Wzk-Dg5XWs_jDuiHt4_7ryrSY+n=vxmHY51EVqPDFsKXmg@mail.gmail.com --- src/backend/access/nbtree/nbtree.c | 30 ++++++++++++ src/backend/access/nbtree/nbtsearch.c | 70 ++++++++++++++------------- src/backend/access/nbtree/nbtutils.c | 68 ++++++++++++-------------- src/include/access/nbtree.h | 5 +- 4 files changed, 102 insertions(+), 71 deletions(-) diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 765659887af73..03a1d7b027afc 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res; + Assert(scan->heapRelation != NULL); + /* btree indexes are never lossy */ scan->xs_recheck = false; @@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) int64 ntids = 0; ItemPointer heapTid; + Assert(scan->heapRelation == NULL); + /* Each loop iteration performs another primitive index scan */ do { @@ -393,6 +397,32 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, BTScanPosInvalidate(so->currPos); } + /* + * We prefer to eagerly drop leaf page pins before btgettuple returns. + * This avoids making VACUUM wait to acquire a cleanup lock on the page. + * + * We cannot safely drop leaf page pins during index-only scans due to a + * race condition involving VACUUM setting pages all-visible in the VM. + * It's also unsafe for plain index scans that use a non-MVCC snapshot. + * + * When we drop pins eagerly, the mechanism that marks so->killedItems[] + * index tuples LP_DEAD has to deal with concurrent TID recycling races. + * The scheme used to detect unsafe TID recycling won't work when scanning + * unlogged relations (since it involves saving an affected page's LSN). + * Opt out of eager pin dropping during unlogged relation scans for now + * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting). + * + * Also opt out of dropping leaf page pins eagerly during bitmap scans. + * Pins cannot be held for more than an instant during bitmap scans either + * way, so we might as well avoid wasting cycles on acquiring page LSNs. + * + * See nbtree/README section on making concurrent TID recycling safe. + */ + so->dropPin = (!scan->xs_want_itup && + IsMVCCSnapshot(scan->xs_snapshot) && + RelationNeedsWAL(scan->indexRelation) && + scan->heapRelation != NULL); + so->markItemIndex = -1; so->needPrimScan = false; so->scanBehind = false; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index fe9a3886913d8..070f14c8b91f0 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -25,7 +25,7 @@ #include "utils/rel.h" -static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp); +static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so); static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key, Buffer buf, bool forupdate, BTStack stack, int access); @@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); /* * _bt_drop_lock_and_maybe_pin() * - * Unlock the buffer; and if it is safe to release the pin, do that, too. - * This will prevent vacuum from stalling in a blocked state trying to read a - * page when a cursor is sitting on it. 
- * - * See nbtree/README section on making concurrent TID recycling safe. + * Unlock so->currPos.buf. If scan is so->dropPin, drop the pin, too. + * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock. */ -static void -_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp) +static inline void +_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so) { - _bt_unlockbuf(scan->indexRelation, sp->buf); - - if (IsMVCCSnapshot(scan->xs_snapshot) && - RelationNeedsWAL(scan->indexRelation) && - !scan->xs_want_itup) + if (!so->dropPin) { - ReleaseBuffer(sp->buf); - sp->buf = InvalidBuffer; + /* Just drop the lock (not the pin) */ + _bt_unlockbuf(rel, so->currPos.buf); + return; } + + /* + * Drop both the lock and the pin. + * + * Have to set so->currPos.lsn so that _bt_killitems has a way to detect + * when concurrent heap TID recycling by VACUUM might have taken place. + */ + Assert(RelationNeedsWAL(rel)); + so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); + _bt_relbuf(rel, so->currPos.buf); + so->currPos.buf = InvalidBuffer; } /* @@ -866,8 +871,8 @@ _bt_compare(Relation rel, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, data about the * matching tuple(s) on the page has been loaded into so->currPos. We'll - * drop all locks and hold onto a pin on page's buffer, except when - * _bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM. + * drop all locks and hold onto a pin on page's buffer, except during + * so->dropPin scans, when we drop both the lock and the pin. * _bt_returnitem sets the next item to return to scan on success exit. * * If there are no matching items in the index, we return false, with no @@ -1610,7 +1615,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf); so->currPos.prevPage = opaque->btpo_prev; so->currPos.nextPage = opaque->btpo_next; + /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */ + so->currPos.dir = dir; + so->currPos.nextTupleOffset = 0; + /* either moreRight or moreLeft should be set now (may be unset later) */ + Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight : + so->currPos.moreLeft); Assert(!P_IGNORE(opaque)); Assert(BTScanPosIsPinned(so->currPos)); Assert(!so->needPrimScan); @@ -1626,14 +1637,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.currPage); } - /* initialize remaining currPos fields related to current page */ - so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); - so->currPos.dir = dir; - so->currPos.nextTupleOffset = 0; - /* either moreLeft or moreRight should be set now (may be unset later) */ - Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight : - so->currPos.moreLeft); - PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot); /* initialize local variables */ @@ -2107,10 +2110,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so) * * Wrapper on _bt_readnextpage that performs final steps for the current page. * - * On entry, if so->currPos.buf is valid the buffer is pinned but not locked. - * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped - * the pin eagerly earlier on. The scan must have so->currPos.currPage set to - * a valid block, in any case. + * On entry, so->currPos must be valid. Its buffer will be pinned, though + * never locked. 
(Actually, when so->dropPin there won't even be a pin held, + * though so->currPos.currPage must still be set to a valid block number.) */ static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir) @@ -2251,12 +2253,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) */ if (_bt_readpage(scan, dir, offnum, true)) { + Relation rel = scan->indexRelation; + /* * _bt_readpage succeeded. Drop the lock (and maybe the pin) on * so->currPos.buf in preparation for btgettuple returning tuples. */ Assert(BTScanPosIsPinned(so->currPos)); - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + _bt_drop_lock_and_maybe_pin(rel, so); return true; } @@ -2294,8 +2298,8 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) * * On success exit, so->currPos is updated to contain data from the next * interesting page, and we return true. We hold a pin on the buffer on - * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe - * to eagerly drop the pin (to avoid blocking VACUUM). + * success exit (except during so->dropPin index scans, when we drop the pin + * eagerly to avoid blocking VACUUM). * * If there are no more matching records in the given direction, we drop all * locks and pins, invalidate so->currPos, and return false. @@ -2413,7 +2417,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, */ Assert(so->currPos.currPage == blkno); Assert(BTScanPosIsPinned(so->currPos)); - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + _bt_drop_lock_and_maybe_pin(rel, so); return true; } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index e0aa83fc8897b..29f0dca1b08aa 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -3335,75 +3335,71 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, * * Note that if we hold a pin on the target page continuously from initially * reading the items until applying this function, VACUUM cannot have deleted - * any items from the page, and so there is no need to search left from the - * recorded offset. (This observation also guarantees that the item is still - * the right one to delete, which might otherwise be questionable since heap - * TIDs can get recycled.) This holds true even if the page has been modified - * by inserts and page splits, so there is no need to consult the LSN. - * - * If the pin was released after reading the page, then we re-read it. If it - * has been modified since we read it (as determined by the LSN), we dare not - * flag any entries because it is possible that the old entry was vacuumed - * away and the TID was re-used by a completely different heap tuple. + * any items on the page, so the page's TIDs can't have been recycled by now. + * There's no risk that we'll confuse a new index tuple that happens to use a + * recycled TID with a now-removed tuple with the same TID (that used to be on + * this same page). We can't rely on that during scans that drop pins eagerly + * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on + * the page LSN having not changed since back when _bt_readpage saw the page. 
*/ void _bt_killitems(IndexScanDesc scan) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Page page; BTPageOpaque opaque; OffsetNumber minoff; OffsetNumber maxoff; - int i; int numKilled = so->numKilled; bool killedsomething = false; - bool droppedpin PG_USED_FOR_ASSERTS_ONLY; + Assert(numKilled > 0); Assert(BTScanPosIsValid(so->currPos)); + Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */ - /* - * Always reset the scan state, so we don't look for same items on other - * pages. - */ + /* Always invalidate so->killedItems[] before leaving so->currPos */ so->numKilled = 0; - if (BTScanPosIsPinned(so->currPos)) + if (!so->dropPin) { /* * We have held the pin on this page since we read the index tuples, * so all we need to do is lock it. The pin will have prevented - * re-use of any TID on the page, so there is no need to check the - * LSN. + * concurrent VACUUMs from recycling any of the TIDs on the page. */ - droppedpin = false; - _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ); - - page = BufferGetPage(so->currPos.buf); + Assert(BTScanPosIsPinned(so->currPos)); + _bt_lockbuf(rel, so->currPos.buf, BT_READ); } else { Buffer buf; + XLogRecPtr latestlsn; - droppedpin = true; - /* Attempt to re-read the buffer, getting pin and lock. */ - buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ); + Assert(!BTScanPosIsPinned(so->currPos)); + Assert(RelationNeedsWAL(rel)); + buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ); - page = BufferGetPage(buf); - if (BufferGetLSNAtomic(buf) == so->currPos.lsn) - so->currPos.buf = buf; - else + latestlsn = BufferGetLSNAtomic(buf); + Assert(!XLogRecPtrIsInvalid(so->currPos.lsn)); + Assert(so->currPos.lsn <= latestlsn); + if (so->currPos.lsn != latestlsn) { - /* Modified while not pinned means hinting is not safe. */ - _bt_relbuf(scan->indexRelation, buf); + /* Modified, give up on hinting */ + _bt_relbuf(rel, buf); return; } + + /* Unmodified, hinting is safe */ + so->currPos.buf = buf; } + page = BufferGetPage(so->currPos.buf); opaque = BTPageGetOpaque(page); minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); - for (i = 0; i < numKilled; i++) + for (int i = 0; i < numKilled; i++) { int itemIndex = so->killedItems[i]; BTScanPosItem *kitem = &so->currPos.items[itemIndex]; @@ -3435,7 +3431,7 @@ _bt_killitems(IndexScanDesc scan) * correctness. * * Note that the page may have been modified in almost any way - * since we first read it (in the !droppedpin case), so it's + * since we first read it (in the !so->dropPin case), so it's * possible that this posting list tuple wasn't a posting list * tuple when we first encountered its heap TIDs. */ @@ -3451,7 +3447,7 @@ _bt_killitems(IndexScanDesc scan) * though only in the common case where the page can't * have been concurrently modified */ - Assert(kitem->indexOffset == offnum || !droppedpin); + Assert(kitem->indexOffset == offnum || !so->dropPin); /* * Read-ahead to later kitems here. @@ -3518,7 +3514,7 @@ _bt_killitems(IndexScanDesc scan) MarkBufferDirtyHint(so->currPos.buf, true); } - _bt_unlockbuf(scan->indexRelation, so->currPos.buf); + _bt_unlockbuf(rel, so->currPos.buf); } diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index ebca02588d3e2..e709d2e0afe94 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -939,7 +939,7 @@ typedef BTVacuumPostingData *BTVacuumPosting; * processing. This approach minimizes lock/unlock traffic. 
We must always * drop the lock to make it okay for caller to process the returned items. * Whether or not we can also release the pin during this window will vary. - * We drop the pin eagerly (when safe) to avoid blocking progress by VACUUM + * We drop the pin (when so->dropPin) to avoid blocking progress by VACUUM * (see nbtree/README section about making concurrent TID recycling safe). * We'll always release both the lock and the pin on the current page before * moving on to its sibling page. @@ -967,7 +967,7 @@ typedef struct BTScanPosData BlockNumber currPage; /* page referenced by items array */ BlockNumber prevPage; /* currPage's left link */ BlockNumber nextPage; /* currPage's right link */ - XLogRecPtr lsn; /* currPage's LSN */ + XLogRecPtr lsn; /* currPage's LSN (when so->dropPin) */ /* scan direction for the saved position's call to _bt_readpage */ ScanDirection dir; @@ -1070,6 +1070,7 @@ typedef struct BTScanOpaqueData /* info about killed items if any (killedItems is NULL if never used) */ int *killedItems; /* currPos.items indexes of killed items */ int numKilled; /* number of currently stored items */ + bool dropPin; /* drop leaf pin before btgettuple returns? */ /* * If we are doing an index-only scan, these are the tuple storage From 304862973e9a77c31cbf05d10b8b71c1a2870e59 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 6 Jun 2025 11:40:52 -0500 Subject: [PATCH 057/602] Fixed signed/unsigned mismatch in test_dsm_registry. Oversight in commit 8b2bcf3f28. Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/aECi_gSD9JnVWQ8T%40nathan Backpatch-through: 17 --- src/test/modules/test_dsm_registry/test_dsm_registry.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/modules/test_dsm_registry/test_dsm_registry.c b/src/test/modules/test_dsm_registry/test_dsm_registry.c index 462a80f8790d9..96a890be22826 100644 --- a/src/test/modules/test_dsm_registry/test_dsm_registry.c +++ b/src/test/modules/test_dsm_registry/test_dsm_registry.c @@ -54,7 +54,7 @@ set_val_in_shmem(PG_FUNCTION_ARGS) tdr_attach_shmem(); LWLockAcquire(&tdr_state->lck, LW_EXCLUSIVE); - tdr_state->val = PG_GETARG_UINT32(0); + tdr_state->val = PG_GETARG_INT32(0); LWLockRelease(&tdr_state->lck); PG_RETURN_VOID(); @@ -72,5 +72,5 @@ get_val_in_shmem(PG_FUNCTION_ARGS) ret = tdr_state->val; LWLockRelease(&tdr_state->lck); - PG_RETURN_UINT32(ret); + PG_RETURN_INT32(ret); } From a31767fc0935bf8dce884ed35dbe644516d0115e Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 6 Jun 2025 12:08:17 -0500 Subject: [PATCH 058/602] Use NULL instead of 0 for pointer arguments. Commit 5fe08c006c fixed this for calls to dshash_create(). This commit fixes calls to dshash_attach() and dsa_create_in_place(). 
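For the test_dsm_registry fix just above, a quick round-trip through the
module's SQL API (a sketch assuming the test module is installed); with the
signed int32 macros, negative and positive int values are now handled
consistently:

SELECT set_val_in_shmem(1236);
SELECT get_val_in_shmem();  -- 1236, via PG_GETARG_INT32/PG_RETURN_INT32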
Reviewed-by: Masahiko Sawada Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/aECi_gSD9JnVWQ8T%40nathan --- src/backend/replication/logical/launcher.c | 2 +- src/backend/utils/activity/pgstat_shmem.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 10677da56b2b6..1c3c051403dd6 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -1016,7 +1016,7 @@ logicalrep_launcher_attach_dshmem(void) last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa); dsa_pin_mapping(last_start_times_dsa); last_start_times = dshash_attach(last_start_times_dsa, &dsh_params, - LogicalRepCtx->last_start_dsh, 0); + LogicalRepCtx->last_start_dsh, NULL); } MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index 2e33293b00097..53e7d534270ac 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -183,7 +183,7 @@ StatsShmemInit(void) p += MAXALIGN(pgstat_dsa_init_size()); dsa = dsa_create_in_place(ctl->raw_dsa_area, pgstat_dsa_init_size(), - LWTRANCHE_PGSTATS_DSA, 0); + LWTRANCHE_PGSTATS_DSA, NULL); dsa_pin(dsa); /* @@ -255,7 +255,8 @@ pgstat_attach_shmem(void) dsa_pin_mapping(pgStatLocal.dsa); pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params, - pgStatLocal.shmem->hash_handle, 0); + pgStatLocal.shmem->hash_handle, + NULL); MemoryContextSwitchTo(oldcontext); } From 5b40feab594c3019fd6b09e46f97f5b367050cf9 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 6 Jun 2025 15:28:51 -0700 Subject: [PATCH 059/602] Improve CREATE DATABASE error message for invalid libc locale. 
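A hedged sketch of the improved message, with a placeholder locale name: when
the builtin provider is in use, an invalid libc locale now hints at
BUILTIN_LOCALE instead of unconditionally suggesting ICU_LOCALE.

CREATE DATABASE badloc TEMPLATE template0
    LOCALE_PROVIDER builtin BUILTIN_LOCALE 'C'
    LC_COLLATE 'no_such_locale' LC_CTYPE 'no_such_locale';
-- ERROR:  invalid LC_COLLATE locale name: "no_such_locale"
-- HINT:  If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.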
Discussion: https://postgr.es/m/73959a14-267b-49c1-8293-291b175682cb@manitou-mail.org Reviewed-by: Daniel Verite --- src/backend/commands/dbcommands.c | 41 +++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 5fbbcdaabb1d2..c95eb94501671 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1065,16 +1065,41 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) /* Check that the chosen locales are valid, and get canonical spellings */ if (!check_locale(LC_COLLATE, dbcollate, &canonname)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), - errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); + { + if (dblocprovider == COLLPROVIDER_BUILTIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), + errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE."))); + else if (dblocprovider == COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), + errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE."))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate))); + } dbcollate = canonname; if (!check_locale(LC_CTYPE, dbctype, &canonname)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), - errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); + { + if (dblocprovider == COLLPROVIDER_BUILTIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), + errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE."))); + else if (dblocprovider == COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), + errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE."))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype))); + } + dbctype = canonname; check_encoding_locale_matches(encoding, dbcollate, dbctype); From 1a857348e47dc4f1bbf104e746abe542c99004e8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 7 Jun 2025 09:03:11 +0200 Subject: [PATCH 060/602] plpython: Remove obsolete test expected file Move plpython_error_5.out to plpython_error.out, since the pre-3.5 version is no longer needed, since we raised the Python requirement to 3.6 (commit 45363fca637). 
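The retained expected file matches Python 3.6+ behavior; for instance, the
toplevel AttributeError test reads as follows (taken from the diff below):

CREATE FUNCTION toplevel_attribute_error() RETURNS void AS $$
plpy.nonexistent
$$ LANGUAGE plpython3u;
SELECT toplevel_attribute_error();
-- ERROR:  AttributeError: module 'plpy' has no attribute 'nonexistent'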
Reviewed-by: Tom Lane Reviewed-by: Jacob Champion Discussion: https://www.postgresql.org/message-id/d620e7c6-becc-4a8e-9b43-eea0da55faf2@eisentraut.org --- src/pl/plpython/expected/README | 3 - src/pl/plpython/expected/plpython_error.out | 2 +- src/pl/plpython/expected/plpython_error_5.out | 460 ------------------ 3 files changed, 1 insertion(+), 464 deletions(-) delete mode 100644 src/pl/plpython/expected/README delete mode 100644 src/pl/plpython/expected/plpython_error_5.out diff --git a/src/pl/plpython/expected/README b/src/pl/plpython/expected/README deleted file mode 100644 index 388c553a5890a..0000000000000 --- a/src/pl/plpython/expected/README +++ /dev/null @@ -1,3 +0,0 @@ -Guide to alternative expected files: - -plpython_error_5.out Python 3.5 and newer diff --git a/src/pl/plpython/expected/plpython_error.out b/src/pl/plpython/expected/plpython_error.out index 68722b00097ed..fd9cd73be743a 100644 --- a/src/pl/plpython/expected/plpython_error.out +++ b/src/pl/plpython/expected/plpython_error.out @@ -243,7 +243,7 @@ $$ plpy.nonexistent $$ LANGUAGE plpython3u; SELECT toplevel_attribute_error(); -ERROR: AttributeError: 'module' object has no attribute 'nonexistent' +ERROR: AttributeError: module 'plpy' has no attribute 'nonexistent' CONTEXT: Traceback (most recent call last): PL/Python function "toplevel_attribute_error", line 2, in plpy.nonexistent diff --git a/src/pl/plpython/expected/plpython_error_5.out b/src/pl/plpython/expected/plpython_error_5.out deleted file mode 100644 index fd9cd73be743a..0000000000000 --- a/src/pl/plpython/expected/plpython_error_5.out +++ /dev/null @@ -1,460 +0,0 @@ --- test error handling, i forgot to restore Warn_restart in --- the trigger handler once. the errors and subsequent core dump were --- interesting. -/* Flat out Python syntax error - */ -CREATE FUNCTION python_syntax_error() RETURNS text - AS -'.syntaxerror' - LANGUAGE plpython3u; -ERROR: could not compile PL/Python function "python_syntax_error" -DETAIL: SyntaxError: invalid syntax (, line 2) -/* With check_function_bodies = false the function should get defined - * and the error reported when called - */ -SET check_function_bodies = false; -CREATE FUNCTION python_syntax_error() RETURNS text - AS -'.syntaxerror' - LANGUAGE plpython3u; -SELECT python_syntax_error(); -ERROR: could not compile PL/Python function "python_syntax_error" -DETAIL: SyntaxError: invalid syntax (, line 2) -/* Run the function twice to check if the hashtable entry gets cleaned up */ -SELECT python_syntax_error(); -ERROR: could not compile PL/Python function "python_syntax_error" -DETAIL: SyntaxError: invalid syntax (, line 2) -RESET check_function_bodies; -/* Flat out syntax error - */ -CREATE FUNCTION sql_syntax_error() RETURNS text - AS -'plpy.execute("syntax error")' - LANGUAGE plpython3u; -SELECT sql_syntax_error(); -ERROR: spiexceptions.SyntaxError: syntax error at or near "syntax" -LINE 1: syntax error - ^ -QUERY: syntax error -CONTEXT: Traceback (most recent call last): - PL/Python function "sql_syntax_error", line 1, in - plpy.execute("syntax error") -PL/Python function "sql_syntax_error" -/* check the handling of uncaught python exceptions - */ -CREATE FUNCTION exception_index_invalid(text) RETURNS text - AS -'return args[1]' - LANGUAGE plpython3u; -SELECT exception_index_invalid('test'); -ERROR: IndexError: list index out of range -CONTEXT: Traceback (most recent call last): - PL/Python function "exception_index_invalid", line 1, in - return args[1] -PL/Python function "exception_index_invalid" -/* check 
handling of nested exceptions - */ -CREATE FUNCTION exception_index_invalid_nested() RETURNS text - AS -'rv = plpy.execute("SELECT test5(''foo'')") -return rv[0]' - LANGUAGE plpython3u; -SELECT exception_index_invalid_nested(); -ERROR: spiexceptions.UndefinedFunction: function test5(unknown) does not exist -LINE 1: SELECT test5('foo') - ^ -HINT: No function matches the given name and argument types. You might need to add explicit type casts. -QUERY: SELECT test5('foo') -CONTEXT: Traceback (most recent call last): - PL/Python function "exception_index_invalid_nested", line 1, in - rv = plpy.execute("SELECT test5('foo')") -PL/Python function "exception_index_invalid_nested" -/* a typo - */ -CREATE FUNCTION invalid_type_uncaught(a text) RETURNS text - AS -'if "plan" not in SD: - q = "SELECT fname FROM users WHERE lname = $1" - SD["plan"] = plpy.prepare(q, [ "test" ]) -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT invalid_type_uncaught('rick'); -ERROR: spiexceptions.UndefinedObject: type "test" does not exist -CONTEXT: Traceback (most recent call last): - PL/Python function "invalid_type_uncaught", line 3, in - SD["plan"] = plpy.prepare(q, [ "test" ]) -PL/Python function "invalid_type_uncaught" -/* for what it's worth catch the exception generated by - * the typo, and return None - */ -CREATE FUNCTION invalid_type_caught(a text) RETURNS text - AS -'if "plan" not in SD: - q = "SELECT fname FROM users WHERE lname = $1" - try: - SD["plan"] = plpy.prepare(q, [ "test" ]) - except plpy.SPIError as ex: - plpy.notice(str(ex)) - return None -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT invalid_type_caught('rick'); -NOTICE: type "test" does not exist - invalid_type_caught ---------------------- - -(1 row) - -/* for what it's worth catch the exception generated by - * the typo, and reraise it as a plain error - */ -CREATE FUNCTION invalid_type_reraised(a text) RETURNS text - AS -'if "plan" not in SD: - q = "SELECT fname FROM users WHERE lname = $1" - try: - SD["plan"] = plpy.prepare(q, [ "test" ]) - except plpy.SPIError as ex: - plpy.error(str(ex)) -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT invalid_type_reraised('rick'); -ERROR: plpy.Error: type "test" does not exist -CONTEXT: Traceback (most recent call last): - PL/Python function "invalid_type_reraised", line 6, in - plpy.error(str(ex)) -PL/Python function "invalid_type_reraised" -/* no typo no messing about - */ -CREATE FUNCTION valid_type(a text) RETURNS text - AS -'if "plan" not in SD: - SD["plan"] = plpy.prepare("SELECT fname FROM users WHERE lname = $1", [ "text" ]) -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT valid_type('rick'); - valid_type ------------- - -(1 row) - -/* error in nested functions to get a traceback -*/ -CREATE FUNCTION nested_error() RETURNS text - AS -'def fun1(): - plpy.error("boom") - -def fun2(): - fun1() - -def fun3(): - fun2() - -fun3() -return "not reached" -' - LANGUAGE plpython3u; -SELECT nested_error(); -ERROR: plpy.Error: boom -CONTEXT: Traceback (most recent call last): - PL/Python function "nested_error", line 10, in - fun3() - PL/Python function "nested_error", line 8, in fun3 - fun2() - PL/Python function "nested_error", line 5, in fun2 - fun1() - PL/Python function "nested_error", line 2, in fun1 - 
plpy.error("boom") -PL/Python function "nested_error" -/* raising plpy.Error is just like calling plpy.error -*/ -CREATE FUNCTION nested_error_raise() RETURNS text - AS -'def fun1(): - raise plpy.Error("boom") - -def fun2(): - fun1() - -def fun3(): - fun2() - -fun3() -return "not reached" -' - LANGUAGE plpython3u; -SELECT nested_error_raise(); -ERROR: plpy.Error: boom -CONTEXT: Traceback (most recent call last): - PL/Python function "nested_error_raise", line 10, in - fun3() - PL/Python function "nested_error_raise", line 8, in fun3 - fun2() - PL/Python function "nested_error_raise", line 5, in fun2 - fun1() - PL/Python function "nested_error_raise", line 2, in fun1 - raise plpy.Error("boom") -PL/Python function "nested_error_raise" -/* using plpy.warning should not produce a traceback -*/ -CREATE FUNCTION nested_warning() RETURNS text - AS -'def fun1(): - plpy.warning("boom") - -def fun2(): - fun1() - -def fun3(): - fun2() - -fun3() -return "you''ve been warned" -' - LANGUAGE plpython3u; -SELECT nested_warning(); -WARNING: boom - nested_warning --------------------- - you've been warned -(1 row) - -/* AttributeError at toplevel used to give segfaults with the traceback -*/ -CREATE FUNCTION toplevel_attribute_error() RETURNS void AS -$$ -plpy.nonexistent -$$ LANGUAGE plpython3u; -SELECT toplevel_attribute_error(); -ERROR: AttributeError: module 'plpy' has no attribute 'nonexistent' -CONTEXT: Traceback (most recent call last): - PL/Python function "toplevel_attribute_error", line 2, in - plpy.nonexistent -PL/Python function "toplevel_attribute_error" -/* Calling PL/Python functions from SQL and vice versa should not lose context. - */ -CREATE OR REPLACE FUNCTION python_traceback() RETURNS void AS $$ -def first(): - second() - -def second(): - third() - -def third(): - plpy.execute("select sql_error()") - -first() -$$ LANGUAGE plpython3u; -CREATE OR REPLACE FUNCTION sql_error() RETURNS void AS $$ -begin - select 1/0; -end -$$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION python_from_sql_error() RETURNS void AS $$ -begin - select python_traceback(); -end -$$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION sql_from_python_error() RETURNS void AS $$ -plpy.execute("select sql_error()") -$$ LANGUAGE plpython3u; -SELECT python_traceback(); -ERROR: spiexceptions.DivisionByZero: division by zero -CONTEXT: Traceback (most recent call last): - PL/Python function "python_traceback", line 11, in - first() - PL/Python function "python_traceback", line 3, in first - second() - PL/Python function "python_traceback", line 6, in second - third() - PL/Python function "python_traceback", line 9, in third - plpy.execute("select sql_error()") -PL/Python function "python_traceback" -SELECT sql_error(); -ERROR: division by zero -CONTEXT: SQL statement "select 1/0" -PL/pgSQL function sql_error() line 3 at SQL statement -SELECT python_from_sql_error(); -ERROR: spiexceptions.DivisionByZero: division by zero -CONTEXT: Traceback (most recent call last): - PL/Python function "python_traceback", line 11, in - first() - PL/Python function "python_traceback", line 3, in first - second() - PL/Python function "python_traceback", line 6, in second - third() - PL/Python function "python_traceback", line 9, in third - plpy.execute("select sql_error()") -PL/Python function "python_traceback" -SQL statement "select python_traceback()" -PL/pgSQL function python_from_sql_error() line 3 at SQL statement -SELECT sql_from_python_error(); -ERROR: spiexceptions.DivisionByZero: division by zero -CONTEXT: Traceback (most recent call last): 
- PL/Python function "sql_from_python_error", line 2, in - plpy.execute("select sql_error()") -PL/Python function "sql_from_python_error" -/* check catching specific types of exceptions - */ -CREATE TABLE specific ( - i integer PRIMARY KEY -); -CREATE FUNCTION specific_exception(i integer) RETURNS void AS -$$ -from plpy import spiexceptions -try: - plpy.execute("insert into specific values (%s)" % (i or "NULL")); -except spiexceptions.NotNullViolation as e: - plpy.notice("Violated the NOT NULL constraint, sqlstate %s" % e.sqlstate) -except spiexceptions.UniqueViolation as e: - plpy.notice("Violated the UNIQUE constraint, sqlstate %s" % e.sqlstate) -$$ LANGUAGE plpython3u; -SELECT specific_exception(2); - specific_exception --------------------- - -(1 row) - -SELECT specific_exception(NULL); -NOTICE: Violated the NOT NULL constraint, sqlstate 23502 - specific_exception --------------------- - -(1 row) - -SELECT specific_exception(2); -NOTICE: Violated the UNIQUE constraint, sqlstate 23505 - specific_exception --------------------- - -(1 row) - -/* SPI errors in PL/Python functions should preserve the SQLSTATE value - */ -CREATE FUNCTION python_unique_violation() RETURNS void AS $$ -plpy.execute("insert into specific values (1)") -plpy.execute("insert into specific values (1)") -$$ LANGUAGE plpython3u; -CREATE FUNCTION catch_python_unique_violation() RETURNS text AS $$ -begin - begin - perform python_unique_violation(); - exception when unique_violation then - return 'ok'; - end; - return 'not reached'; -end; -$$ language plpgsql; -SELECT catch_python_unique_violation(); - catch_python_unique_violation -------------------------------- - ok -(1 row) - -/* manually starting subtransactions - a bad idea - */ -CREATE FUNCTION manual_subxact() RETURNS void AS $$ -plpy.execute("savepoint save") -plpy.execute("create table foo(x integer)") -plpy.execute("rollback to save") -$$ LANGUAGE plpython3u; -SELECT manual_subxact(); -ERROR: plpy.SPIError: SPI_execute failed: SPI_ERROR_TRANSACTION -CONTEXT: Traceback (most recent call last): - PL/Python function "manual_subxact", line 2, in - plpy.execute("savepoint save") -PL/Python function "manual_subxact" -/* same for prepared plans - */ -CREATE FUNCTION manual_subxact_prepared() RETURNS void AS $$ -save = plpy.prepare("savepoint save") -rollback = plpy.prepare("rollback to save") -plpy.execute(save) -plpy.execute("create table foo(x integer)") -plpy.execute(rollback) -$$ LANGUAGE plpython3u; -SELECT manual_subxact_prepared(); -ERROR: plpy.SPIError: SPI_execute_plan failed: SPI_ERROR_TRANSACTION -CONTEXT: Traceback (most recent call last): - PL/Python function "manual_subxact_prepared", line 4, in - plpy.execute(save) -PL/Python function "manual_subxact_prepared" -/* raising plpy.spiexception.* from python code should preserve sqlstate - */ -CREATE FUNCTION plpy_raise_spiexception() RETURNS void AS $$ -raise plpy.spiexceptions.DivisionByZero() -$$ LANGUAGE plpython3u; -DO $$ -BEGIN - SELECT plpy_raise_spiexception(); -EXCEPTION WHEN division_by_zero THEN - -- NOOP -END -$$ LANGUAGE plpgsql; -/* setting a custom sqlstate should be handled - */ -CREATE FUNCTION plpy_raise_spiexception_override() RETURNS void AS $$ -exc = plpy.spiexceptions.DivisionByZero() -exc.sqlstate = 'SILLY' -raise exc -$$ LANGUAGE plpython3u; -DO $$ -BEGIN - SELECT plpy_raise_spiexception_override(); -EXCEPTION WHEN SQLSTATE 'SILLY' THEN - -- NOOP -END -$$ LANGUAGE plpgsql; -/* test the context stack trace for nested execution levels - */ -CREATE FUNCTION notice_innerfunc() RETURNS 
int AS $$ -plpy.execute("DO LANGUAGE plpython3u $x$ plpy.notice('inside DO') $x$") -return 1 -$$ LANGUAGE plpython3u; -CREATE FUNCTION notice_outerfunc() RETURNS int AS $$ -plpy.execute("SELECT notice_innerfunc()") -return 1 -$$ LANGUAGE plpython3u; -\set SHOW_CONTEXT always -SELECT notice_outerfunc(); -NOTICE: inside DO -CONTEXT: PL/Python anonymous code block -SQL statement "DO LANGUAGE plpython3u $x$ plpy.notice('inside DO') $x$" -PL/Python function "notice_innerfunc" -SQL statement "SELECT notice_innerfunc()" -PL/Python function "notice_outerfunc" - notice_outerfunc ------------------- - 1 -(1 row) - -/* test error logged with an underlying exception that includes a detail - * string (bug #18070). - */ -CREATE FUNCTION python_error_detail() RETURNS SETOF text AS $$ - plan = plpy.prepare("SELECT to_date('xy', 'DD') d") - for row in plpy.cursor(plan): - yield row['d'] -$$ LANGUAGE plpython3u; -SELECT python_error_detail(); -ERROR: error fetching next item from iterator -DETAIL: spiexceptions.InvalidDatetimeFormat: invalid value "xy" for "DD" -CONTEXT: Traceback (most recent call last): -PL/Python function "python_error_detail" From 37e5f0b61fa8aad82a0b06438ed95c282ace994a Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Sat, 7 Jun 2025 11:06:25 -0400 Subject: [PATCH 061/602] doc PG 18 relnotes: adjust wording of initdb item 48814415d5a And move to the top of the incompatibility list. This will impact users more than any other incompatibility item because of pg_upgrade. --- doc/src/sgml/release-18.sgml | 39 +++++++++++++++++------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 19e770c65b5dc..268fa88cbe0e7 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -53,6 +53,24 @@ + + + + +Change initdb default to enable data checksums +§ + + + +Checksums can be disabled with the new initdb option --no-data-checksums. +pg_upgrade requires matching cluster checksum settings, so this new +option can be useful to upgrade non-checksum old clusters. + + + - - - -initdb defaults to enabling data checksums -§ - - - -The previous default behavior (checksums disabled) can be obtained using the -new option --no-data-checksums. Note that pg_upgrade will reject upgrading -between clusters with different checksum settings, so if the old cluster does -not have checksums enabled (the previous default), then the new cluster will -need to be initialized with --no-data-checksums in order to allow pg_upgrade -to succeed. - From 73e26cbeb5927053eea4e209e5eda34a30c353f1 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Sat, 7 Jun 2025 11:25:17 -0400 Subject: [PATCH 062/602] doc PG 18 relnotes: add AFTER trigger user change item Reported-by: Noah Misch Discussion: https://postgr.es/m/20250603172123.5f.nmisch@google.com --- doc/src/sgml/release-18.sgml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 268fa88cbe0e7..c5e60f88fdf15 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -157,6 +157,22 @@ Previously ALTER TABLE SET [UN]LOGGED did nothing, and the creation of an unlogg + + + + +Execute AFTER triggers as the role that was active when trigger events were queued (Laurenz Albe) + + + +Previously such triggers were run as the role that was active at trigger execution time (e.g., at COMMIT). 
This is significant for cases where the role is changed between queue time and +transaction commit. + + + @@ -1980,7 +1981,7 @@ Author: Tom Lane -Add function array_sort() which sorts an array's first dimension (Junwang Zhao, Jian He) +Add function array_sort() which sorts an array's first dimension (Junwang Zhao, Jian He) § @@ -1992,7 +1993,7 @@ Author: Michael Paquier -Add function array_reverse() which reverses an array's first dimension (Aleksander Alekseev) +Add function array_reverse() which reverses an array's first dimension (Aleksander Alekseev) § @@ -2004,7 +2005,7 @@ Author: Nathan Bossart -Add function reverse() to reverse bytea bytes (Aleksander Alekseev) +Add function reverse() to reverse bytea bytes (Aleksander Alekseev) § @@ -2016,12 +2017,12 @@ Author: Dean Rasheed -Allow casting between integer types and bytea (Aleksander Alekseev) +Allow casting between integer types and bytea (Aleksander Alekseev) § -The integer values are stored as bytea two's complement values. +The integer values are stored as bytea two's complement values. @@ -2056,12 +2057,12 @@ Author: Tom Lane -Improve the XML error codes to more closely match the SQL standard (Tom Lane) +Improve the XML error codes to more closely match the SQL standard (Tom Lane) § -These errors are reported via SQLSTATE. +These errors are reported via SQLSTATE. @@ -2081,12 +2082,12 @@ Author: Jeff Davis -Add function CASEFOLD() to allow for more sophisticated case-insensitive matching (Jeff Davis) +Add function CASEFOLD() to allow for more sophisticated case-insensitive matching (Jeff Davis) § -Allows more accurate comparison, i.e., a character can have multiple upper or lower case equivalents, or upper or lower case conversion changes the number of characters. +This allows more accurate comparisons, i.e., a character can have multiple upper or lower case equivalents, or upper or lower case conversion changes the number of characters. @@ -2099,7 +2100,7 @@ Author: Tom Lane -Allow MIN()/MAX() aggregates on arrays and composite types (Aleksander Alekseev, Marat Buharov) +Allow MIN()/MAX() aggregates on arrays and composite types (Aleksander Alekseev, Marat Buharov) § § @@ -2112,7 +2113,7 @@ Author: Tom Lane -Add a WEEK option to EXTRACT() (Tom Lane) +Add a WEEK option to EXTRACT() (Tom Lane) § @@ -2124,7 +2125,7 @@ Author: Tom Lane -Improve the output EXTRACT(QUARTER ...) for negative values (Tom Lane) +Improve the output EXTRACT(QUARTER ...) for negative values (Tom Lane) § @@ -2136,12 +2137,12 @@ Author: Tom Lane -Add roman numeral support to to_number() (Hunaid Sohail) +Add roman numeral support to to_number() (Hunaid Sohail) § -This is accessed via the "RN" pattern. +This is accessed via the RN pattern. @@ -2152,12 +2153,12 @@ Author: Masahiko Sawada -Add UUID version 7 generation function uuidv7() (Andrey Borodin) +Add UUID version 7 generation function uuidv7() (Andrey Borodin) § -This UUID value is temporally sortable. Function alias uuidv4() has been added to explicitly generate version 4 UUIDs. +This UUID value is temporally sortable. Function alias uuidv4() has been added to explicitly generate version 4 UUIDs. 
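A short, hedged sampler of the new v18 functions named in the items above:

SELECT uuidv7();                   -- temporally sortable version 7 UUID
SELECT uuidv4();                   -- explicit version 4 alias
SELECT array_sort(ARRAY[3,1,2]);   -- {1,2,3}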
@@ -2168,7 +2169,7 @@ Author: Nathan Bossart -Add functions crc32() and crc32c() to compute CRC values (Aleksander Alekseev) +Add functions crc32() and crc32c() to compute CRC values (Aleksander Alekseev) § @@ -2180,7 +2181,7 @@ Author: Dean Rasheed -Add math functions gamma() and lgamma() (Dean Rasheed) +Add math functions gamma() and lgamma() (Dean Rasheed) § @@ -2192,12 +2193,12 @@ Author: Tom Lane -Allow "=>" syntax for named cursor arguments in plpgsql (Pavel Stehule) +Allow => syntax for named cursor arguments in plpgsql (Pavel Stehule) § -We previously only accepted ":=". +We previously only accepted :=. @@ -2208,7 +2209,7 @@ Author: Tom Lane -Allow regexp_match[es]/regexp_like/regexp_replace/regexp_count/regexp_instr/regexp_substr/regexp_split_to_table/regexp_split_to_array() to use named arguments (Jian He) +Allow regexp_match[es]()/regexp_like()/regexp_replace()/regexp_count()/regexp_instr()/regexp_substr()/regexp_split_to_table()/regexp_split_to_array() to use named arguments (Jian He) § @@ -2229,7 +2230,7 @@ Author: Robert Haas -Add function PQfullProtocolVersion() to report the full, including minor, protocol version number (Jacob Champion, Jelte Fennema-Nio) +Add function PQfullProtocolVersion() to report the full, including minor, protocol version number (Jacob Champion, Jelte Fennema-Nio) § @@ -2256,7 +2257,7 @@ Author: Michael Paquier -Add libpq function PQservice() to return the connection service name (Michael Banck) +Add libpq function PQservice() to return the connection service name (Michael Banck) § @@ -2270,7 +2271,7 @@ Author: Tomas Vondra -Report search_path changes to the client (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) +Report search_path changes to the client (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) § § @@ -2291,7 +2292,7 @@ Author: Álvaro Herrera -Add PQtrace() output for all message types, including authentication (Jelte Fennema-Nio) +Add PQtrace() output for all message types, including authentication (Jelte Fennema-Nio) § § § @@ -2307,7 +2308,7 @@ Author: Daniel Gustafsson -Add libpq connection parameter sslkeylogfile which dumps out SSL key material (Abhishek Chanda, Daniel Gustafsson) +Add libpq connection parameter sslkeylogfile which dumps out SSL key material (Abhishek Chanda, Daniel Gustafsson) § @@ -2323,12 +2324,12 @@ Author: Thomas Munro -Modify some libpq function signatures to use int64_t (Thomas Munro) +Modify some libpq function signatures to use int64_t (Thomas Munro) § -These previously used pg_int64, which is now deprecated. +These previously used pg_int64, which is now deprecated. @@ -2348,12 +2349,12 @@ Author: Michael Paquier -Allow psql to parse, bind, and close named prepared statements (Anthonin Bonnefoy, Michael Paquier) +Allow psql to parse, bind, and close named prepared statements (Anthonin Bonnefoy, Michael Paquier) § -This is accomplished with new commands \parse, \bind_named, and \close. +This is accomplished with new commands \parse, \bind_named, and \close. @@ -2368,14 +2369,14 @@ Author: Michael Paquier -Add psql backslash commands to allowing issuance of pipeline queries (Anthonin Bonnefoy) +Add psql backslash commands to allowing issuance of pipeline queries (Anthonin Bonnefoy) § § § -The new commands are \startpipeline, \syncpipeline, \sendpipeline, \endpipeline, \flushrequest, \flush, and \getresults. +The new commands are \startpipeline, \syncpipeline, \sendpipeline, \endpipeline, \flushrequest, \flush, and \getresults. 
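A hedged psql session sketch using the pipeline meta-commands named above
(queries in a pipeline are sent with the extended protocol, hence \bind):

\startpipeline
SELECT $1 \bind 42 \sendpipeline
\flushrequest
\getresults
\endpipeline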
@@ -2386,12 +2387,12 @@ Author: Michael Paquier -Allow adding pipeline status to the psql prompt and add related state variables (Anthonin Bonnefoy) +Allow adding pipeline status to the psql prompt and add related state variables (Anthonin Bonnefoy) § -The new prompt character is "%P" and the new psql variables are PIPELINE_SYNC_COUNT, PIPELINE_COMMAND_COUNT, and PIPELINE_RESULT_COUNT. +The new prompt character is %P and the new psql variables are PIPELINE_SYNC_COUNT, PIPELINE_COMMAND_COUNT, and PIPELINE_RESULT_COUNT. @@ -2402,7 +2403,7 @@ Author: Michael Paquier -Allow adding the connection service name to the psql prompt or access it via psql variable (Michael Banck) +Allow adding the connection service name to the psql prompt or access it via psql variable (Michael Banck) § @@ -2414,12 +2415,12 @@ Author: Dean Rasheed -Add psql option to use expanded mode on all list commands (Dean Rasheed) +Add psql option to use expanded mode on all list commands (Dean Rasheed) § -Adding 'x' enables this. +Adding backslash suffix x enables this. @@ -2430,7 +2431,7 @@ Author: Álvaro Herrera -Change psql's \conninfo to use tabular format and include more information (Álvaro Herrera, Maiquel Grassi, Hunaid Sohail) +Change psql's \conninfo to use tabular format and include more information (Álvaro Herrera, Maiquel Grassi, Hunaid Sohail) § @@ -2442,7 +2443,7 @@ Author: Dean Rasheed -Add function's leakproof indicator to psql's \df+, \do+, \dAo+, and \dC+ outputs (Yugo Nagata) +Add function's leakproof indicator to psql's \df+, \do+, \dAo+, and \dC+ outputs (Yugo Nagata) § @@ -2454,7 +2455,7 @@ Author: Michael Paquier -Add access method details for partitioned relations in \dP+ (Justin Pryzby) +Add access method details for partitioned relations in \dP+ (Justin Pryzby) § @@ -2466,7 +2467,7 @@ Author: Magnus Hagander -Add "default_version" to the psql \dx extension output (Magnus Hagander) +Add default_version to the psql \dx extension output (Magnus Hagander) § @@ -2478,7 +2479,7 @@ Author: Daniel Gustafsson -Add psql variable WATCH_INTERVAL to set the default \watch wait time (Daniel Gustafsson) +Add psql variable WATCH_INTERVAL to set the default \watch wait time (Daniel Gustafsson) § @@ -2501,13 +2502,13 @@ Author: Peter Eisentraut -Change initdb to default to enabling checksums (Greg Sabino Mullane) +Change initdb to default to enabling checksums (Greg Sabino Mullane) § § -The new initdb option --no-data-checksums disables checksums. +The new initdb option disables checksums. @@ -2518,12 +2519,12 @@ Author: Nathan Bossart -Add initdb option --no-sync-data-files to avoid syncing heap/index files (Nathan Bossart) +Add initdb option to avoid syncing heap/index files (Nathan Bossart) § -initdb --no-sync is still available to avoid syncing any files. +initdb option is still available to avoid syncing any files. @@ -2536,13 +2537,13 @@ Author: Nathan Bossart -Add vacuumdb option --missing-stats-only to compute only missing optimizer statistics (Corey Huinker, Nathan Bossart) +Add vacuumdb option to compute only missing optimizer statistics (Corey Huinker, Nathan Bossart) § § -This option can only be used by --analyze-only and --analyze-in-stages. +This option can only be used by and . 
@@ -2553,7 +2554,7 @@ Author: Robert Haas -Add pg_combinebackup option -k/--link to enable hard linking (Israel Barth Rubio, Robert Haas) +Add pg_combinebackup option <option>-k</option>/<option>--link</option> to enable hard linking (Israel Barth Rubio, Robert Haas) § @@ -2569,7 +2570,7 @@ Author: Robert Haas -Allow pg_verifybackup to verify tar-format backups (Amul Sul) +Allow pg_verifybackup to verify tar-format backups (Amul Sul) § @@ -2581,7 +2582,7 @@ Author: Masahiko Sawada -If pg_rewind's --source-server specifies a database name, use it in --write-recovery-conf output (Masahiko Sawada) +If pg_rewind's <option>--source-server</option> specifies a database name, use it in <option>--write-recovery-conf</option> output (Masahiko Sawada) § @@ -2593,7 +2594,7 @@ Author: Masahiko Sawada -Add pg_resetwal option --char-signedness to change the default char signedness (Masahiko Sawada) +Add pg_resetwal option <option>--char-signedness</option> to change the default char signedness (Masahiko Sawada) § @@ -2616,12 +2617,12 @@ Author: Andrew Dunstan -Allow pg_dumpall to dump in the same output formats as pg_dump supports (Mahendra Singh Thalor, Andrew Dunstan) +Allow pg_dumpall to dump in the same output formats as pg_dump supports (Mahendra Singh Thalor, Andrew Dunstan) § -Also modify pg_restore to handle such dumps. Previously pg_dumpall only supported text format. +Also modify pg_restore to handle such dumps. Previously pg_dumpall only supported text format. @@ -2632,7 +2633,7 @@ Author: Jeff Davis -Add pg_dump options --with-schema, --with-data, and --with-statistics (Jeff Davis) +Add pg_dump options <option>--with-schema</option>, <option>--with-data</option>, and <option>--with-statistics</option> (Jeff Davis) § @@ -2646,7 +2647,7 @@ Author: Nathan Bossart -Add pg_dump and pg_dumpall option --sequence-data to dump sequence data that would normally be excluded (Nathan Bossart) +Add pg_dump and pg_dumpall option <option>--sequence-data</option> to dump sequence data that would normally be excluded (Nathan Bossart) § § @@ -2659,7 +2660,7 @@ Author: Jeff Davis -Add pg_dump, pg_dumpall, and pg_restore options --statistics-only, --no-statistics, --no-data, and --no-schema (Corey Huinker, Jeff Davis) +Add pg_dump, pg_dumpall, and pg_restore options <option>--statistics-only</option>, <option>--no-statistics</option>, <option>--no-data</option>, and <option>--no-schema</option> (Corey Huinker, Jeff Davis) § @@ -2671,7 +2672,7 @@ Author: Tom Lane -Add option --no-policies to disable row level security policy processing in pg_dump, pg_dumpall, pg_restore (Nikolay Samokhvalov) +Add option <option>--no-policies</option> to disable row level security policy processing in pg_dump, pg_dumpall, pg_restore (Nikolay Samokhvalov) § @@ -2702,7 +2703,7 @@ Author: Jeff Davis -Allow pg_upgrade to preserve optimizer statistics (Corey Huinker, Jeff Davis, Nathan Bossart) +Allow pg_upgrade to preserve optimizer statistics (Corey Huinker, Jeff Davis, Nathan Bossart) § § § @@ -2710,7 +2711,7 @@ Allow pg_upgrade to preserve optimizer statistics (Corey Huinker, Jeff Davis, Na -Extended statistics are not preserved. Also add pg_upgrade option --no-statistics to disable statistics preservation. +Extended statistics are not preserved. Also add pg_upgrade option <option>--no-statistics</option> to disable statistics preservation. @@ -2741,7 +2742,7 @@ Author: Nathan Bossart -Allow pg_upgrade to process database checks in parallel (Nathan Bossart) +Allow pg_upgrade to process database checks in parallel (Nathan Bossart) § § § § § § § @@ -2756,7 +2757,7 @@ Allow pg_upgrade to process database checks in parallel (Nathan Bossart) -This is controlled by the existing --jobs option. +This is controlled by the existing <option>--jobs</option> option.
@@ -2767,7 +2768,7 @@ Author: Nathan Bossart -Add pg_upgrade option --swap to swap directories rather than copy, clone, or link files (Nathan Bossart) +Add pg_upgrade option to swap directories rather than copy, clone, or link files (Nathan Bossart) § @@ -2785,13 +2786,13 @@ Author: Masahiko Sawada -Add pg_upgrade option --set-char-signedness to set the default char signedness of new cluster (Masahiko Sawada) +Add pg_upgrade option to set the default char signedness of new cluster (Masahiko Sawada) § § -This is to handle cases where a pre-Postgres 18 cluster's default CPU signedness does not match the new cluster. +This is to handle cases where a pre-PostgreSQL 18 cluster's default CPU signedness does not match the new cluster. @@ -2811,7 +2812,7 @@ Author: Amit Kapila -Add pg_createsubscriber option --all to create logical replicas for all databases (Shubham Khanna) +Add pg_createsubscriber option to create logical replicas for all databases (Shubham Khanna) § @@ -2823,7 +2824,7 @@ Author: Amit Kapila -Add pg_createsubscriber option --remove to remove publications (Shubham Khanna) +Add pg_createsubscriber option to remove publications (Shubham Khanna) § @@ -2835,7 +2836,7 @@ Author: Amit Kapila -Add pg_createsubscriber option --enable-two-phase to enable prepared transactions (Shubham Khanna) +Add pg_createsubscriber option to enable prepared transactions (Shubham Khanna) § @@ -2847,7 +2848,7 @@ Author: Masahiko Sawada -Add pg_recvlogical option --failover to specify failover slots (Hayato Kuroda) +Add pg_recvlogical option to specify failover slots (Hayato Kuroda) § @@ -2859,7 +2860,7 @@ Author: Fujii Masao -Allow pg_recvlogical --drop-slot to work without --dbname (Hayato Kuroda) +Allow pg_recvlogical to work without (Hayato Kuroda) § @@ -2890,7 +2891,7 @@ Separate the loading and running of injection points (Michael Paquier, Heikki Li -Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), and such injection points can be run via INJECTION_POINT_CACHED(). +Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), and such injection points can be run via INJECTION_POINT_CACHED(). 
@@ -2913,7 +2914,7 @@ Author: Heikki Linnakangas -Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() (Heikki Linnakangas) +Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() (Heikki Linnakangas) § @@ -2925,7 +2926,7 @@ Author: David Rowley -Improve the performance of processing long JSON strings using SIMD instructions (David Rowley) +Improve the performance of processing long JSON strings using SIMD instructions (David Rowley) § @@ -2937,7 +2938,7 @@ Author: John Naylor -Speed up CRC32C calculations using x86 AVX-512 instructions (Raghuveer Devulapalli, Paul Amonson) +Speed up CRC32C calculations using x86 AVX-512 instructions (Raghuveer Devulapalli, Paul Amonson) § @@ -2951,7 +2952,7 @@ Author: Nathan Bossart -Add ARM Neon and SVE CPU intrinsics for popcount (integer bit counting) (Chiranmoy Bhattacharya, Devanga Susmitha, Rama Malladi) +Add ARM Neon and SVE CPU intrinsics for popcount (integer bit counting) (Chiranmoy Bhattacharya, Devanga Susmitha, Rama Malladi) § § @@ -2989,15 +2990,15 @@ Author: Tomas Vondra -Add configure option --with-libnuma to enable NUMA awareness (Jakub Wartak, Bertrand Drouvot) +Add configure option to enable NUMA awareness (Jakub Wartak, Bertrand Drouvot) § § § -The function pg_numa_available() reports on NUMA awareness, and system views pg_shmem_allocations_numa and pg_buffercache_numa which report on shared memory distribution across -NUMA nodes. +The function pg_numa_available() reports on NUMA awareness, and system views pg_shmem_allocations_numa and pg_buffercache_numa which report on shared memory distribution across +NUMA nodes. @@ -3008,7 +3009,7 @@ Author: Nathan Bossart -Add TOAST table to pg_index to allow for very large index expression indexes (Nathan Bossart) +Add TOAST table to pg_index to allow for very large expression indexes (Nathan Bossart) § @@ -3020,7 +3021,8 @@ Author: David Rowley -Remove column pg_attribute.attcacheoff (David Rowley) +Remove column +pg_attribute.attcacheoff (David Rowley) § @@ -3032,7 +3034,7 @@ Author: Melanie Plageman -Add column pg_class.relallfrozen (Melanie Plageman) +Add column pg_class.relallfrozen (Melanie Plageman) § @@ -3046,7 +3048,7 @@ Author: Peter Eisentraut -Add amgettreeheight, amconsistentequality, and amconsistentordering to the index access method API (Mark Dilger) +Add amgettreeheight, amconsistentequality, and amconsistentordering to the index access method API (Mark Dilger) § § @@ -3059,7 +3061,7 @@ Author: Peter Eisentraut -Add GiST support function stratnum (Paul A. Jungwirth) +Add GiST support function stratnum() (Paul A. Jungwirth) § @@ -3071,7 +3073,7 @@ Author: Masahiko Sawada -Record the default CPU signedness of "char" in pg_controldata (Masahiko Sawada) +Record the default CPU signedness of char in pg_controldata (Masahiko Sawada) § @@ -3085,13 +3087,13 @@ Author: Peter Eisentraut -Add support for Python "Limited API" in PL/Python (Peter Eisentraut) +Add support for Python "Limited API" in PL/Python (Peter Eisentraut) § § -This helps prevent problems caused by Python 3.x version mismatches. +This helps prevent problems caused by Python 3.x version mismatches. 
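A quick sketch exercising the NUMA reporting described above, assuming a server built with the new configure option and libnuma available (output shapes abbreviated):

SELECT pg_numa_available();
SELECT * FROM pg_shmem_allocations_numa LIMIT 5;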
@@ -3102,7 +3104,7 @@ Author: Jacob Champion -Change the minimum supported Python version to 3.6.8 (Jacob Champion) +Change the minimum supported Python version to 3.6.8 (Jacob Champion) § @@ -3116,7 +3118,7 @@ Author: Daniel Gustafsson -Remove support for OpenSSL versions older than 1.1.1 (Daniel Gustafsson) +Remove support for OpenSSL versions older than 1.1.1 (Daniel Gustafsson) § § @@ -3129,7 +3131,7 @@ Author: Peter Eisentraut -If LLVM is enabled, require version 14 or later (Thomas Munro) +If LLVM is enabled, require version 14 or later (Thomas Munro) § @@ -3141,12 +3143,12 @@ Author: Tom Lane -Add macro PG_MODULE_MAGIC_EXT to allow extensions to report their name and version (Andrei Lepikhov) +Add macro PG_MODULE_MAGIC_EXT to allow extensions to report their name and version (Andrei Lepikhov) § -This information can be accessed via the new function pg_get_loaded_modules(). +This information can be accessed via the new function pg_get_loaded_modules(). @@ -3157,12 +3159,12 @@ Author: Tom Lane -Document that SPI_connect/SPI_connect_ext() always returns success (SPI_OK_CONNECT) (Stepan Neretin) +Document that SPI_connect()/SPI_connect_ext() always returns success (SPI_OK_CONNECT) (Stepan Neretin) § -Errors are always reported via ereport(). +Errors are always reported via ereport(). @@ -3173,7 +3175,7 @@ Author: Peter Eisentraut -Remove the experimental designation of Meson builds on Windows (Aleksander Alekseev) +Remove the experimental designation of Meson builds on Windows (Aleksander Alekseev) § @@ -3185,7 +3187,7 @@ Author: Peter Eisentraut -Add documentation section about API and ABI compatibility (David Wheeler, Peter Eisentraut) +Add documentation section about API and ABI compatibility (David Wheeler, Peter Eisentraut) § @@ -3199,13 +3201,13 @@ Author: Thomas Munro -Remove configure options --disable-spinlocks and --disable-atomics (Thomas Munro) +Remove configure options <option>--disable-spinlocks</option> and <option>--disable-atomics</option> (Thomas Munro) § § -Thirty-two bit atomic operations are now required. +Thirty-two-bit atomic operations are now required. @@ -3216,7 +3218,7 @@ Author: Tom Lane -Remove support for the HPPA/PA-RISC architecture (Tom Lane) +Remove support for the HPPA/PA-RISC architecture (Tom Lane) § @@ -3237,7 +3239,7 @@ Author: Masahiko Sawada -Add extension pg_logicalinspect to inspect logical snapshots (Bertrand Drouvot) +Add extension pg_logicalinspect to inspect logical snapshots (Bertrand Drouvot) § @@ -3249,7 +3251,7 @@ Author: Robert Haas -Add extension pg_overexplain which adds debug details to EXPLAIN output (Robert Haas) +Add extension pg_overexplain which adds debug details to EXPLAIN output (Robert Haas) § @@ -3267,7 +3269,7 @@ Author: Fujii Masao -Add output columns to postgres_fdw_get_connections() (Hayato Kuroda, Sagar Dilip Shedge) +Add output columns to postgres_fdw_get_connections() (Hayato Kuroda, Sagar Dilip Shedge) § § § @@ -3275,8 +3277,8 @@ Add output columns to postgres_fdw_get_connections() (Hayato Kuroda, Sagar Dilip -New output column "used_in_xact" indicates if the foreign data wrapper is being used by a current transaction, "closed" indicates if it is closed, "user_name" indicates the user name, and "remote_backend_pid" indicates the remote backend process identifier. +New output column used_in_xact indicates if the foreign data wrapper is being used by a current transaction, closed indicates if it is closed, user_name indicates the user name, and remote_backend_pid indicates the remote backend process identifier.
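A sketch of reading the new columns named above (assumes postgres_fdw is installed and a remote table has been accessed in the current session; server_name is a pre-existing column):

SELECT server_name, used_in_xact, closed, user_name, remote_backend_pid
  FROM postgres_fdw_get_connections();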
@@ -3287,13 +3289,14 @@ Author: Peter Eisentraut -Allow SCRAM authentication from the client to be passed to postgres_fdw servers (Matheus Alcantara, Peter Eisentraut) +Allow SCRAM authentication from the client to be passed to postgres_fdw servers (Matheus Alcantara, Peter Eisentraut) § -This avoids storing postgres_fdw authentication information in the database, and is enabled with the postgres_fdw "use_scram_passthrough" connection option. libpq uses new connection -parameters scram_client_key and scram_server_key. +This avoids storing postgres_fdw authentication information in the database, and is enabled with the +postgres_fdw use_scram_passthrough connection option. libpq uses new connection +parameters scram_client_key and scram_server_key. @@ -3304,7 +3307,7 @@ Author: Peter Eisentraut -Allow SCRAM authentication from the client to be passed to dblink servers (Matheus Alcantara) +Allow SCRAM authentication from the client to be passed to dblink servers (Matheus Alcantara) § @@ -3316,12 +3319,12 @@ Author: Fujii Masao -Add on_error and log_verbosity options to file_fdw (Atsushi Torikoshi) +Add on_error and log_verbosity options to file_fdw (Atsushi Torikoshi) § -These control how file_fdw handles and reports invalid file rows. +These control how file_fdw handles and reports invalid file rows. @@ -3332,12 +3335,12 @@ Author: Fujii Masao -Add "reject_limit" to control the number of invalid rows file_fdw can ignore (Atsushi Torikoshi) +Add reject_limit to control the number of invalid rows file_fdw can ignore (Atsushi Torikoshi) § -This is active when ON_ERROR = 'ignore'. +This is active when ON_ERROR = 'ignore'. @@ -3348,7 +3351,7 @@ Author: Nathan Bossart -Add configurable variable min_password_length to passwordcheck (Emanuele Musella, Maurizio Boriani) +Add configurable variable min_password_length to passwordcheck (Emanuele Musella, Maurizio Boriani) § @@ -3364,7 +3367,7 @@ Author: Tatsuo Ishii -Have pgbench report the number of failed, retried, or skipped transactions in per-script reports (Yugo Nagata) +Have pgbench report the number of failed, retried, or skipped transactions in per-script reports (Yugo Nagata) § @@ -3376,12 +3379,12 @@ Author: Tom Lane -Add isn server variable "weak" to control invalid check digit acceptance (Viktor Holmberg) +Add isn server variable weak to control invalid check digit acceptance (Viktor Holmberg) § -This was previously only controlled by function isn_weak(). +This was previously only controlled by function isn_weak(). @@ -3392,7 +3395,7 @@ Author: Heikki Linnakangas -Allow values to be sorted to speed btree_gist index builds (Bernd Helmle, Andrey Borodin) +Allow values to be sorted to speed btree_gist index builds (Bernd Helmle, Andrey Borodin) § @@ -3404,7 +3407,7 @@ Author: Tomas Vondra -Add amcheck function gin_index_check() to verify GIN indexes (Grigory Kryachko, Heikki Linnakangas, Andrey Borodin) +Add amcheck function gin_index_check() to verify GIN indexes (Grigory Kryachko, Heikki Linnakangas, Andrey Borodin) § @@ -3416,12 +3419,12 @@ Author: Andres Freund -Add functions pg_buffercache_evict_relation() and pg_buffercache_evict_all() to evict unpinned shared buffers (Nazir Bilal Yavuz) +Add functions pg_buffercache_evict_relation() and pg_buffercache_evict_all() to evict unpinned shared buffers (Nazir Bilal Yavuz) § -The existing function pg_buffercache_evict() now returns the buffer flush status. +The existing function pg_buffercache_evict() now returns the buffer flush status. 
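An illustrative file_fdw table using the on_error, reject_limit, and log_verbosity options described above (server, table, and file names are hypothetical):

CREATE EXTENSION file_fdw;
CREATE SERVER import_srv FOREIGN DATA WRAPPER file_fdw;
CREATE FOREIGN TABLE staging_rows (id int, payload text)
  SERVER import_srv
  OPTIONS (filename '/tmp/rows.csv', format 'csv',
           on_error 'ignore', reject_limit '10', log_verbosity 'verbose');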
@@ -3436,7 +3439,7 @@ Author: Robert Haas -Allow extensions to install custom EXPLAIN options (Robert Haas, Sami Imseih) +Allow extensions to install custom EXPLAIN options (Robert Haas, Sami Imseih) § § § @@ -3452,7 +3455,7 @@ Author: Michael Paquier -Allow extensions to use the server's cumulative statistics API (Michael Paquier) +Allow extensions to use the server's cumulative statistics API (Michael Paquier) § § @@ -3472,7 +3475,7 @@ Author: Michael Paquier -Allow the queries of CREATE TABLE AS and DECLARE to be tracked by pg_stat_statements (Anthonin Bonnefoy) +Allow the queries of CREATE TABLE AS and DECLARE to be tracked by pg_stat_statements (Anthonin Bonnefoy) § @@ -3488,12 +3491,12 @@ Author: Michael Paquier -Allow the parameterization of SET values in pg_stat_statements (Greg Sabino Mullane, Michael Paquier) +Allow the parameterization of SET values in pg_stat_statements (Greg Sabino Mullane, Michael Paquier) § -This reduces the bloat caused by SET statements with differing constants. +This reduces the bloat caused by SET statements with differing constants. @@ -3504,12 +3507,12 @@ Author: Michael Paquier -Add pg_stat_statements columns to report parallel activity (Guillaume Lelarge) +Add pg_stat_statements columns to report parallel activity (Guillaume Lelarge) § -The new columns are parallel_workers_to_launch and parallel_workers_launched. +The new columns are parallel_workers_to_launch and parallel_workers_launched. @@ -3520,7 +3523,7 @@ Author: Michael Paquier -Add pg_stat_statements.wal_buffers_full to report full WAL buffers (Bertrand Drouvot) +Add pg_stat_statements.wal_buffers_full to report full WAL buffers (Bertrand Drouvot) § @@ -3541,7 +3544,7 @@ Author: Álvaro Herrera -Add pgcrypto functions sha256crypt() and sha512crypt() (Bernd Helmle) +Add pgcrypto functions sha256crypt() and sha512crypt() (Bernd Helmle) § @@ -3553,7 +3556,7 @@ Author: Daniel Gustafsson -Add CFB mode to pgcrypto encryption and decryption (Umar Hayat) +Add CFB mode to pgcrypto encryption and decryption (Umar Hayat) § @@ -3565,12 +3568,12 @@ Author: Daniel Gustafsson -Add pgcrypto server variable builtin_crypto_enabled to allow disabling builtin non-FIPS mode cryptographic functions (Daniel Gustafsson, Joe Conway) +Add pgcrypto server variable builtin_crypto_enabled to allow disabling builtin non-FIPS mode cryptographic functions (Daniel Gustafsson, Joe Conway) § -This is useful for guaranteeing FIPS mode behavior. +This is useful for guaranteeing FIPS mode behavior. From 428a87607b58949cfc35eeab94825e2de0d541a5 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 18 Jun 2025 09:18:40 +0900 Subject: [PATCH 109/602] doc: Reorder protocol version option descriptions in libpq docs. Commit 285613c60a7 introduced the min_protocol_version and max_protocol_version connection options for libpq, but their descriptions were placed in the middle of the unrelated ssl_min_protocol_version and ssl_max_protocol_version entries. This commit moves the min_protocol_version and max_protocol_version descriptions to appear after the SSL-related options. This improves the logical order and makes it easier for users to locate the relevant settings in the libpq documentation. 
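As a sketch of where the relocated options fit in practice, both option groups can be tried from a psql session (values assume a PostgreSQL 18 libpq; the database name is illustrative):

\connect "dbname=postgres min_protocol_version=3.0 max_protocol_version=latest"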
Author: Fujii Masao Reviewed-by: Jelte Fennema-Nio Discussion: https://postgr.es/m/a3391f36-30f5-4d4a-825b-232476819de8@oss.nttdata.com --- doc/src/sgml/libpq.sgml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 695fe958c3ed3..08bd51219262d 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -2168,6 +2168,24 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname + + ssl_max_protocol_version + + + This parameter specifies the maximum SSL/TLS protocol version to allow + for the connection. Valid values are TLSv1, + TLSv1.1, TLSv1.2 and + TLSv1.3. The supported protocols depend on the + version of OpenSSL used, older versions + not supporting the most modern protocol versions. If not set, this + parameter is ignored and the connection will use the maximum bound + defined by the backend, if set. Setting the maximum protocol version + is mainly useful for testing or if some component has issues working + with a newer protocol. + + + + min_protocol_version @@ -2216,24 +2234,6 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname - - ssl_max_protocol_version - - - This parameter specifies the maximum SSL/TLS protocol version to allow - for the connection. Valid values are TLSv1, - TLSv1.1, TLSv1.2 and - TLSv1.3. The supported protocols depend on the - version of OpenSSL used, older versions - not supporting the most modern protocol versions. If not set, this - parameter is ignored and the connection will use the maximum bound - defined by the backend, if set. Setting the maximum protocol version - is mainly useful for testing or if some component has issues working - with a newer protocol. - - - - krbsrvname From 9e1183953f0aee6b8040cd782a8af9996f5ca942 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 18 Jun 2025 11:03:21 +0900 Subject: [PATCH 110/602] Document "relrewrite" at the top of heap_create_with_catalog() This parameter has been introduced in 325f2ec5557f, and it was not documented contrary to all the other arguments of heap_create_with_catalog(). Reviewed-by: Yugo Nagata Reviewed-by: Steven Niu Discussion: https://postgr.es/m/aE--bmEv-gJUTH5v@paquier.xyz --- src/backend/catalog/heap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index fbaed5359ad7c..10f43c51c5af0 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1100,6 +1100,7 @@ AddNewRelationType(const char *typeName, * if false, relacl is always set NULL * allow_system_table_mods: true to allow creation in system namespaces * is_internal: is this a system-generated catalog? + * relrewrite: link to original relation during a table rewrite * * Output parameters: * typaddress: if not null, gets the object address of the new pg_type entry From c2e2589ab969eb802493191c79de844bf7dc3a6e Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 18 Jun 2025 14:53:55 +0900 Subject: [PATCH 111/602] pg_dump: Allow pg_dump to dump the statistics for foreign tables. Commit 1fd1bd87101 introduced support for dumping statistics with pg_dump and pg_dumpall, covering tables, materialized views, and indexes. However, it overlooked foreign tables, even though functions like pg_restore_relation_stats() support them. This commit fixes that oversight by allowing pg_dump and pg_dumpall to include statistics for foreign tables. 
Author: Fujii Masao Reviewed-by: Corey Huinker Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/3772e4e4-ef39-4deb-bb76-aa8165f33fb6@oss.nttdata.com --- doc/src/sgml/ref/pg_dump.sgml | 7 ++++--- doc/src/sgml/ref/pg_dumpall.sgml | 3 ++- src/bin/pg_dump/pg_dump.c | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index d7595a7e5468d..1e06bd33bdcd1 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1277,8 +1277,8 @@ PostgreSQL documentation The data section contains actual table data, large-object - contents, statistics for tables and materialized views and - sequence values. + contents, sequence values, and statistics for tables, + materialized views, and foreign tables. Post-data items include definitions of indexes, triggers, rules, statistics for indexes, and constraints other than validated check constraints. @@ -1359,7 +1359,8 @@ PostgreSQL documentation Dump only the statistics, not the schema (data definitions) or data. - Statistics for tables, materialized views, and indexes are dumped. + Statistics for tables, materialized views, foreign tables, + and indexes are dumped. diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 723a466cfaad6..43f384ed16a9c 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -690,7 +690,8 @@ exclude database PATTERN Dump only the statistics, not the schema (data definitions) or data. - Statistics for tables, materialized views, and indexes are dumped. + Statistics for tables, materialized views, foreign tables, + and indexes are dumped. diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 7bc0724cd301f..a8f0309e8fc1e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -6890,7 +6890,8 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, (relkind == RELKIND_PARTITIONED_TABLE) || (relkind == RELKIND_INDEX) || (relkind == RELKIND_PARTITIONED_INDEX) || - (relkind == RELKIND_MATVIEW)) + (relkind == RELKIND_MATVIEW || + relkind == RELKIND_FOREIGN_TABLE)) { RelStatsInfo *info = pg_malloc0(sizeof(RelStatsInfo)); DumpableObject *dobj = &info->dobj; @@ -6929,6 +6930,7 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: case RELKIND_MATVIEW: + case RELKIND_FOREIGN_TABLE: info->section = SECTION_DATA; break; case RELKIND_INDEX: From 09f7d36ba16e9665bb25d2c097e71c7439485fd7 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 18 Jun 2025 16:43:27 -0400 Subject: [PATCH 112/602] doc config.sgml: use "-" and not "_" for varlistentry "id"s Change "id"s of file_copy_method and enable_self_join_elimination for consistency with the rest of the guc "id"s. These are new entries for PG 18.
--- doc/src/sgml/config.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 7e0e5400ee128..5ea554ad3c36d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2363,7 +2363,7 @@ include_dir 'conf.d' -<varlistentry id="guc_file_copy_method" xreflabel="file_copy_method"> +<varlistentry id="guc-file-copy-method" xreflabel="file_copy_method"> file_copy_method (enum) file_copy_method configuration parameter @@ -5765,7 +5765,7 @@ ANY num_sync ( -<varlistentry id="guc_enable_self_join_elimination" xreflabel="enable_self_join_elimination"> +<varlistentry id="guc-enable-self-join-elimination" xreflabel="enable_self_join_elimination"> enable_self_join_elimination (boolean) enable_self_join_elimination configuration parameter From d0d1bcb1e8b2e324bc243d69ccfce55b25a79f8c Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 18 Jun 2025 16:48:26 -0400 Subject: [PATCH 113/602] doc: fix for commit 09f7d36ba16 in changing "_" to "-". I thought underscores wouldn't even work in "id"s, so I never checked to see if anything referenced it, but it seems it does work, so adjust the calling site for the dash syntax. --- doc/src/sgml/ref/alter_database.sgml | 2 +- doc/src/sgml/ref/create_database.sgml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/alter_database.sgml b/doc/src/sgml/ref/alter_database.sgml index 9da8920e12eff..1fc051e11a311 100644 --- a/doc/src/sgml/ref/alter_database.sgml +++ b/doc/src/sgml/ref/alter_database.sgml @@ -83,7 +83,7 @@ ALTER DATABASE name RESET ALL must be empty for this database, and no one can be connected to the database. Tables and indexes in non-default tablespaces are unaffected. The method used to copy files to the new tablespace - is affected by the <xref linkend="guc_file_copy_method"/> setting. + is affected by the <xref linkend="guc-file-copy-method"/> setting. diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 640c0425faec5..4da8aeebb50a2 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -140,7 +140,7 @@ CREATE DATABASE name after the creation of the new database. In some situations, this may have a noticeable negative impact on overall system performance. The FILE_COPY strategy is affected by the <xref -linkend="guc_file_copy_method"/> setting. +linkend="guc-file-copy-method"/> setting. From b57d707708181f988fd1fa697976059510fc4f76 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 19 Jun 2025 09:07:19 +0900 Subject: [PATCH 114/602] doc: Fix incorrect description of INCLUDING COMMENTS in CREATE FOREIGN TABLE. Commit 302cf157592 added support for LIKE in CREATE FOREIGN TABLE. In this feature, since indexes are not created for foreign tables, comments on indexes are not copied either. However, the documentation incorrectly stated that index comments would be copied when using INCLUDING COMMENTS. This commit corrects that by removing the mention of index comments. Author: Fujii Masao Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/f86cd84f-a6a3-4451-bae7-5cca9e63b06d@oss.nttdata.com --- doc/src/sgml/ref/create_foreign_table.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/create_foreign_table.sgml b/doc/src/sgml/ref/create_foreign_table.sgml index d08834ac9d291..009fa46532bbe 100644 --- a/doc/src/sgml/ref/create_foreign_table.sgml +++ b/doc/src/sgml/ref/create_foreign_table.sgml @@ -232,7 +232,7 @@ WITH ( MODULUS numeric_literal, REM INCLUDING COMMENTS - Comments for the copied columns, constraints, and indexes will be + Comments for the copied columns and constraints will be copied. The default behavior is to exclude comments, resulting in the copied columns and constraints in the new table having no comments.
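A sketch of the LIKE clause whose documentation this fixes (the source table and server names are hypothetical; indexes, and hence index comments, are never created on the foreign table):

CREATE FOREIGN TABLE films_remote (LIKE films INCLUDING COMMENTS)
  SERVER film_server;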
From db0c93f172a41515734a774f0412ff9557eca8ed Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 19 Jun 2025 09:12:34 +0900 Subject: [PATCH 115/602] doc: Mention GIN indexes support parallel builds. Commit 8492feb98f6 added support for parallel CREATE INDEX on GIN indexes. However, previously two places in the documentation and two in the source code comments still stated that only B-tree and BRIN indexes support parallel builds. This commit updates those references to correctly include GIN indexes. Author: Fujii Masao Reviewed-by: Robert Treat Discussion: https://postgr.es/m/7d27d068-90e2-4022-9bd7-09b0fd3d4f47@oss.nttdata.com --- doc/src/sgml/config.sgml | 3 ++- doc/src/sgml/ref/create_index.sgml | 2 +- src/backend/catalog/index.c | 2 +- src/backend/optimizer/plan/planner.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 5ea554ad3c36d..b265cc89c9d46 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2894,7 +2894,8 @@ include_dir 'conf.d' Sets the maximum number of parallel workers that can be started by a single utility command. Currently, the parallel utility commands that support the use of parallel workers are - CREATE INDEX when building a B-tree or BRIN index, + CREATE INDEX when building a B-tree, + GIN, or BRIN index, and VACUUM without FULL option. Parallel workers are taken from the pool of processes established by , limited diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 147a8f7587c71..b9c679c41e8db 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -814,7 +814,7 @@ Indexes: leveraging multiple CPUs in order to process the table rows faster. This feature is known as parallel index build. For index methods that support building indexes - in parallel (currently, B-tree and BRIN), + in parallel (currently, B-tree, GIN, and BRIN), maintenance_work_mem specifies the maximum amount of memory that can be used by each index build operation as a whole, regardless of how many worker processes were started. diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 739a92bdcc1ca..aa216683b74fe 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3020,7 +3020,7 @@ index_build(Relation heapRelation, /* * Determine worker process details for parallel CREATE INDEX. Currently, - * only btree and BRIN have support for parallel builds. + * only btree, GIN, and BRIN have support for parallel builds. * * Note that planner considers parallel safety for us. */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ff65867eebee7..549aedcfa991a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6879,7 +6879,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) * * tableOid is the table on which the index is to be built. indexOid is the * OID of an index to be created or reindexed (which must be an index with - * support for parallel builds - currently btree or BRIN). + * support for parallel builds - currently btree, GIN, or BRIN). * * Return value is the number of parallel worker processes to request. It * may be unsafe to proceed if this is 0. 
Note that this does not include the From a03805920b36b79b7ddf97c6804117f3296b2900 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 18 Jun 2025 21:19:42 -0400 Subject: [PATCH 116/602] doc PG 18 relnotes: add links for server variables --- doc/src/sgml/release-18.sgml | 58 ++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index ab83b1554001e..e89a86b1aa813 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -83,7 +83,7 @@ Change time zone abbreviation handling (Tom Lane) -The system will now favor the current session's time zone abbreviations before checking the server variable timezone_abbreviations. Previously timezone_abbreviations was +The system will now favor the current session's time zone abbreviations before checking the server variable . Previously timezone_abbreviations was checked first. @@ -101,7 +101,7 @@ Deprecate MD5 password authentication (Nathan Bossart) Support for MD5 passwords will be removed in a future major version release. CREATE ROLE and ALTER ROLE now emit deprecation warnings when setting MD5 passwords. -These warnings can be disabled by setting the md5_password_warnings parameter to off. +These warnings can be disabled by setting the parameter to off. @@ -263,7 +263,7 @@ Automatically remove some unnecessary table self-joins (Andrey Lepikhov, Alexand -This optimization can be disabled using server variable enable_self_join_elimination. +This optimization can be disabled using server variable . @@ -324,7 +324,7 @@ Allow the keys of SELECT DISTINCT to be internally reordered -This optimization can be disabled using enable_distinct_reordering. +This optimization can be disabled using . @@ -606,8 +606,8 @@ Add an asynchronous I/O subsystem (Andres Freund, Thomas Munro, Nazir Bilal Yavu This feature allows backends to queue multiple read requests, which allows for more efficient sequential scans, bitmap heap scans, vacuums, etc. -This is enabled by server variable io_method, with server variables io_combine_limit and io_max_combine_limit added to control it. This also enables -effective_io_concurrency and maintenance_io_concurrency values greater than zero for systems without fadvise() support. The new system view pg_aios shows the file handles being used +This is enabled by server variable , with server variables and added to control it. This also enables + and values greater than zero for systems without fadvise() support. The new system view pg_aios shows the file handles being used for asynchronous I/O. @@ -667,7 +667,7 @@ Allow normal vacuums to freeze some pages, even though they are all-visible (Mel -This reduces the overhead of later full-relation freezing. The aggressiveness of this can be controlled by server variable and per-table setting vacuum_max_eager_freeze_failure_rate. +This reduces the overhead of later full-relation freezing. The aggressiveness of this can be controlled by server variable and per-table setting . Previously vacuum never processed all-visible pages until freezing was required. 
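A usage sketch for the per-table setting mentioned above (the relation name is illustrative):

ALTER TABLE measurements SET (vacuum_max_eager_freeze_failure_rate = 0.10);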
@@ -679,7 +679,7 @@ Author: Nathan Bossart -Add server variable vacuum_truncate to control file truncation during VACUUM (Nathan Bossart, Gurjeet Singh) +Add server variable to control file truncation during VACUUM (Nathan Bossart, Gurjeet Singh) § @@ -697,7 +697,7 @@ Author: Melanie Plageman -Increase server variables effective_io_concurrency's and maintenance_io_concurrency's default values to 16 (Melanie Plageman) +Increase server variables 's and 's default values to 16 (Melanie Plageman) § § @@ -723,7 +723,7 @@ Author: Melanie Plageman -Increase the logging granularity of server variable log_connections (Melanie Plageman) +Increase the logging granularity of server variable (Melanie Plageman) § @@ -751,7 +751,7 @@ Author: Tom Lane -Add log_line_prefix escape %L to output the client IP address (Greg Sabino Mullane) +Add escape %L to output the client IP address (Greg Sabino Mullane) § @@ -763,7 +763,7 @@ Author: Fujii Masao -Add server variable log_lock_failures to log lock acquisition failures (Yuki Seino) +Add server variable to log lock acquisition failures (Yuki Seino) § @@ -804,7 +804,7 @@ Add delay time reporting to VACUUM and ANALYZE This information appears in the autovacuum logs, the system views pg_stat_progress_vacuum and pg_stat_progress_analyze, and the output of VACUUM and ANALYZE when in VERBOSE -mode; tracking must be enabled with the server variable track_cost_delay_timing. +mode; tracking must be enabled with the server variable . @@ -900,7 +900,7 @@ Author: Michael Paquier -Change server variable track_wal_io_timing to control tracking WAL timing in pg_stat_io instead of pg_stat_wal (Bertrand Drouvot) +Change server variable to control tracking WAL timing in pg_stat_io instead of pg_stat_wal (Bertrand Drouvot) § @@ -1144,7 +1144,7 @@ Add support for the OAuth authentication method (Jacob Champion, Daniel Gustafss -This adds an oauth authentication method to pg_hba.conf, libpq OAuth options, a server variable oauth_validator_libraries to load token validation libraries, and +This adds an oauth authentication method to pg_hba.conf, libpq OAuth options, a server variable to load token validation libraries, and a configure flag to add the required compile-time libraries. @@ -1156,7 +1156,7 @@ Author: Daniel Gustafsson -Add server variable ssl_tls13_ciphers to allow specification of multiple colon-separated TLSv1.3 cipher suites (Erica Zhang, Daniel Gustafsson) +Add server variable to allow specification of multiple colon-separated TLSv1.3 cipher suites (Erica Zhang, Daniel Gustafsson) § @@ -1168,7 +1168,7 @@ Author: Daniel Gustafsson -Change server variable ssl_groups's default to include elliptic curve X25519 (Daniel Gustafsson, Jacob Champion) +Change server variable 's default to include elliptic curve X25519 (Daniel Gustafsson, Jacob Champion) § @@ -1180,7 +1180,7 @@ Author: Daniel Gustafsson -Rename server variable ssl_ecdh_curve to ssl_groups and allow multiple colon-separated ECDH curves to be specified (Erica Zhang, Daniel Gustafsson) +Rename server variable ssl_ecdh_curve to and allow multiple colon-separated ECDH curves to be specified (Erica Zhang, Daniel Gustafsson) § @@ -1226,12 +1226,12 @@ Author: Nathan Bossart -Add server variable autovacuum_worker_slots to specify the maximum number of background workers (Nathan Bossart) +Add server variable to specify the maximum number of background workers (Nathan Bossart) § -With this variable set, autovacuum_max_workers can be adjusted at runtime up to this maximum without a server restart. 
+With this variable set, <xref linkend="guc-autovacuum-max-workers"/> can be adjusted at runtime up to this maximum without a server restart. @@ -1247,7 +1247,7 @@ Allow specification of the fixed number of dead tuples that will trigger an auto -The server variable is autovacuum_vacuum_max_threshold. Percentages are still used for triggering. +The server variable is <xref linkend="guc-autovacuum-vacuum-max-threshold"/>. Percentages are still used for triggering. @@ -1258,7 +1258,7 @@ Author: Andres Freund -Change server variable max_files_per_process to limit only files opened by a backend (Andres Freund) +Change server variable <xref linkend="guc-max-files-per-process"/> to limit only files opened by a backend (Andres Freund) § @@ -1274,7 +1274,7 @@ Author: Nathan Bossart -Add server variable num_os_semaphores to report the required number of semaphores (Nathan Bossart) +Add server variable <xref linkend="guc-num-os-semaphores"/> to report the required number of semaphores (Nathan Bossart) § @@ -1292,7 +1292,7 @@ Author: Peter Eisentraut -Add server variable extension_control_path to specify the location of extension control files (Peter Eisentraut, Matheus Alcantara) +Add server variable <xref linkend="guc-extension-control-path"/> to specify the location of extension control files (Peter Eisentraut, Matheus Alcantara) § § @@ -1314,7 +1314,7 @@ Author: Amit Kapila -Allow inactive replication slots to be automatically invalidated using server variable idle_replication_slot_timeout (Nisha Moond, Bharath Rupireddy) +Allow inactive replication slots to be automatically invalidated using server variable <xref linkend="guc-idle-replication-slot-timeout"/> (Nisha Moond, Bharath Rupireddy) § @@ -1326,12 +1326,12 @@ Author: Masahiko Sawada -Add server variable max_active_replication_origins to control the maximum active replication origins (Euler Taveira) +Add server variable <xref linkend="guc-max-active-replication-origins"/> to control the maximum active replication origins (Euler Taveira) § -This was previously controlled by max_replication_slots, but this new setting allows a higher origin count in cases where fewer slots are required. +This was previously controlled by <xref linkend="guc-max-replication-slots"/>, but this new setting allows a higher origin count in cases where fewer slots are required. @@ -1583,7 +1583,7 @@ Author: Thomas Munro -Add server variable file_copy_method to control the file copying method (Nazir Bilal Yavuz) +Add server variable <xref linkend="guc-file-copy-method"/> to control the file copying method (Nazir Bilal Yavuz) § @@ -2271,7 +2271,7 @@ Author: Tomas Vondra -Report search_path changes to the client (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) +Report <xref linkend="guc-search-path"/> changes to the client (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) § § From 1546e17f9d067e714e066fcdd57d5f56c14f4174 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 19 Jun 2025 09:48:08 +0530 Subject: [PATCH 117/602] Improve log messages and docs for slot synchronization. Improve the clarity of LOG messages when a failover logical slot synchronization fails, making the reasons more explicit for easier debugging. Update the documentation to outline scenarios where slot synchronization can fail, especially during the initial sync, and emphasize that pg_sync_replication_slots() is primarily intended for testing and debugging purposes. We also discussed improving the functionality of pg_sync_replication_slots() so that it can be used reliably, but we would take up that work for next version after some more discussion and review.
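A sketch of the workflow these messages describe (the slot name is illustrative; assumes a standby already configured for slot synchronization as outlined in the docs below):

-- on the primary: create a failover-enabled logical slot
SELECT pg_create_logical_replication_slot('failover_slot', 'pgoutput', false, false, true);
-- on the standby: one-off manual sync, per this commit mainly for testing and debugging
SELECT pg_sync_replication_slots();
-- on the primary: advance an unconsumed slot so a failed sync can proceed
SELECT * FROM pg_logical_slot_get_changes('failover_slot', NULL, NULL);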
Reported-by: Suraj Kharage Author: shveta malik Reviewed-by: Zhijie Hou Reviewed-by: Peter Smith Reviewed-by: Amit Kapila Backpatch-through: 17, where it was introduced Discussion: https://postgr.es/m/CAF1DzPWTcg+m+x+oVVB=y4q9=PYYsL_mujVp7uJr-_oUtWNGbA@mail.gmail.com --- doc/src/sgml/func.sgml | 6 ++- doc/src/sgml/logicaldecoding.sgml | 54 ++++++++++++++++++++-- src/backend/replication/logical/slotsync.c | 6 +-- 3 files changed, 57 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c67688cbf5f98..8d7d9a2f3e8e8 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -29698,7 +29698,7 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset - + pg_logical_slot_get_binary_changes @@ -29970,7 +29970,9 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset standby server. Temporary synced slots, if any, cannot be used for logical decoding and must be dropped after promotion. See for details. - Note that this function cannot be executed if + Note that this function is primarily intended for testing and + debugging purposes and should be used with caution. Additionally, + this function cannot be executed if sync_replication_slots is enabled and the slotsync worker is already running to perform the synchronization of slots. diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index dd9e83b08eaf1..5c5957e0d37a1 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -370,10 +370,10 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU pg_create_logical_replication_slot, or by using the failover option of - CREATE SUBSCRIPTION during slot creation, and then calling - - pg_sync_replication_slots - on the standby. By setting + CREATE SUBSCRIPTION during slot creation. + Additionally, enabling + sync_replication_slots on the standby + is required. By enabling sync_replication_slots on the standby, the failover slots can be synchronized periodically in the slotsync worker. For the synchronization to work, it is mandatory to @@ -398,6 +398,52 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU receiving the WAL up to the latest flushed position on the primary server. + + + While enabling + sync_replication_slots allows for automatic + periodic synchronization of failover slots, they can also be manually + synchronized using the + pg_sync_replication_slots function on the standby. + However, this function is primarily intended for testing and debugging and + should be used with caution. Unlike automatic synchronization, it does not + include cyclic retries, making it more prone to synchronization failures, + particularly during initial sync scenarios where the required WAL files + or catalog rows for the slot may have already been removed or are at risk + of being removed on the standby. In contrast, automatic synchronization + via sync_replication_slots provides continuous slot + updates, enabling seamless failover and supporting high availability. + Therefore, it is the recommended method for synchronizing slots.
+ + + + When slot synchronization is configured as recommended, + and the initial synchronization is performed either automatically or + manually via pg_sync_replication_slots, the standby can persist the + synchronized slot only if the following condition is met: The logical + replication slot on the primary must retain WALs and system catalog + rows that are still available on the standby. This ensures data + integrity and allows logical replication to continue smoothly after + promotion. + If the required WALs or catalog rows have already been purged from the + standby, the slot will not be persisted to avoid data loss. In such + cases, the following log message may appear: + + LOG: could not synchronize replication slot "failover_slot" + DETAIL: Synchronization could lead to data loss as the remote slot needs WAL at LSN 0/3003F28 and catalog xmin 754, but the standby has LSN 0/3003F28 and catalog xmin 756 + + If the logical replication slot is actively used by a consumer, no + manual intervention is needed; the slot will advance automatically, + and synchronization will resume in the next cycle. However, if no + consumer is configured, it is advisable to manually advance the slot + on the primary using + pg_logical_slot_get_changes or + + pg_logical_slot_get_binary_changes, + allowing synchronization to proceed. + + The ability to resume logical replication after failover depends upon the pg_replication_slots.synced diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 656e66e0ae0a1..f1dcbebfa1ae7 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, * impact the users, so we used DEBUG1 level to log the message. */ ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1, - errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot", + errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.", + errdetail("Synchronization could lead to data loss as the remote slot needs WAL at LSN %X/%X and catalog xmin %u, but the standby has LSN %X/%X and catalog xmin %u.", LSN_FORMAT_ARGS(remote_slot->restart_lsn), remote_slot->catalog_xmin, LSN_FORMAT_ARGS(slot->data.restart_lsn), @@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid) { ereport(LOG, errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.", + errdetail("Synchronization could lead to data loss as standby could not build a consistent snapshot to decode WALs at LSN %X/%X.", LSN_FORMAT_ARGS(slot->data.restart_lsn))); return false; From dec6643487bbed8f5d771e9b9aff772e5c711d4d Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 19 Jun 2025 13:53:12 +0200 Subject: [PATCH 118/602] Improve pg_dump/pg_dumpall help synopses and terminology Increase consistency of --help and man page synopses between pg_dump and pg_dumpall. These should now be very similar, as pg_dumpall can now also produce non-text dump output. But actually, they had drifted further apart. - Use verb "export" consistently, instead of "dump" or "extract". - Use "SQL script" instead of just "script" or "text file".
- Maintain consistent distinction between SQL script and other formats/archives (which is relevant for pg_restore). Reviewed-by: Robert Treat Discussion: https://www.postgresql.org/message-id/flat/3f71d8a7-095b-4829-9b0b-fce09e9866b3%40eisentraut.org --- doc/src/sgml/ref/pg_dump.sgml | 2 +- doc/src/sgml/ref/pg_dumpall.sgml | 7 +++++-- doc/src/sgml/ref/pg_restore.sgml | 4 ++-- src/bin/pg_dump/pg_dump.c | 2 +- src/bin/pg_dump/pg_dumpall.c | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 1e06bd33bdcd1..0d9270116549a 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -18,7 +18,7 @@ PostgreSQL documentation pg_dump - extract a PostgreSQL database into a script file or other archive file + export a PostgreSQL database as an SQL script or to other formats diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 43f384ed16a9c..8ca68da5a5560 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -16,7 +16,10 @@ PostgreSQL documentation pg_dumpall - extract a PostgreSQL database cluster using a specified dump format + + + export a PostgreSQL database cluster as an SQL script or to other formats + @@ -33,7 +36,7 @@ PostgreSQL documentation pg_dumpall is a utility for writing out (dumping) all PostgreSQL databases - of a cluster into an archive. The archive contains + of a cluster into an SQL script file or an archive. The output contains SQL commands that can be used as input to to restore the databases. It does this by calling for each database in the cluster. diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 8c88b07dcc865..b649bd3a5ae0f 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -18,8 +18,8 @@ PostgreSQL documentation pg_restore - restore a PostgreSQL database or cluster - from an archive created by pg_dump or + restore PostgreSQL databases from archives + created by pg_dump or pg_dumpall diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a8f0309e8fc1e..db944ec223071 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -1235,7 +1235,7 @@ main(int argc, char **argv) static void help(const char *progname) { - printf(_("%s dumps a database as a text file or to other formats.\n\n"), progname); + printf(_("%s exports a PostgreSQL database as an SQL script or to other formats.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]... 
[DBNAME]\n"), progname); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index b1f388cb39160..3cbcad65c5fb5 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -699,7 +699,7 @@ main(int argc, char *argv[]) static void help(void) { - printf(_("%s extracts a PostgreSQL database cluster based on specified dump format.\n\n"), progname); + printf(_("%s exports a PostgreSQL database cluster as an SQL script or to other formats.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]...\n"), progname); From d8aa21b74ff4e3d767c3344484c3cb22b9f0ec0d Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 19 Jun 2025 11:50:50 -0400 Subject: [PATCH 119/602] doc: add xreflabel text for libpq and PL/Python to be used for PG 18 release notes --- doc/src/sgml/libpq.sgml | 2 +- doc/src/sgml/plpython.sgml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 08bd51219262d..298c4b38ef90a 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1,6 +1,6 @@ - + <application>libpq</application> — C Library diff --git a/doc/src/sgml/plpython.sgml b/doc/src/sgml/plpython.sgml index bee817ea822a2..cb065bf5f88db 100644 --- a/doc/src/sgml/plpython.sgml +++ b/doc/src/sgml/plpython.sgml @@ -1,6 +1,6 @@ - + PL/Python — Python Procedural Language PL/Python From ed117c4c6c4feb1362abbb417ac6e6525dd8789b Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 19 Jun 2025 11:59:00 -0400 Subject: [PATCH 120/602] doc PG 18 relnotes: add links for applications --- doc/src/sgml/release-18.sgml | 64 ++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index e89a86b1aa813..75e17f1a0c6de 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -60,13 +60,13 @@ Author: Peter Eisentraut -Change initdb default to enable data checksums (Greg Sabino Mullane) +Change default to enable data checksums (Greg Sabino Mullane) § Checksums can be disabled with the new initdb option . -pg_upgrade requires matching cluster checksum settings, so this new + requires matching cluster checksum settings, so this new option can be useful to upgrade non-checksum old clusters. @@ -136,7 +136,7 @@ Prevent COPY FROM from treating \. as an e -psql will still treat \. as an end-of-file marker when reading CSV files from STDIN. Older psql clients connecting to PostgreSQL 18 servers might + will still treat \. as an end-of-file marker when reading CSV files from STDIN. Older psql clients connecting to PostgreSQL 18 servers might experience \copy problems. This release also enforces that \. must appear alone on a line. @@ -1015,7 +1015,7 @@ Have query jumbling of arrays consider only the first and last array elements (D -Jumbling is used by pg_stat_statements. +Jumbling is used by . @@ -1645,7 +1645,7 @@ Require primary/foreign key relationships to use either deterministic collations -The restore of a pg_dump, also used by pg_upgrade, will fail if these requirements are not met; schema changes must be made for these upgrade methods to succeed. +The restore of a , also used by , will fail if these requirements are not met; schema changes must be made for these upgrade methods to succeed. @@ -1728,7 +1728,7 @@ This was previously erroneously prohibited. 
- <link linkend="sql-copy"><command>COPY</command></link> + <xref linkend="sql-copy"/> @@ -1796,7 +1796,7 @@ Previously, the COPY worked but the FREEZE - <link linkend="sql-explain"><command>EXPLAIN</command></link> + <xref linkend="sql-explain"/> @@ -2219,7 +2219,7 @@ Allow regexp_match[es]()/regexp_like() - <link linkend="libpq">libpq</link> + <xref linkend="libpq"/> @@ -2502,7 +2502,7 @@ Author: Peter Eisentraut -Change initdb to default to enabling checksums (Greg Sabino Mullane) +Change to default to enabling checksums (Greg Sabino Mullane) § § @@ -2554,7 +2554,7 @@ Author: Robert Haas -Add pg_combinebackup option / to enable hard linking (Israel Barth Rubio, Robert Haas) +Add option / to enable hard linking (Israel Barth Rubio, Robert Haas) § @@ -2582,7 +2582,7 @@ Author: Masahiko Sawada -If pg_rewind's specifies a database name, use it in output (Masahiko Sawada) +If 's specifies a database name, use it in output (Masahiko Sawada) § @@ -2617,12 +2617,12 @@ Author: Andrew Dunstan -Allow pg_dumpall to dump in the same output formats as pg_dump supports (Mahendra Singh Thalor, Andrew Dunstan) +Allow to dump in the same output formats as pg_dump supports (Mahendra Singh Thalor, Andrew Dunstan) § -Also modify pg_restore to handle such dumps. Previously pg_dumpall only supported text format. +Also modify to handle such dumps. Previously pg_dumpall only supported text format. @@ -2633,7 +2633,7 @@ Author: Jeff Davis -Add pg_dump options , , and (Jeff Davis) +Add options , , and (Jeff Davis) § @@ -2647,7 +2647,7 @@ Author: Nathan Bossart -Add pg_dump and pg_dumpall option to dump sequence data that would normally be excluded (Nathan Bossart) +Add pg_dump and option to dump sequence data that would normally be excluded (Nathan Bossart) § § @@ -2660,7 +2660,8 @@ Author: Jeff Davis -Add pg_dump, pg_dumpall, and pg_restore options , , , and (Corey Huinker, Jeff Davis) +Add , , and + options , , , and (Corey Huinker, Jeff Davis) § @@ -2672,7 +2673,9 @@ Author: Tom Lane -Add option to disable row level security policy processing in pg_dump, pg_dumpall, pg_restore (Nikolay Samokhvalov) +Add option to disable row level security policy processing in +, , + (Nikolay Samokhvalov) § @@ -2686,7 +2689,7 @@ This is useful for migrating to systems with different policies. 
- <link linkend="pgupgrade"><application>pg_upgrade</application></link> + <xref linkend="pgupgrade"/> @@ -2812,7 +2815,7 @@ Author: Amit Kapila -Add pg_createsubscriber option to create logical replicas for all databases (Shubham Khanna) +Add option to create logical replicas for all databases (Shubham Khanna) § @@ -2848,7 +2851,7 @@ Author: Masahiko Sawada -Add pg_recvlogical option to specify failover slots (Hayato Kuroda) +Add option to specify failover slots (Hayato Kuroda) § @@ -3087,7 +3090,7 @@ Author: Peter Eisentraut -Add support for Python "Limited API" in PL/Python (Peter Eisentraut) +Add support for Python "Limited API" in (Peter Eisentraut) § § @@ -3239,7 +3242,7 @@ Author: Masahiko Sawada -Add extension pg_logicalinspect to inspect logical snapshots (Bertrand Drouvot) +Add extension to inspect logical snapshots (Bertrand Drouvot) § @@ -3289,7 +3292,8 @@ Author: Peter Eisentraut -Allow SCRAM authentication from the client to be passed to postgres_fdw servers (Matheus Alcantara, Peter Eisentraut) +Allow SCRAM authentication from the client to be passed to + servers (Matheus Alcantara, Peter Eisentraut) § @@ -3307,7 +3311,8 @@ Author: Peter Eisentraut -Allow SCRAM authentication from the client to be passed to dblink servers (Matheus Alcantara) +Allow SCRAM authentication from the client to be passed to + servers (Matheus Alcantara) § @@ -3319,7 +3324,7 @@ Author: Fujii Masao -Add on_error and log_verbosity options to file_fdw (Atsushi Torikoshi) +Add on_error and log_verbosity options to (Atsushi Torikoshi) § @@ -3351,7 +3356,8 @@ Author: Nathan Bossart -Add configurable variable min_password_length to passwordcheck (Emanuele Musella, Maurizio Boriani) +Add configurable variable min_password_length to + (Emanuele Musella, Maurizio Boriani) § @@ -3379,7 +3385,7 @@ Author: Tom Lane -Add isn server variable weak to control invalid check digit acceptance (Viktor Holmberg) +Add server variable weak to control invalid check digit acceptance (Viktor Holmberg) § @@ -3464,7 +3470,7 @@ Allow extensions to use the server's cumulative statistics API - <link linkend="pgstatstatements"><application>pg_stat_statements</application></link> + <xref linkend="pgstatstatements"/> @@ -3533,7 +3539,7 @@ Add pg_stat_statements.wal_buffers_full - <link linkend="pgcrypto"><application>pgcrypto</application></link> + <xref linkend="pgcrypto"/> From 6c29088fc6e269b7d64797bb62533b82afe03d93 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 19 Jun 2025 12:43:27 -0700 Subject: [PATCH 121/602] Correct docs about partitions and EXCLUDE constraints. In version 17 we added support for cross-partition EXCLUDE constraints, as long as they included all partition key columns and compared them with equality (see 8c852ba9a4). I updated the docs for exclusion constraints, but I missed that the docs for CREATE TABLE still said that they were not supported. This commit fixes that. Author: Paul A. Jungwirth Co-authored-by: Jeff Davis Discussion: https://postgr.es/m/c955d292-b92d-42d1-a2a0-1ec6715a2546@illuminatedcomputing.com Backpatch-through: 17 --- doc/src/sgml/ref/create_table.sgml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 4a41b2f553007..a581691818278 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -447,11 +447,6 @@ WITH ( MODULUS numeric_literal, REM the values in the new row, an error will be reported. 
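A sketch of a constraint that satisfies the documented rule: every partition key column appears in the EXCLUDE list and is compared with equality (btree_gist supplies the GiST equality operator for integers; names are illustrative):

CREATE EXTENSION btree_gist;
CREATE TABLE room_reservations (
    room   int,
    during tsrange,
    EXCLUDE USING gist (room WITH =, during WITH &&)
) PARTITION BY LIST (room);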
- - Partitioned tables do not support EXCLUDE constraints; - however, you can define these constraints on individual partitions. - - See for more discussion on table partitioning. @@ -1162,6 +1157,18 @@ WITH ( MODULUS numeric_literal, REM exclusion constraint on a subset of the table; internally this creates a partial index. Note that parentheses are required around the predicate. + + + When establishing an exclusion constraint for a multi-level partition + hierarchy, all the columns in the partition key of the target + partitioned table, as well as those of all its descendant partitioned + tables, must be included in the constraint definition. Additionally, + those columns must be compared using the equality operator. These + restrictions ensure that potentially-conflicting rows will exist in the + same partition. The constraint may also refer to other columns which + are not a part of any partition key, which can be compared using any + appropriate operator. + From a8360f074cc03a7cb73a4aaa6d8caab0e0bf0a0f Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Thu, 19 Jun 2025 17:13:58 -0400 Subject: [PATCH 122/602] doc PG 18 relnotes: add links to command and struct tags --- doc/src/sgml/release-18.sgml | 78 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 75e17f1a0c6de..11a4f99a27236 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -100,7 +100,7 @@ Deprecate MD5 password authentication (Nathan Bossart) -Support for MD5 passwords will be removed in a future major version release. CREATE ROLE and ALTER ROLE now emit deprecation warnings when setting MD5 passwords. +Support for MD5 passwords will be removed in a future major version release. and now emit deprecation warnings when setting MD5 passwords. These warnings can be disabled by setting the parameter to off. @@ -112,7 +112,7 @@ Author: David Rowley -Change VACUUM and ANALYZE to process the inheritance children of a parent (Michael Harris) +Change and to process the inheritance children of a parent (Michael Harris) § @@ -130,7 +130,7 @@ Author: Tom Lane -Prevent COPY FROM from treating \. as an end-of-file marker when reading CSV files (Daniel Vérité, Tom Lane) +Prevent COPY FROM from treating \. as an end-of-file marker when reading CSV files (Daniel Vérité, Tom Lane) § § @@ -154,7 +154,7 @@ Disallow unlogged partitioned tables (Michael Paquier) -Previously ALTER TABLE SET [UN]LOGGED did nothing, and the creation of an unlogged partitioned table did not cause its children to be unlogged. +Previously ALTER TABLE SET [UN]LOGGED did nothing, and the creation of an unlogged partitioned table did not cause its children to be unlogged. @@ -170,7 +170,7 @@ Execute AFTER triggers as the role that was active when trigg -Previously such triggers were run as the role that was active at trigger execution time (e.g., at COMMIT). This is significant for cases where the role is changed between queue time and +Previously such triggers were run as the role that was active at trigger execution time (e.g., at ). This is significant for cases where the role is changed between queue time and transaction commit. 
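As a sketch of the rule described above for exclusion constraints on partitioned tables: every partition key column must appear in the EXCLUDE constraint and be compared with equality. The table and column names are illustrative, and the btree_gist extension is assumed so GiST has an equality operator class for integers:

CREATE EXTENSION IF NOT EXISTS btree_gist;

CREATE TABLE room_reservation (
    room   int,
    during tsrange,
    EXCLUDE USING gist (room WITH =, during WITH &&)
) PARTITION BY LIST (room);

-- Conflicting reservations for the same room always land in the same
-- partition, so the constraint can be checked per partition.
CREATE TABLE room_reservation_r1 PARTITION OF room_reservation FOR VALUES IN (1);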
@@ -182,7 +182,7 @@ Author: Fujii Masao -Remove non-functional support for rule privileges in GRANT/REVOKE (Fujii Masao) +Remove non-functional support for rule privileges in / (Fujii Masao) § @@ -198,7 +198,7 @@ Author: David Rowley -Remove column pg_backend_memory_contexts.parent (Melih Mutlu) +Remove column pg_backend_memory_contexts.parent (Melih Mutlu) § @@ -319,7 +319,7 @@ Author: Richard Guo -Allow the keys of SELECT DISTINCT to be internally reordered to avoid sorting (Richard Guo) +Allow the keys of SELECT DISTINCT to be internally reordered to avoid sorting (Richard Guo) § @@ -607,7 +607,7 @@ Add an asynchronous I/O subsystem (Andres Freund, Thomas Munro, Nazir Bilal Yavu This feature allows backends to queue multiple read requests, which allows for more efficient sequential scans, bitmap heap scans, vacuums, etc. This is enabled by server variable , with server variables and added to control it. This also enables - and values greater than zero for systems without fadvise() support. The new system view pg_aios shows the file handles being used + and values greater than zero for systems without fadvise() support. The new system view pg_aios shows the file handles being used for asynchronous I/O. @@ -679,7 +679,7 @@ Author: Nathan Bossart -Add server variable to control file truncation during VACUUM (Nathan Bossart, Gurjeet Singh) +Add server variable to control file truncation during (Nathan Bossart, Gurjeet Singh) § @@ -768,7 +768,7 @@ Add server variable to log lock acquisit -Specifically it reports SELECT ... NOWAIT lock failures. +Specifically it reports SELECT ... NOWAIT lock failures. @@ -779,7 +779,7 @@ Author: Michael Paquier -Modify pg_stat_all_tables and its variants to report the time spent in VACUUM, ANALYZE, and their automatic variants (Sami Imseih) +Modify pg_stat_all_tables and its variants to report the time spent in VACUUM, ANALYZE, and their automatic variants (Sami Imseih) § @@ -797,13 +797,13 @@ Author: Nathan Bossart -Add delay time reporting to VACUUM and ANALYZE (Bertrand Drouvot, Nathan Bossart) +Add delay time reporting to and (Bertrand Drouvot, Nathan Bossart) § § -This information appears in the autovacuum logs, the system views pg_stat_progress_vacuum and pg_stat_progress_analyze, and the output of VACUUM and ANALYZE when in VERBOSE +This information appears in the autovacuum logs, the system views pg_stat_progress_vacuum and pg_stat_progress_analyze, and the output of VACUUM and ANALYZE when in VERBOSE mode; tracking must be enabled with the server variable . @@ -861,7 +861,7 @@ Author: Michael Paquier -Add pg_stat_io columns to report I/O activity in bytes (Nazir Bilal Yavuz) +Add pg_stat_io columns to report I/O activity in bytes (Nazir Bilal Yavuz) § @@ -900,7 +900,7 @@ Author: Michael Paquier -Change server variable to control tracking WAL timing in pg_stat_io instead of pg_stat_wal (Bertrand Drouvot) +Change server variable to control tracking WAL timing in pg_stat_io instead of pg_stat_wal (Bertrand Drouvot) § @@ -959,7 +959,7 @@ Author: Fujii Masao -Add column pg_stat_checkpointer.num_done to report the number of completed checkpoints (Anton A. Melnikov) +Add column pg_stat_checkpointer.num_done to report the number of completed checkpoints (Anton A. 
Melnikov) § @@ -991,7 +991,7 @@ Author: Michael Paquier -Add columns to pg_stat_database to report parallel workers activity (Benoit Lobréau) +Add columns to pg_stat_database to report parallel workers activity (Benoit Lobréau) § @@ -1042,7 +1042,7 @@ Author: David Rowley -Add column pg_backend_memory_contexts.type to report the type of memory context (David Rowley) +Add column pg_backend_memory_contexts.type to report the type of memory context (David Rowley) § @@ -1102,7 +1102,7 @@ Author: Fujii Masao -Allow ALTER DEFAULT PRIVILEGES to define large object default privileges (Takatsuka Haruka, Yugo Nagata, Laurenz Albe) +Allow to define large object default privileges (Takatsuka Haruka, Yugo Nagata, Laurenz Albe) § @@ -1378,7 +1378,7 @@ Author: Amit Kapila -Change the default CREATE SUBSCRIPTION streaming option from off to parallel (Vignesh C) +Change the default streaming option from off to parallel (Vignesh C) § @@ -1392,7 +1392,7 @@ Author: Amit Kapila -Allow ALTER SUBSCRIPTION to change the replication slot's two-phase commit behavior (Hayato Kuroda, Ajin Cherian, Amit Kapila, Zhijie Hou) +Allow ALTER SUBSCRIPTION to change the replication slot's two-phase commit behavior (Hayato Kuroda, Ajin Cherian, Amit Kapila, Zhijie Hou) § § @@ -1422,7 +1422,7 @@ Log conflicts while applying logical replication changes (Zhijie Hou, Nisha Moon -Also report in new columns of pg_stat_subscription_stats. +Also report in new columns of pg_stat_subscription_stats. @@ -1471,7 +1471,7 @@ Add OLD/NEW support to RETURNING< -Previously RETURNING only returned new values for INSERT and UPDATE, and old values for DELETE; MERGE would return the appropriate value for the internal query executed. This new syntax +Previously RETURNING only returned new values for and , and old values for ; would return the appropriate value for the internal query executed. This new syntax allows the RETURNING list of INSERT/UPDATE/DELETE/MERGE to explicitly return old and new values by using the special aliases old and new. These aliases can be renamed to avoid identifier conflicts. @@ -1489,7 +1489,7 @@ Allow foreign tables to be created like existing local tables (Zhang Mingli) -The syntax is CREATE FOREIGN TABLE ... LIKE. +The syntax is CREATE FOREIGN TABLE ... LIKE. @@ -1544,7 +1544,7 @@ Author: David Rowley -Allow VACUUM and ANALYZE to process partitioned tables without processing their children (Michael Harris) +Allow and to process partitioned tables without processing their children (Michael Harris) § @@ -1588,7 +1588,7 @@ Add server variable to control the file c -This controls whether CREATE DATABASE ... STRATEGY=FILE_COPY and ALTER DATABASE ... SET TABLESPACE uses file copy or clone. +This controls whether CREATE DATABASE ... STRATEGY=FILE_COPY and ALTER DATABASE ... SET TABLESPACE uses file copy or clone. @@ -1630,7 +1630,7 @@ Allow CHECK and foreign key constraints to be specified as -This also adds column pg_constraint.conenforced. +This also adds column pg_constraint.conenforced. 
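A minimal sketch of the NOT ENFORCED syntax noted above (names are illustrative; the constraint is recorded but not checked, and pg_constraint.conenforced reports it as false):

CREATE TABLE orders (
    id       int PRIMARY KEY,
    quantity int,
    CONSTRAINT quantity_positive CHECK (quantity > 0) NOT ENFORCED
);

SELECT conname, conenforced
  FROM pg_constraint
 WHERE conname = 'quantity_positive';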
- - - -Add function pg_check_fipsmode() to report the server's FIPS mode (Daniel Gustafsson) -§ - - - -Add optional parameter to json{b}_strip_nulls to allow removal of null array elements (Florents Tselai) +Add optional parameter to json{b}_strip_nulls to allow removal of null array elements (Florents Tselai) § @@ -1981,7 +1969,7 @@ Author: Tom Lane -Add function array_sort() which sorts an array's first dimension (Junwang Zhao, Jian He) +Add function array_sort() which sorts an array's first dimension (Junwang Zhao, Jian He) § @@ -1993,7 +1981,7 @@ Author: Michael Paquier -Add function array_reverse() which reverses an array's first dimension (Aleksander Alekseev) +Add function array_reverse() which reverses an array's first dimension (Aleksander Alekseev) § @@ -2005,7 +1993,7 @@ Author: Nathan Bossart -Add function reverse() to reverse bytea bytes (Aleksander Alekseev) +Add function reverse() to reverse bytea bytes (Aleksander Alekseev) § @@ -2017,7 +2005,7 @@ Author: Dean Rasheed -Allow casting between integer types and bytea (Aleksander Alekseev) +Allow casting between integer types and bytea (Aleksander Alekseev) § @@ -2033,7 +2021,7 @@ Author: Peter Eisentraut -Update Unicode data to Unicode 16.0.0 (Peter Eisentraut) +Update Unicode data to Unicode 16.0.0 (Peter Eisentraut) § @@ -2045,7 +2033,7 @@ Author: Tom Lane -Add full text search stemming for Estonian (Tom Lane) +Add full text search stemming for Estonian (Tom Lane) § @@ -2057,12 +2045,12 @@ Author: Tom Lane -Improve the XML error codes to more closely match the SQL standard (Tom Lane) +Improve the XML error codes to more closely match the SQL standard (Tom Lane) § -These errors are reported via SQLSTATE. +These errors are reported via SQLSTATE. @@ -2082,7 +2070,7 @@ Author: Jeff Davis -Add function CASEFOLD() to allow for more sophisticated case-insensitive matching (Jeff Davis) +Add function casefold() to allow for more sophisticated case-insensitive matching (Jeff Davis) § @@ -2100,7 +2088,7 @@ Author: Tom Lane -Allow MIN()/MAX() aggregates on arrays and composite types (Aleksander Alekseev, Marat Buharov) +Allow MIN()/MAX() aggregates on arrays and composite types (Aleksander Alekseev, Marat Buharov) § § @@ -2113,7 +2101,7 @@ Author: Tom Lane -Add a WEEK option to EXTRACT() (Tom Lane) +Add a WEEK option to EXTRACT() (Tom Lane) § @@ -2137,7 +2125,7 @@ Author: Tom Lane -Add roman numeral support to to_number() (Hunaid Sohail) +Add roman numeral support to to_number() (Hunaid Sohail) § @@ -2153,12 +2141,12 @@ Author: Masahiko Sawada -Add UUID version 7 generation function uuidv7() (Andrey Borodin) +Add UUID version 7 generation function uuidv7() (Andrey Borodin) § -This UUID value is temporally sortable. Function alias uuidv4() has been added to explicitly generate version 4 UUIDs. +This UUID value is temporally sortable. Function alias uuidv4() has been added to explicitly generate version 4 UUIDs. 
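Hedged examples of several of the functions mentioned above; the expected results are shown in comments:

SELECT uuidv7();                              -- temporally sortable UUID
SELECT array_sort(ARRAY[3, 1, 2]);            -- {1,2,3}
SELECT array_reverse(ARRAY[1, 2, 3]);         -- {3,2,1}
SELECT extract(week FROM date '2025-01-01');  -- 1
SELECT to_number('XIV', 'RN');                -- 14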
@@ -2169,7 +2157,7 @@ Author: Nathan Bossart -Add functions crc32() and crc32c() to compute CRC values (Aleksander Alekseev) +Add functions crc32() and crc32c() to compute CRC values (Aleksander Alekseev) § @@ -2181,7 +2169,7 @@ Author: Dean Rasheed -Add math functions gamma() and lgamma() (Dean Rasheed) +Add math functions gamma() and lgamma() (Dean Rasheed) § @@ -2193,7 +2181,7 @@ Author: Tom Lane -Allow => syntax for named cursor arguments in plpgsql (Pavel Stehule) +Allow => syntax for named cursor arguments in PL/pgSQL (Pavel Stehule) § @@ -2209,7 +2197,7 @@ Author: Tom Lane -Allow regexp_match[es]()/regexp_like()/regexp_replace()/regexp_count()/regexp_instr()/regexp_substr()/regexp_split_to_table()/regexp_split_to_array() to use named arguments (Jian He) +Allow regexp_match[es]()/regexp_like()/regexp_replace()/regexp_count()/regexp_instr()/regexp_substr()/regexp_split_to_table()/regexp_split_to_array() to use named arguments (Jian He) § @@ -2230,7 +2218,7 @@ Author: Robert Haas -Add function PQfullProtocolVersion() to report the full, including minor, protocol version number (Jacob Champion, Jelte Fennema-Nio) +Add function PQfullProtocolVersion() to report the full, including minor, protocol version number (Jacob Champion, Jelte Fennema-Nio) § @@ -2244,7 +2232,7 @@ Author: Heikki Linnakangas -Add libpq connection parameters and environment variables to specify the minimum and maximum acceptable protocol version for connections (Jelte Fennema-Nio) +Add libpq connection parameters and environment variables to specify the minimum and maximum acceptable protocol version for connections (Jelte Fennema-Nio) § § @@ -2257,7 +2245,7 @@ Author: Michael Paquier -Add libpq function PQservice() to return the connection service name (Michael Banck) +Add libpq function PQservice() to return the connection service name (Michael Banck) § @@ -2292,7 +2280,7 @@ Author: Álvaro Herrera -Add PQtrace() output for all message types, including authentication (Jelte Fennema-Nio) +Add PQtrace() output for all message types, including authentication (Jelte Fennema-Nio) § § § @@ -2308,7 +2296,7 @@ Author: Daniel Gustafsson -Add libpq connection parameter sslkeylogfile which dumps out SSL key material (Abhishek Chanda, Daniel Gustafsson) +Add libpq connection parameter sslkeylogfile which dumps out SSL key material (Abhishek Chanda, Daniel Gustafsson) § @@ -2354,7 +2342,7 @@ Allow psql to parse, bind, and close named prepared s -This is accomplished with new commands \parse, \bind_named, and \close. +This is accomplished with new commands \parse, \bind_named, and \close. @@ -2376,7 +2364,7 @@ Add psql backslash commands to allowing issuance of p -The new commands are \startpipeline, \syncpipeline, \sendpipeline, \endpipeline, \flushrequest, \flush, and \getresults. +The new commands are \startpipeline, \syncpipeline, \sendpipeline, \endpipeline, \flushrequest, \flush, and \getresults. @@ -2392,7 +2380,7 @@ Allow adding pipeline status to the psql prompt and a -The new prompt character is %P and the new psql variables are PIPELINE_SYNC_COUNT, PIPELINE_COMMAND_COUNT, and PIPELINE_RESULT_COUNT. +The new prompt character is %P and the new psql variables are PIPELINE_SYNC_COUNT, PIPELINE_COMMAND_COUNT, and PIPELINE_RESULT_COUNT. 
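Sketches for the new hash and math functions above, plus the named-argument notation now accepted by the regexp functions (the parameter names string, pattern, and replacement follow the documented signatures; treat them as assumptions):

SELECT crc32('PostgreSQL'::bytea) AS crc32,
       crc32c('PostgreSQL'::bytea) AS crc32c;

SELECT gamma(6.0) AS g,     -- 120 (= 5!)
       lgamma(6.0) AS lg;   -- ln(120), about 4.787

SELECT regexp_replace(string => 'low-high',
                      pattern => '-',
                      replacement => ' to ');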
@@ -2431,7 +2419,7 @@ Author: Álvaro Herrera -Change psql's \conninfo to use tabular format and include more information (Álvaro Herrera, Maiquel Grassi, Hunaid Sohail) +Change psql's to use tabular format and include more information (Álvaro Herrera, Maiquel Grassi, Hunaid Sohail) § @@ -2443,7 +2431,7 @@ Author: Dean Rasheed -Add function's leakproof indicator to psql's \df+, \do+, \dAo+, and \dC+ outputs (Yugo Nagata) +Add function's leakproof indicator to psql's \df+, \do+, \dAo+, and \dC+ outputs (Yugo Nagata) § @@ -2455,7 +2443,7 @@ Author: Michael Paquier -Add access method details for partitioned relations in \dP+ (Justin Pryzby) +Add access method details for partitioned relations in \dP+ (Justin Pryzby) § @@ -2467,7 +2455,7 @@ Author: Magnus Hagander -Add default_version to the psql \dx extension output (Magnus Hagander) +Add default_version to the psql \dx extension output (Magnus Hagander) § @@ -2479,7 +2467,7 @@ Author: Daniel Gustafsson -Add psql variable WATCH_INTERVAL to set the default \watch wait time (Daniel Gustafsson) +Add psql variable to set the default \watch wait time (Daniel Gustafsson) § @@ -2537,7 +2525,7 @@ Author: Nathan Bossart -Add vacuumdb option to compute only missing optimizer statistics (Corey Huinker, Nathan Bossart) +Add option to compute only missing optimizer statistics (Corey Huinker, Nathan Bossart) § § @@ -2570,7 +2558,7 @@ Author: Robert Haas -Allow pg_verifybackup to verify tar-format backups (Amul Sul) +Allow to verify tar-format backups (Amul Sul) § @@ -2594,7 +2582,7 @@ Author: Masahiko Sawada -Add pg_resetwal option to change the default char signedness (Masahiko Sawada) +Add option to change the default char signedness (Masahiko Sawada) § @@ -2888,13 +2876,13 @@ Author: Michael Paquier -Separate the loading and running of injection points (Michael Paquier, Heikki Linnakangas) +Separate the loading and running of injection points (Michael Paquier, Heikki Linnakangas) § § -Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), and such injection points can be run via INJECTION_POINT_CACHED(). +Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), and such injection points can be run via INJECTION_POINT_CACHED(). @@ -2917,7 +2905,7 @@ Author: Heikki Linnakangas -Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() (Heikki Linnakangas) +Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() (Heikki Linnakangas) § @@ -2929,7 +2917,7 @@ Author: David Rowley -Improve the performance of processing long JSON strings using SIMD instructions (David Rowley) +Improve the performance of processing long JSON strings using SIMD (Single Instruction Multiple Data) (David Rowley) § @@ -2993,14 +2981,14 @@ Author: Tomas Vondra -Add configure option to enable NUMA awareness (Jakub Wartak, Bertrand Drouvot) +Add configure option to enable NUMA awareness (Jakub Wartak, Bertrand Drouvot) § § § -The function pg_numa_available() reports on NUMA awareness, and system views pg_shmem_allocations_numa and pg_buffercache_numa which report on shared memory distribution across +The function pg_numa_available() reports on NUMA awareness, and system views pg_shmem_allocations_numa and pg_buffercache_numa which report on shared memory distribution across NUMA nodes. 
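A quick way to exercise the NUMA reporting described above; the views return one row per allocation or buffer and NUMA node, and pg_buffercache_numa assumes the pg_buffercache extension is installed:

SELECT pg_numa_available();

SELECT * FROM pg_shmem_allocations_numa LIMIT 5;

CREATE EXTENSION IF NOT EXISTS pg_buffercache;
SELECT * FROM pg_buffercache_numa LIMIT 5;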
@@ -3012,7 +3000,7 @@
Author: Nathan Bossart

-Add TOAST table to pg_index to allow for very large expression indexes (Nathan Bossart)
+Add TOAST table to pg_index to allow for very large expression indexes (Nathan Bossart)
§

@@ -3051,7 +3039,7 @@
Author: Peter Eisentraut

-Add amgettreeheight, amconsistentequality, and amconsistentordering to the index access method API (Mark Dilger)
+Add amgettreeheight, amconsistentequality, and amconsistentordering to the index access method API (Mark Dilger)
§
§

@@ -3064,7 +3052,7 @@
Author: Peter Eisentraut

-Add GiST support function stratnum() (Paul A. Jungwirth)
+Add GiST support function stratnum() (Paul A. Jungwirth)
§

@@ -3076,7 +3064,7 @@
Author: Masahiko Sawada

-Record the default CPU signedness of char in pg_controldata (Masahiko Sawada)
+Record the default CPU signedness of char in (Masahiko Sawada)
§

@@ -3146,12 +3134,12 @@
Author: Tom Lane

-Add macro PG_MODULE_MAGIC_EXT to allow extensions to report their name and version (Andrei Lepikhov)
+Add macro PG_MODULE_MAGIC_EXT to allow extensions to report their name and version (Andrei Lepikhov)
§

-This information can be accessed via the new function pg_get_loaded_modules().
+This information can be accessed via the new function pg_get_loaded_modules().

@@ -3162,7 +3150,7 @@
Author: Tom Lane

-Document that SPI_connect()/SPI_connect_ext() always returns success (SPI_OK_CONNECT) (Stepan Neretin)
+Document that SPI_connect()/SPI_connect_ext() always returns success (SPI_OK_CONNECT) (Stepan Neretin)
§

@@ -3173,25 +3161,25 @@
Errors are always reported via ereport().

-Remove the experimental designation of Meson builds on Windows (Aleksander Alekseev)
-§
-

+Add documentation section about API and ABI compatibility (David Wheeler, Peter Eisentraut)
+§

-Add documentation section about API and ABI compatibility (David Wheeler, Peter Eisentraut)
-§

+Remove the experimental designation of Meson builds on Windows (Aleksander Alekseev)
+§
+

@@ -3254,7 +3242,7 @@
Author: Robert Haas

-Add extension pg_overexplain which adds debug details to EXPLAIN output (Robert Haas)
+Add extension which adds debug details to EXPLAIN output (Robert Haas)
§

@@ -3272,7 +3260,7 @@
Author: Fujii Masao

-Add output columns to postgres_fdw_get_connections() (Hayato Kuroda, Sagar Dilip Shedge)
+Add output columns to postgres_fdw_get_connections() (Hayato Kuroda, Sagar Dilip Shedge)
§
§
§

@@ -3292,15 +3280,15 @@
Author: Peter Eisentraut

-Allow SCRAM authentication from the client to be passed to
+Allow SCRAM authentication from the client to be passed to
+ servers (Matheus Alcantara, Peter Eisentraut)
§

This avoids storing postgres_fdw authentication information in the database, and is enabled with the
-postgres_fdw use_scram_passthrough connection option. libpq uses new connection
-parameters scram_client_key and scram_server_key.
+postgres_fdw use_scram_passthrough connection option. libpq uses new connection
+parameters and .

@@ -3373,7 +3361,7 @@
Author: Tatsuo Ishii

-Have pgbench report the number of failed, retried, or skipped transactions in per-script reports (Yugo Nagata)
+Have report the number of failed, retried, or skipped transactions in per-script reports (Yugo Nagata)
§

@@ -3390,7 +3378,7 @@
Add server variable weak to control inv

-This was previously only controlled by function isn_weak().
+This was previously only controlled by function isn_weak().
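Two of the items above can be exercised directly from SQL; a sketch (isn is an extension, and the isn.weak variable replaces calling isn_weak(true)):

SELECT * FROM pg_get_loaded_modules();  -- name, version, and file of each loaded module

CREATE EXTENSION IF NOT EXISTS isn;
SET isn.weak = on;   -- accept ISNs with invalid check digits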
@@ -3401,7 +3389,7 @@ Author: Heikki Linnakangas -Allow values to be sorted to speed btree_gist index builds (Bernd Helmle, Andrey Borodin) +Allow values to be sorted to speed index builds (Bernd Helmle, Andrey Borodin) § @@ -3413,7 +3401,7 @@ Author: Tomas Vondra -Add amcheck function gin_index_check() to verify GIN indexes (Grigory Kryachko, Heikki Linnakangas, Andrey Borodin) +Add check function gin_index_check() to verify GIN indexes (Grigory Kryachko, Heikki Linnakangas, Andrey Borodin) § @@ -3425,12 +3413,12 @@ Author: Andres Freund -Add functions pg_buffercache_evict_relation() and pg_buffercache_evict_all() to evict unpinned shared buffers (Nazir Bilal Yavuz) +Add functions pg_buffercache_evict_relation() and pg_buffercache_evict_all() to evict unpinned shared buffers (Nazir Bilal Yavuz) § -The existing function pg_buffercache_evict() now returns the buffer flush status. +The existing function pg_buffercache_evict() now returns the buffer flush status. @@ -3445,7 +3433,7 @@ Author: Robert Haas -Allow extensions to install custom EXPLAIN options (Robert Haas, Sami Imseih) +Allow extensions to install custom options (Robert Haas, Sami Imseih) § § § @@ -3550,7 +3538,7 @@ Author: Álvaro Herrera -Add pgcrypto functions sha256crypt() and sha512crypt() (Bernd Helmle) +Add pgcrypto algorithms sha256crypt and sha512crypt (Bernd Helmle) § @@ -3562,11 +3550,23 @@ Author: Daniel Gustafsson -Add CFB mode to pgcrypto encryption and decryption (Umar Hayat) +Add CFB mode to pgcrypto encryption and decryption (Umar Hayat) § + + + + +Add function fips_mode() to report the server's FIPS mode (Daniel Gustafsson) +§ + + + - - -Change default to enable data checksums (Greg Sabino Mullane) -§ - + + + Change default to enable data checksums + (Greg Sabino Mullane) + § + - -Checksums can be disabled with the new initdb option . - requires matching cluster checksum settings, so this new -option can be useful to upgrade non-checksum old clusters. - - + + Checksums can be disabled with the + new initdb option + . + requires matching cluster checksum settings, so this new option can + be useful to upgrade non-checksum old clusters. + + - - -Change time zone abbreviation handling (Tom Lane) -§ - + + + Change time zone abbreviation handling (Tom Lane) + § + - -The system will now favor the current session's time zone abbreviations before checking the server variable . Previously timezone_abbreviations was -checked first. - - + + The system will now favor the current session's time + zone abbreviations before checking the server variable + . Previously + timezone_abbreviations was checked first. + + - - -Deprecate MD5 password authentication (Nathan Bossart) -§ - + + + Deprecate MD5 password + authentication (Nathan Bossart) + § + - -Support for MD5 passwords will be removed in a future major version release. and now emit deprecation warnings when setting MD5 passwords. -These warnings can be disabled by setting the parameter to off. - - + + Support for MD5 passwords will be removed in a future major + version release. and now emit deprecation warnings when + setting MD5 passwords. These warnings can be disabled by setting + the parameter to + off. + + - - -Change and to process the inheritance children of a parent (Michael Harris) -§ - + + + Change and + to process the inheritance children of a parent (Michael Harris) + § + - -The previous behavior can be performed by using the new ONLY option. - - + + The previous behavior can be performed by using the new + ONLY option. 
+ + - - -Prevent COPY FROM from treating \. as an end-of-file marker when reading CSV files (Daniel Vérité, Tom Lane) -§ -§ - + + + Prevent COPY FROM + from treating \. as an end-of-file marker when + reading CSV files (Daniel Vérité, Tom Lane) + § + § + - - will still treat \. as an end-of-file marker when reading CSV files from STDIN. Older psql clients connecting to PostgreSQL 18 servers might -experience \copy problems. This -release also enforces that \. must appear alone on a line. - - + + will still treat + \. as an end-of-file marker when reading + CSV files from STDIN. + Older psql clients connecting to + PostgreSQL 18 servers might experience \copy + problems. This release also enforces that \. + must appear alone on a line. + + - - -Disallow unlogged partitioned tables (Michael Paquier) -§ - + + + Disallow unlogged partitioned tables (Michael Paquier) + § + - -Previously ALTER TABLE SET [UN]LOGGED did nothing, and the creation of an unlogged partitioned table did not cause its children to be unlogged. - - + + Previously ALTER TABLE SET + [UN]LOGGED did nothing, and the creation of an + unlogged partitioned table did not cause its children to be unlogged. + + - - -Execute AFTER triggers as the role that was active when trigger events were queued (Laurenz Albe) -§ - + + + Execute AFTER triggers as the role that was active when + trigger events were queued (Laurenz Albe) + § + - -Previously such triggers were run as the role that was active at trigger execution time (e.g., at ). This is significant for cases where the role is changed between queue time and -transaction commit. - - + + Previously such triggers were run as the role that was active at + trigger execution time (e.g., at ). + This is significant for cases where the role is changed between queue + time and transaction commit. + + - - -Remove non-functional support for rule privileges in / (Fujii Masao) -§ - + + + Remove non-functional support for rule privileges in / (Fujii Masao) + § + - -These have been non-functional since PostgreSQL 8.2. - - + + These have been non-functional since + PostgreSQL 8.2. + + - - -Remove column pg_backend_memory_contexts.parent (Melih Mutlu) -§ - + + + Remove column pg_backend_memory_contexts.parent + (Melih Mutlu) + § + - -This is no longer needed since pg_backend_memory_contexts.path was added. - - + + This is no longer needed since + pg_backend_memory_contexts.path + was added. + + - - -Change pg_backend_memory_contexts.level and pg_log_backend_memory_contexts() to be one-based (Melih Mutlu, Atsushi Torikoshi, David Rowley, Fujii Masao) -§ -§ -§ - + + + Change + pg_backend_memory_contexts.level + and pg_log_backend_memory_contexts() + to be one-based (Melih Mutlu, Atsushi Torikoshi, David Rowley, + Fujii Masao) + § + § + § + - -These were previously zero-based. - - + + These were previously zero-based. + + @@ -256,40 +292,48 @@ Author: Alexander Korotkov 2025-02-17 [fc069a3a6] Implement Self-Join Elimination --> - - -Automatically remove some unnecessary table self-joins (Andrey Lepikhov, Alexander Kuzmenkov, Alexander Korotkov, Alena Rybakina) -§ - + + + Automatically remove some unnecessary table self-joins (Andrey + Lepikhov, Alexander Kuzmenkov, Alexander Korotkov, Alena Rybakina) + § + - -This optimization can be disabled using server variable . - - + + This optimization can be disabled using server variable . + + - - -Convert some IN (VALUES ...) to x = ANY ... for better optimizer statistics (Alena Rybakina, Andrei Lepikhov) -§ - - + + + Convert some IN (VALUES + ...) 
to x = ANY ... for better + optimizer statistics (Alena Rybakina, Andrei Lepikhov) + § + + - - -Allow transforming OR-clauses to arrays for faster index processing (Alexander Korotkov, Andrey Lepikhov) -§ - - + + + Allow transforming OR-clauses + to arrays for faster index processing (Alexander Korotkov, Andrey + Lepikhov) + § + + - - -Speed up the processing of INTERSECT, EXCEPT, window aggregates, and view column aliases (Tom Lane, David Rowley) -§ -§ -§ -§ - - + + + Speed up the processing of INTERSECT, + EXCEPT, window aggregates, and view column aliases (Tom Lane, + David Rowley) + § + § + § + § + + - - -Allow the keys of SELECT DISTINCT to be internally reordered to avoid sorting (Richard Guo) -§ - + + + Allow the keys of SELECT + DISTINCT to be internally reordered to avoid sorting + (Richard Guo) + § + - -This optimization can be disabled using . - - + + This optimization can be disabled using . + + - - -Ignore GROUP BY columns that are functionally dependent on other columns (Zhang Mingli, Jian He, David Rowley) -§ - + + + Ignore GROUP BY + columns that are functionally dependent on other columns (Zhang + Mingli, Jian He, David Rowley) + § + - -If a GROUP BY clause includes all columns of a unique index, as well as other columns of the same table, those other columns are redundant and can be dropped -from the grouping. This was already true for non-deferred primary keys. - - + + If a GROUP BY clause includes all columns of + a unique index, as well as other columns of the same table, those + other columns are redundant and can be dropped from the grouping. + This was already true for non-deferred primary keys. + + - - -Allow some HAVING clauses on GROUPING SETS to be pushed to WHERE clauses (Richard Guo) -§ -§ -§ -§ - - - -This allows earlier row filtering. This release also fixes some GROUPING SETS queries that used to return incorrect results. - - + + + Allow some HAVING clauses + on GROUPING + SETS to be pushed to WHERE clauses + (Richard Guo) + § + § + § + § + + + + This allows earlier row filtering. This release also fixes some + GROUPING SETS queries that used to return + incorrect results. + + - - -Improve row estimates for generate_series() using numeric and timestamp values (David Rowley, Song Jinzhou) -§ -§ - - + + + Improve row estimates for generate_series() + using numeric + and timestamp + values (David Rowley, Song Jinzhou) + § + § + + - - -Allow the optimizer to use Right Semi Join plans (Richard Guo) -§ - + + + Allow the optimizer to use Right Semi Join plans + (Richard Guo) + § + - -Semi-joins are used when needing to find if there is at least one match. - - + + Semi-joins are used when needing to find if there is at least + one match. 
+ + - - -Allow merge joins to use incremental sorts (Richard Guo) -§ - - + + + Allow merge joins to use incremental sorts + (Richard Guo) + § + + - - -Improve the efficiency of planning queries accessing many partitions (Ashutosh Bapat, Yuya Watari, David Rowley) -§ -§ - - + + + Improve the efficiency of planning queries accessing many partitions + (Ashutosh Bapat, Yuya Watari, David Rowley) + § + § + + - - -Allow partitionwise joins in more cases, and reduce its memory usage (Richard Guo, Tom Lane, Ashutosh Bapat) -§ -§ - - + + + Allow partitionwise + joins in more cases, and reduce its memory usage (Richard Guo, + Tom Lane, Ashutosh Bapat) + § + § + + - - -Improve cost estimates of partition queries (Nikita Malakhov, Andrei Lepikhov) -§ - - + + + Improve cost estimates of partition queries (Nikita Malakhov, + Andrei Lepikhov) + § + + - - -Improve SQL-language function plan caching (Alexander Pyhalov, Tom Lane) -§ -§ - - + + + Improve SQL-language + function plan caching (Alexander Pyhalov, Tom Lane) + § + § + + - - -Improve handling of disabled optimizer features (Robert Haas) -§ - - + + + Improve handling of disabled optimizer features (Robert Haas) + § + + @@ -498,18 +574,19 @@ Author: Peter Geoghegan 2025-04-04 [8a510275d] Further optimize nbtree search scan key comparisons. --> - - -Allow skip scans of btree indexes (Peter Geoghegan) -§ -§ - + + + Allow skip scans of btree indexes + (Peter Geoghegan) + § + § + - -This allows multi-column btree indexes to be used by queries that only -equality-reference the second or later indexed columns. - - + + This allows multi-column btree indexes to be used by queries that + only equality-reference the second or later indexed columns. + + - - -Allow non-btree unique indexes to be used as partition keys and in materialized views (Mark Dilger) -§ -§ - + + + Allow non-btree unique indexes to be used as partition keys and in + materialized views (Mark Dilger) + § + § + - -The index type must still support equality. - - + + The index type must still support equality. + + - - -Allow GIN indexes to be created in parallel (Tomas Vondra, Matthias van de Meent) -§ - - + + + Allow GIN indexes to + be created in parallel (Tomas Vondra, Matthias van de Meent) + § + + - - -Allow values to be sorted to speed range-type GiST and btree index builds (Bernd Helmle) -§ - - + + + Allow values to be sorted to speed range-type GiST and btree + index builds (Bernd Helmle) + § + + @@ -588,41 +669,51 @@ Author: Andres Freund 2025-03-30 [2a5e709e7] Enable IO concurrency on all systems --> - - -Add an asynchronous I/O subsystem (Andres Freund, Thomas Munro, Nazir Bilal Yavuz, Melanie Plageman) -§ -§ -§ -§ -§ -§ -§ -§ -§ -§ -§ - - - -This feature allows backends to queue multiple read requests, which allows for more efficient sequential scans, bitmap heap scans, vacuums, etc. -This is enabled by server variable , with server variables and added to control it. This also enables - and values greater than zero for systems without fadvise() support. The new system view pg_aios shows the file handles being used -for asynchronous I/O. - - + + + Add an asynchronous I/O subsystem (Andres Freund, Thomas Munro, + Nazir Bilal Yavuz, Melanie Plageman) + § + § + § + § + § + § + § + § + § + § + § + + + + This feature allows backends to queue multiple read requests, + which allows for more efficient sequential scans, bitmap + heap scans, vacuums, etc. This is enabled by server + variable , with server + variables and added to control it. 
+ This also enables + and + values greater than zero for systems without + fadvise() support. The new system view pg_aios + shows the file handles being used for asynchronous I/O. + + - - -Improve the locking performance of queries that access many relations (Tomas Vondra) -§ - - + + + Improve the locking performance of queries that access many relations + (Tomas Vondra) + § + + - - -Improve the performance and reduce memory usage of hash joins and GROUP BY (David Rowley, Jeff Davis) -§ -§ -§ -§ -§ - + + + Improve the performance and reduce memory usage of hash joins and + GROUP BY + (David Rowley, Jeff Davis) + § + § + § + § + § + - -This also improves hash set operations used by EXCEPT, and hash lookups of subplan values. - - + + This also improves hash set operations used by EXCEPT, and hash + lookups of subplan values. + + - - -Allow normal vacuums to freeze some pages, even though they are all-visible (Melanie Plageman) -§ -§ - + + + Allow normal vacuums to freeze some pages, even though they are + all-visible (Melanie Plageman) + § + § + - -This reduces the overhead of later full-relation freezing. The aggressiveness of this can be controlled by server variable and per-table setting . -Previously vacuum never processed all-visible pages until freezing was required. - - + + This reduces the overhead of later full-relation + freezing. The aggressiveness of this can be + controlled by server variable and per-table setting . Previously + vacuum never processed all-visible pages until freezing was required. + + - - -Add server variable to control file truncation during (Nathan Bossart, Gurjeet Singh) -§ - + + + Add server variable to control + file truncation during (Nathan Bossart, + Gurjeet Singh) + § + - -A storage-level parameter with the same name and behavior already existed. - - + + A storage-level parameter with the same name and behavior already + existed. + + - - -Increase server variables 's and 's default values to 16 (Melanie Plageman) -§ -§ - + + + Increase server variables 's and 's default values to 16 + (Melanie Plageman) + § + § + - -This more accurately reflects modern hardware. - - + + This more accurately reflects modern hardware. + + @@ -721,72 +826,87 @@ Author: Melanie Plageman 2025-03-12 [9219093ca] Modularize log_connections output --> - - -Increase the logging granularity of server variable (Melanie Plageman) -§ - + + + Increase the logging granularity of server variable (Melanie Plageman) + § + - -This server variable was previously only boolean, which is still supported. - - + + This server variable was previously only boolean, which is still + supported. + + - - -Add log_connections option to report the duration of connection stages (Melanie Plageman) -§ - - + + + Add log_connections option to report the duration + of connection stages (Melanie Plageman) + § + + - - -Add escape %L to output the client IP address (Greg Sabino Mullane) -§ - - + + + Add escape + %L to output the client IP + address (Greg Sabino Mullane) + § + + - - -Add server variable to log lock acquisition failures (Yuki Seino) -§ - + + + Add server variable to log + lock acquisition failures (Yuki Seino) + § + - -Specifically it reports SELECT ... NOWAIT lock failures. - - + + Specifically it reports SELECT + ... NOWAIT lock failures. 
+ + - - -Modify pg_stat_all_tables and its variants to report the time spent in , , and their automatic variants (Sami Imseih) -§ - + + + Modify pg_stat_all_tables + and its variants to report the time spent in , , and their + automatic variants (Sami Imseih) + § + - -The new columns are total_vacuum_time, total_autovacuum_time, total_analyze_time, and total_autoanalyze_time. - - + + The new columns are total_vacuum_time, + total_autovacuum_time, + total_analyze_time, and + total_autoanalyze_time. + + - - -Add delay time reporting to and (Bertrand Drouvot, Nathan Bossart) -§ -§ - + + + Add delay time reporting to and (Bertrand Drouvot, Nathan Bossart) + § + § + - -This information appears in the server log, the system views pg_stat_progress_vacuum and pg_stat_progress_analyze, and the output of and when in VERBOSE -mode; tracking must be enabled with the server variable . - - + + This information appears in the server log, the system views pg_stat_progress_vacuum + and pg_stat_progress_analyze, + and the output of and when in VERBOSE + mode; tracking must be enabled with the server variable . + + - - -Add WAL, CPU, and average read statistics output to ANALYZE VERBOSE (Anthonin Bonnefoy) -§ -§ - - + + + Add WAL, CPU, and average + read statistics output to ANALYZE VERBOSE + (Anthonin Bonnefoy) + § + § + + - - -Add full WAL buffer count to VACUUM/ANALYZE (VERBOSE) and autovacuum log output (Bertrand Drouvot) -§ - - + + + Add full WAL buffer count to + VACUUM/ANALYZE (VERBOSE) + and autovacuum log output (Bertrand Drouvot) + § + + - - -Add per-backend I/O statistics reporting (Bertrand Drouvot) -§ -§ - + + + Add per-backend I/O statistics reporting (Bertrand Drouvot) + § + § + - -The statistics are accessed via pg_stat_get_backend_io(). Per-backend I/O statistics can be cleared via pg_stat_reset_backend_stats(). - - + + The statistics are accessed via pg_stat_get_backend_io(). + Per-backend I/O statistics can be cleared via pg_stat_reset_backend_stats(). + + - - -Add pg_stat_io columns to report I/O activity in bytes (Nazir Bilal Yavuz) -§ - + + + Add pg_stat_io + columns to report I/O activity in bytes (Nazir Bilal Yavuz) + § + - -The new columns are read_bytes, write_bytes, and extend_bytes. The op_bytes column, which always equaled BLCKSZ, has been removed. - - + + The new columns are read_bytes, + write_bytes, and + extend_bytes. The + op_bytes column, which always equaled + BLCKSZ, + has been removed. + + - - -Add WAL I/O activity rows to pg_stat_io (Nazir Bilal Yavuz, Bertrand Drouvot, Michael Paquier) -§ -§ -§ - + + + Add WAL I/O activity rows to + pg_stat_io (Nazir Bilal Yavuz, Bertrand + Drouvot, Michael Paquier) + § + § + § + - -This includes WAL receiver activity and a wait event for such writes. - + + This includes WAL receiver activity and a wait + event for such writes. + - + - - -Change server variable to control tracking WAL timing in pg_stat_io instead of pg_stat_wal (Bertrand Drouvot) -§ - - + + + Change server variable + to control tracking WAL timing + in pg_stat_io instead of pg_stat_wal + (Bertrand Drouvot) + § + + - - -Remove read/sync columns from pg_stat_wal (Bertrand Drouvot) -§ -§ - + + + Remove read/sync columns from pg_stat_wal + (Bertrand Drouvot) + § + § + - -This removes columns wal_write, wal_sync, wal_write_time, and wal_sync_time. - - + + This removes columns wal_write, + wal_sync, + wal_write_time, and + wal_sync_time. 
+ + - - -Add function pg_stat_get_backend_wal() to return per-backend WAL statistics (Bertrand Drouvot) -§ - + + + Add function pg_stat_get_backend_wal() + to return per-backend WAL statistics (Bertrand + Drouvot) + § + - -Per-backend WAL statistics can be cleared via pg_stat_reset_backend_stats(). - - + + Per-backend WAL + statistics can be cleared via pg_stat_reset_backend_stats(). + + - - -Add function pg_ls_summariesdir() to specifically list the contents of PGDATA/pg_wal/summaries (Yushi Ogiwara) -§ - - + + + Add function pg_ls_summariesdir() + to specifically list the contents of PGDATA/pg_wal/summaries + (Yushi Ogiwara) + § + + - - -Add column pg_stat_checkpointer.num_done to report the number of completed checkpoints (Anton A. Melnikov) -§ - + + + Add column pg_stat_checkpointer.num_done + to report the number of completed checkpoints (Anton A. Melnikov) + § + - -Columns num_timed and num_requested count both completed and skipped checkpoints. - - + + Columns num_timed and + num_requested count both completed and + skipped checkpoints. + + - - -Add column pg_stat_checkpointer.slru_written to report SLRU buffers written (Nitin Jadhav) -§ - + + + Add column + pg_stat_checkpointer.slru_written + to report SLRU buffers written (Nitin Jadhav) + § + - -Also, modify the checkpoint server log message to report separate shared buffer and SLRU buffer values. - - + + Also, modify the checkpoint server log message to report separate + shared buffer and SLRU buffer values. + + - - -Add columns to pg_stat_database to report parallel worker activity (Benoit Lobréau) -§ - + + + Add columns to pg_stat_database + to report parallel worker activity (Benoit Lobréau) + § + - -The new columns are parallel_workers_to_launch and parallel_workers_launched. - - + + The new columns are + parallel_workers_to_launch and + parallel_workers_launched. + + - - -Have query id computation of arrays consider only the first and last array elements (Dmitry Dolgov, Sami Imseih) -§ -§ - + + + Have query id computation + of arrays consider only the first and last array elements (Dmitry + Dolgov, Sami Imseih) + § + § + - -Jumbling is used by . - - + + Jumbling is used by . + + - - -Adjust query id computations to group together queries using the same relation name (Michael Paquier, Sami Imseih) -§ - + + + Adjust query id computations to group together queries using the + same relation name (Michael Paquier, Sami Imseih) + § + - -This is true even if the tables in different schemas have different column names. - - + + This is true even if the tables in different schemas have different + column names. 
+ + - - -Add column pg_backend_memory_contexts.type to report the type of memory context (David Rowley) -§ - - + + + Add column pg_backend_memory_contexts.type + to report the type of memory context (David Rowley) + § + + - - -Add column pg_backend_memory_contexts.path to show memory context parents (Melih Mutlu) -§ - - + + + Add column + pg_backend_memory_contexts.path + to show memory context parents (Melih Mutlu) + § + + @@ -1075,53 +1256,61 @@ Author: Michael Paquier 2024-07-10 [d898665bf] Extend pg_get_acl() to handle sub-object IDs --> - - -Add function pg_get_acl() to retrieve database access control details (Joel Jacobson) -§ -§ - - + + + Add function pg_get_acl() + to retrieve database access control details (Joel Jacobson) + § + § + + - - -Add function has_largeobject_privilege() to check large object privileges (Yugo Nagata) -§ - - + + + Add function has_largeobject_privilege() + to check large object privileges (Yugo Nagata) + § + + - - -Allow to define large object default privileges (Takatsuka Haruka, Yugo Nagata, Laurenz Albe) -§ - - + + + Allow to define + large object default privileges (Takatsuka Haruka, Yugo Nagata, + Laurenz Albe) + § + + - - -Add predefined role pg_signal_autovacuum_worker (Kirill Reshke) -§ - + + + Add predefined role pg_signal_autovacuum_worker + (Kirill Reshke) + § + - -This allows sending signals to autovacuum workers. - - + + This allows sending signals to autovacuum workers. + + @@ -1137,56 +1326,69 @@ Author: Daniel Gustafsson 2025-02-20 [b3f0be788] Add support for OAUTHBEARER SASL mechanism --> - - -Add support for the OAuth authentication method (Jacob Champion, Daniel Gustafsson, Thomas Munro) -§ - + + + Add support for the OAuth authentication + method (Jacob Champion, Daniel Gustafsson, Thomas Munro) + § + - -This adds an oauth authentication method to pg_hba.conf, libpq OAuth options, a server variable to load token validation libraries, and -a configure flag to add the required compile-time libraries. - - + + This adds an oauth authentication method to pg_hba.conf, + libpq OAuth options, a server variable to load + token validation libraries, and a configure flag + to add the required compile-time libraries. + + - - -Add server variable to allow specification of multiple colon-separated TLSv1.3 cipher suites (Erica Zhang, Daniel Gustafsson) -§ - - + + + Add server variable to allow + specification of multiple colon-separated TLSv1.3 cipher suites + (Erica Zhang, Daniel Gustafsson) + § + + - - -Change server variable 's default to include elliptic curve X25519 (Daniel Gustafsson, Jacob Champion) -§ - - + + + Change server variable 's default + to include elliptic curve X25519 (Daniel Gustafsson, Jacob Champion) + § + + - - -Rename server variable ssl_ecdh_curve to and allow multiple colon-separated ECDH curves to be specified (Erica Zhang, Daniel Gustafsson) -§ - + + + Rename server variable ssl_ecdh_curve to and allow multiple colon-separated + ECDH curves to be specified (Erica Zhang, + Daniel Gustafsson) + § + -The previous name still works. - - + + The previous name still works. + + - - -Make cancel request keys 256 bits (Heikki Linnakangas, Jelte Fennema-Nio) -§ -§ - + + + Make cancel request + keys 256 bits (Heikki Linnakangas, Jelte Fennema-Nio) + § + § + - -This is only possible when the server and client support wire protocol version 3.2, introduced in this release. - - + + This is only possible when the server and client support wire + protocol version 3.2, introduced in this release. 
+ +

- - -Add server variable to specify the maximum number of background workers (Nathan Bossart) -§ -

+ + + Add server variable + to specify the maximum number of background workers (Nathan Bossart) + § +

- - + + With this variable set, + can be adjusted at runtime up to this maximum without a server + restart. + +

- - -Allow specification of the fixed number of dead tuples that will trigger an autovacuum (Nathan Bossart, Frédéric Yhuel) -§ -

+ + + Allow specification of the fixed number of dead tuples that will + trigger an autovacuum (Nathan + Bossart, Frédéric Yhuel) + § +

- - + + The server variable is . Percentages are + still used for triggering. + +

- - -Change server variable to limit only files opened by a backend (Andres Freund) -§ -

+ + + Change server variable + to limit only files opened by a backend (Andres Freund) + § +

- - + + Previously files opened by the postmaster were also counted toward + this limit. + +

- - -Add server variable to report the required number of semaphores (Nathan Bossart) -§ -

+ + + Add server variable to + report the required number of semaphores (Nathan Bossart) + § +

- - + + This is useful for operating system configuration. + +

- - -Add server variable to specify the location of extension control files (Peter Eisentraut, Matheus Alcantara) -§ -§ - -

+ + + Add server variable to + specify the location of extension control files (Peter Eisentraut, + Matheus Alcantara) + § + § + +

- +

@@ -1300,28 +1516,34 @@
Author: Amit Kapila
2025-02-19 [ac0e33136] Invalidate inactive replication slots.
-->

- - -Allow inactive replication slots to be automatically invalidated using server variable (Nisha Moond, Bharath Rupireddy) -§ - -

+ + + Allow inactive replication slots to be automatically invalidated using + server variable + (Nisha Moond, Bharath Rupireddy) + § + +

- - -Add server variable to control the maximum active replication origins (Euler Taveira) -§ -

+ + + Add server variable to control the + maximum active replication origins (Euler Taveira) + § +

- - + + This was previously controlled by , but this new setting allows + a higher origin count in cases where fewer slots are required. + +

@@ -1343,33 +1565,44 @@
Author: Amit Kapila
2025-01-30 [6252b1eaf] Doc: Generated column replication.
-->

- - -Allow the values of generated columns to be logically replicated (Shubham Khanna, Vignesh C, Zhijie Hou, Shlok Kyal, Peter Smith) -§ -§ -§ -§ - - -

-If the publication specifies a column list, all specified columns, generated and non-generated, are published. Without a specified column list, publication option publish_generated_columns
-controls whether generated columns are published. Previously generated columns were not replicated and the subscriber had to compute the values if possible; this is particularly
-useful for non-PostgreSQL subscribers which lack such a capability.
- - + + + Allow the values of generated + columns to be logically replicated (Shubham Khanna, Vignesh C, + Zhijie Hou, Shlok Kyal, Peter Smith) + § + § + § + § + + + + If the publication specifies a column list, all specified + columns, generated and non-generated, are published. + Without a specified column list, publication option + publish_generated_columns controls whether + generated columns are published. Previously generated columns + were not replicated and the subscriber had to compute + the values if possible; this is particularly useful for + non-PostgreSQL subscribers which lack + such a capability. + + - - -Change the default streaming option from off to parallel (Vignesh C) -§ - - + + + Change the default streaming + option from off to parallel + (Vignesh C) + § + + - - -Allow to change the replication slot's two-phase commit behavior (Hayato Kuroda, Ajin Cherian, Amit Kapila, Zhijie Hou) -§ -§ - - + + + Allow to change the + replication slot's two-phase commit behavior (Hayato Kuroda, Ajin + Cherian, Amit Kapila, Zhijie Hou) + § + § + + - - -Log conflicts while applying logical replication changes (Zhijie Hou, Nisha Moond) -§ -§ -§ -§ -§ - + + + Log conflicts while + applying logical replication changes (Zhijie Hou, Nisha Moond) + § + § + § + § + § + - -Also report in new columns of pg_stat_subscription_stats. - - + + Also report in new columns of pg_stat_subscription_stats. + + @@ -1434,112 +1671,136 @@ Author: Richard Guo 2025-02-25 [1e4351af3] Expand virtual generated columns in the planner --> - - -Allow generated columns to be virtual, and make them the default (Peter Eisentraut, Jian He, Richard Guo, Dean Rasheed) -§ -§ -§ - + + + Allow generated + columns to be virtual, and make them the default (Peter + Eisentraut, Jian He, Richard Guo, Dean Rasheed) + § + § + § + - -Virtual generated columns generate their values when the columns are read, not written. The write behavior can still be specified via the STORED option. - - + + Virtual generated columns generate their values when the columns + are read, not written. The write behavior can still be specified + via the STORED option. + + - - -Add OLD/NEW support to RETURNING in DML queries (Dean Rasheed) -§ - + + + Add OLD/NEW support to RETURNING in + DML queries (Dean Rasheed) + § + - -Previously RETURNING only returned new values for and , and old values for ; would return the appropriate value for the internal query executed. This new syntax -allows the RETURNING list of INSERT/UPDATE/DELETE/MERGE to explicitly return old and new values by using the special aliases old and new. These aliases can be renamed to -avoid identifier conflicts. - - + + Previously RETURNING only returned new values for + and , and old + values for ; + would return the appropriate value for the internal query executed. + This new syntax allows the RETURNING list of + INSERT/UPDATE/DELETE/MERGE + to explicitly return old and new values by using the special aliases + old and new. These aliases + can be renamed to avoid identifier conflicts. + + - - -Allow foreign tables to be created like existing local tables (Zhang Mingli) -§ - + + + Allow foreign tables to be created like existing local tables + (Zhang Mingli) + § + - -The syntax is CREATE FOREIGN TABLE ... LIKE. - - + + The syntax is CREATE + FOREIGN TABLE ... LIKE. 
+ +

- - -Allow LIKE with nondeterministic collations (Peter Eisentraut) -§ - -

+ + + Allow LIKE + with nondeterministic + collations (Peter Eisentraut) + § + +

- - -Allow text position search functions with nondeterministic collations (Peter Eisentraut) -§ -

+ + + Allow text position search functions with nondeterministic collations + (Peter Eisentraut) + § +

- - + + These used to generate an error. + +

- - -Add builtin collation provider PG_UNICODE_FAST (Jeff Davis) -§ -

+ + + Add builtin collation provider PG_UNICODE_FAST + (Jeff Davis) + § +

- - + + This locale supports case mapping, but sorts in code point order, + not natural language order. + +

- - -Allow and to process partitioned tables without processing their children (Michael Harris) -§ -

+ + + Allow and + to process partitioned tables without processing their children + (Michael Harris) + § +

- - + + This is enabled with the new ONLY option. This is + useful since autovacuum does not process partitioned tables, just + its children. + +

- - -Add functions to modify per-relation and per-column optimizer statistics (Corey Huinker) -§ -§ -§ -

+ + + Add functions to modify per-relation and per-column optimizer + statistics (Corey Huinker) + § + § + § +

- - + + The functions are pg_restore_relation_stats(), + pg_restore_attribute_stats(), + pg_clear_relation_stats(), and + pg_clear_attribute_stats(). + +

- - -Add server variable to control the file copying method (Nazir Bilal Yavuz) -§ -

+ + + Add server variable to control + the file copying method (Nazir Bilal Yavuz) + § +

- - + + This controls whether CREATE DATABASE + ... STRATEGY=FILE_COPY and ALTER DATABASE ... SET + TABLESPACE uses file copy or clone. + +

<link linkend="ddl-constraints">Constraints</link> - +

- - -Allow the specification of non-overlapping PRIMARY KEY and UNIQUE constraints (Paul A. Jungwirth) -§ -

+ + + Allow the specification of non-overlapping PRIMARY + KEY and UNIQUE + constraints (Paul A. Jungwirth) + § +

- - + + This is specified by WITHOUT OVERLAPS on the + last specified column. + +

- - -Allow CHECK and foreign key constraints to be specified as NOT ENFORCED (Amul Sul) -§ -§ -

+ + + Allow CHECK + and foreign + key constraints to be specified as NOT + ENFORCED (Amul Sul) + § + § +

- - + + This also adds column pg_constraint.conenforced. +

- - -Require primary/foreign key relationships to use either deterministic collations or the same nondeterministic collations (Peter Eisentraut) -§ -

+ + + Require primary/foreign key + relationships to use either deterministic collations or the + same nondeterministic collations (Peter Eisentraut) + § +

- -The restore of a , also used by , will fail if these requirements are not met; schema changes must be made for these upgrade methods to succeed.
- - + + The restore of a , also used by , will fail if these requirements are not met; + schema changes must be made for these upgrade methods to succeed. + + - - -Store column NOT NULL specifications in pg_constraint (Álvaro Herrera, Bernd Helmle) -§ - + + + Store column NOT + NULL specifications in pg_constraint + (Álvaro Herrera, Bernd Helmle) + § + - -This allows names to be specified for NOT NULL constraint. This also adds NOT NULL constraints to foreign tables and NOT NULL inheritance control to local tables. - - + + This allows names to be specified for NOT NULL + constraint. This also adds NOT NULL constraints + to foreign tables and NOT NULL inheritance + control to local tables. + + - - -Allow to set the NOT VALID attribute of NOT NULL constraints (Rushabh Lathia, Jian He) -§ - - + + + Allow to set the NOT + VALID attribute of NOT NULL constraints + (Rushabh Lathia, Jian He) + § + + - - -Allow modification of the inheritability of NOT NULL constraints (Suraj Kharage, Álvaro Herrera) -§ -§ - + + + Allow modification of the inheritability of NOT + NULL constraints (Suraj Kharage, Álvaro Herrera) + § + § + - -The syntax is ALTER TABLE ... ALTER CONSTRAINT ... [NO] INHERIT. - - + + The syntax is ALTER TABLE + ... ALTER CONSTRAINT ... [NO] INHERIT. + + - - -Allow NOT VALID foreign key constraints on partitioned tables (Amul Sul) -§ - - + + + Allow NOT VALID foreign key constraints on + partitioned tables (Amul Sul) + § + + - - -Allow dropping of constraints ONLY on partitioned tables (Álvaro Herrera) -§ - + + + Allow dropping + of constraints ONLY on partitioned tables + (Álvaro Herrera) + § + - -This was previously erroneously prohibited. - - + + This was previously erroneously prohibited. + + - + <xref linkend="sql-copy"/> - + - - -Add REJECT_LIMIT to control the number of invalid rows COPY FROM can ignore (Atsushi Torikoshi) -§ - + + + Add REJECT_LIMIT to control the number of invalid + rows COPY FROM can ignore (Atsushi Torikoshi) + § + - -This is available when ON_ERROR = 'ignore'. - - + + This is available when ON_ERROR = 'ignore'. + + - - -Allow COPY TO to copy rows from populated materialized views (Jian He) -§ - - + + + Allow COPY TO to copy rows from populated + materialized views (Jian He) + § + + - - -Add COPY LOG_VERBOSITY level silent to suppress log output of ignored rows (Atsushi Torikoshi) -§ - + + + Add COPY LOG_VERBOSITY level + silent to suppress log output of ignored rows + (Atsushi Torikoshi) + § + - -This new level suppresses output for discarded input rows when on_error = 'ignore'. - - + + This new level suppresses output for discarded input rows when + on_error = 'ignore'. + + - - -Disallow COPY FREEZE on foreign tables (Nathan Bossart) -§ - + + + Disallow COPY FREEZE on foreign tables (Nathan + Bossart) + § + - -Previously, the COPY worked but the FREEZE was ignored, so disallow this command. - - + + Previously, the COPY worked but the + FREEZE was ignored, so disallow this command. 
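[Editor's note: a hedged sketch tying together the constraint and COPY items above; all object names and the file path are placeholders, and the temporal primary key is assumed to need the btree_gist extension for its scalar key part.]

    CREATE EXTENSION IF NOT EXISTS btree_gist;

    CREATE TABLE room_booking (
        room   int,
        during tstzrange,
        -- recorded in the catalog but not checked against the data
        price  numeric CHECK (price > 0) NOT ENFORCED,
        -- non-overlapping temporal primary key
        PRIMARY KEY (room, during WITHOUT OVERLAPS)
    );

    -- skip malformed rows, give up after ten of them, and log nothing per row
    COPY room_booking FROM '/tmp/bookings.csv'
        (FORMAT csv, ON_ERROR ignore, REJECT_LIMIT 10, LOG_VERBOSITY silent);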
+ + @@ -1793,36 +2102,39 @@ Author: David Rowley 2024-12-11 [c2a4078eb] Enable BUFFERS with EXPLAIN ANALYZE by default --> - - -Automatically include BUFFERS output in EXPLAIN ANALYZE (Guillaume Lelarge, David Rowley) -§ - - + + + Automatically include BUFFERS output in + EXPLAIN ANALYZE (Guillaume Lelarge, David Rowley) + § + + - - -Add full WAL buffer count to EXPLAIN (WAL) output (Bertrand Drouvot) -§ - - + + + Add full WAL buffer count to EXPLAIN + (WAL) output (Bertrand Drouvot) + § + + - - -In EXPLAIN ANALYZE, report the number of index lookups used per index scan node (Peter Geoghegan) -§ - - + + + In EXPLAIN ANALYZE, report the number of index + lookups used per index scan node (Peter Geoghegan) + § + + - - -Modify EXPLAIN to output fractional row counts (Ibrar Ahmed, Ilia Evdokimov, Robert Haas) -§ -§ - - + + + Modify EXPLAIN to output fractional row counts + (Ibrar Ahmed, Ilia Evdokimov, Robert Haas) + § + § + + - - -Add memory and disk usage details to Material, Window Aggregate, and common table expression nodes to EXPLAIN -output (David Rowley, Tatsuo Ishii) -§ -§ -§ -§ - - + + + Add memory and disk usage details to Material, + Window Aggregate, and common table expression + nodes to EXPLAIN output (David Rowley, Tatsuo + Ishii) + § + § + § + § + + - - -Add details about window function arguments to EXPLAIN output (Tom Lane) -§ - - + + + Add details about window function arguments to + EXPLAIN output (Tom Lane) + § + + - - -Add Parallel Bitmap Heap Scan worker cache statistics to EXPLAIN ANALYZE (David Geier, Heikki Linnakangas, Donghang Lin, Alena Rybakina, David Rowley) -§ - - + + + Add Parallel Bitmap Heap Scan worker cache + statistics to EXPLAIN ANALYZE (David Geier, + Heikki Linnakangas, Donghang Lin, Alena Rybakina, David Rowley) + § + + - - -Indicate disabled nodes in EXPLAIN ANALYZE output (Robert Haas, David Rowley, Laurenz Albe) -§ -§ -§ - - + + + Indicate disabled nodes in EXPLAIN ANALYZE output + (Robert Haas, David Rowley, Laurenz Albe) + § + § + § + + @@ -1922,137 +2241,159 @@ Author: Jeff Davis 2025-01-17 [286a365b9] Support Unicode full case mapping and conversion. --> - - -Improve Unicode full case mapping and conversion (Jeff Davis) -§ -§ - + + + Improve Unicode + full case mapping and conversion (Jeff Davis) + § + § + - -This adds the ability to do conditional and title case mapping, and case map single characters to multiple characters. - - + + This adds the ability to do conditional and title case mapping, + and case map single characters to multiple characters. + + - - -Allow jsonb null values to be cast to scalar types as NULL (Tom Lane) -§ - + + + Allow jsonb + null values to be cast to scalar types as + NULL (Tom Lane) + § + - -Previously such casts generated an error. - - + + Previously such casts generated an error. 
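[Editor's note: a short sketch of the new jsonb cast behavior described just above.]

    SELECT ('null'::jsonb)::int;      -- now yields NULL; formerly an error
    SELECT ('null'::jsonb)::boolean;  -- likewise NULL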
+ + - - -Add optional parameter to json{b}_strip_nulls to allow removal of null array elements (Florents Tselai) -§ - - + + + Add optional parameter to json{b}_strip_nulls + to allow removal of null array elements (Florents Tselai) + § + + - - -Add function array_sort() which sorts an array's first dimension (Junwang Zhao, Jian He) -§ - - + + + Add function array_sort() + which sorts an array's first dimension (Junwang Zhao, Jian He) + § + + - - -Add function array_reverse() which reverses an array's first dimension (Aleksander Alekseev) -§ - - + + + Add function array_reverse() + which reverses an array's first dimension (Aleksander Alekseev) + § + + - - -Add function reverse() to reverse bytea bytes (Aleksander Alekseev) -§ - - + + + Add function reverse() + to reverse bytea bytes (Aleksander Alekseev) + § + + - - -Allow casting between integer types and bytea (Aleksander Alekseev) -§ - + + + Allow casting between integer types and bytea (Aleksander + Alekseev) + § + - -The integer values are stored as bytea two's complement values. - - + + The integer values are stored as bytea two's complement + values. + + - - -Update Unicode data to Unicode 16.0.0 (Peter Eisentraut) -§ - - + + + Update Unicode data to Unicode 16.0.0 (Peter + Eisentraut) + § + + - - -Add full text search stemming for Estonian (Tom Lane) -§ - - + + + Add full text search stemming for Estonian + (Tom Lane) + § + + - - -Improve the XML error codes to more closely match the SQL standard (Tom Lane) -§ - + + + Improve the XML + error codes to more closely match the SQL standard + (Tom Lane) + § + - -These errors are reported via SQLSTATE. - - + + These errors are reported via SQLSTATE. + + @@ -2068,16 +2409,20 @@ Author: Jeff Davis 2025-01-24 [bfc599206] Add SQL function CASEFOLD(). --> - - -Add function casefold() to allow for more sophisticated case-insensitive matching (Jeff Davis) -§ - + + + Add function casefold() + to allow for more sophisticated case-insensitive matching (Jeff Davis) + § + - -This allows more accurate comparisons, i.e., a character can have multiple upper or lower case equivalents, or upper or lower case conversion changes the number of characters. - - + + This allows more accurate comparisons, i.e., a character can have + multiple upper or lower case equivalents, or upper or lower case + conversion changes the number of characters. + + - - -Allow MIN()/MAX() aggregates on arrays and composite types (Aleksander Alekseev, Marat Buharov) -§ -§ - - + + + Allow MIN()/MAX() + aggregates on arrays and composite types (Aleksander Alekseev, + Marat Buharov) + § + § + + - - -Add a WEEK option to EXTRACT() (Tom Lane) -§ - - + + + Add a WEEK option to EXTRACT() + (Tom Lane) + § + + - - -Improve the output EXTRACT(QUARTER ...) for negative values (Tom Lane) -§ - - + + + Improve the output EXTRACT(QUARTER ...) for + negative values (Tom Lane) + § + + - - -Add roman numeral support to to_number() (Hunaid Sohail) -§ - + + + Add roman numeral support to to_number() + (Hunaid Sohail) + § + - -This is accessed via the RN pattern. - - + + This is accessed via the RN pattern. + + - - -Add UUID version 7 generation function uuidv7() (Andrey Borodin) -§ - + + + Add UUID + version 7 generation function uuidv7() + (Andrey Borodin) + § + - -This UUID value is temporally sortable. Function alias uuidv4() has been added to explicitly generate version 4 UUIDs. - - + + This UUID value is + temporally sortable. Function alias uuidv4() + has been added to explicitly generate version 4 UUIDs. 
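[Editor's note: a few of the additions above as one-line sketches; the results in the comments are what one would expect, not captured output.]

    SELECT uuidv7();                     -- time-ordered, so index-friendly
    SELECT array_sort(ARRAY[3,1,2]);     -- {1,2,3}
    SELECT array_reverse(ARRAY[1,2,3]);  -- {3,2,1}
    SELECT to_number('XIV', 'RN');       -- 14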
+ + - - -Add functions crc32() and crc32c() to compute CRC values (Aleksander Alekseev) -§ - - + + + Add functions crc32() + and crc32c() + to compute CRC values (Aleksander Alekseev) + § + + - - -Add math functions gamma() and lgamma() (Dean Rasheed) -§ - - + + + Add math functions gamma() + and lgamma() + (Dean Rasheed) + § + + - - -Allow => syntax for named cursor arguments in PL/pgSQL (Pavel Stehule) -§ - + + + Allow => syntax for named cursor arguments in + PL/pgSQL (Pavel Stehule) + § + - -We previously only accepted :=. - - + + We previously only accepted :=. + + - - -Allow regexp_match[es]()/regexp_like()/regexp_replace()/regexp_count()/regexp_instr()/regexp_substr()/regexp_split_to_table()/regexp_split_to_array() to use named arguments (Jian He) -§ - - + + + Allow regexp_match[es]()/regexp_like()/regexp_replace()/regexp_count()/regexp_instr()/regexp_substr()/regexp_split_to_table()/regexp_split_to_array() + to use named arguments (Jian He) + § + + @@ -2216,12 +2594,15 @@ Author: Robert Haas 2024-09-09 [cdb6b0fdb] Add PQfullProtocolVersion() to surface the precise proto --> - - -Add function PQfullProtocolVersion() to report the full, including minor, protocol version number (Jacob Champion, Jelte Fennema-Nio) -§ - - + + + Add function PQfullProtocolVersion() + to report the full, including minor, protocol version number (Jacob + Champion, Jelte Fennema-Nio) + § + + - - -Add libpq connection parameters and environment variables to specify the minimum and maximum acceptable protocol version for connections (Jelte Fennema-Nio) -§ -§ - - + + + Add libpq connection parameters + and environment variables to + specify the minimum and maximum acceptable protocol version for + connections (Jelte Fennema-Nio) + § + § + + - - -Add libpq function PQservice() to return the connection service name (Michael Banck) -§ - - + + + Add libpq function PQservice() + to return the connection service name (Michael Banck) + § + + - - -Report changes to the client (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) -§ -§ - - + + + Report changes to the client + (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) + § + § + + - - -Add PQtrace() output for all message types, including authentication (Jelte Fennema-Nio) -§ -§ -§ -§ -§ - - + + + Add PQtrace() output + for all message types, including authentication (Jelte Fennema-Nio) + § + § + § + § + § + + - - -Add libpq connection parameter sslkeylogfile which dumps out SSL key material (Abhishek Chanda, Daniel Gustafsson) -§ - + + + Add libpq connection parameter sslkeylogfile + which dumps out SSL key material (Abhishek Chanda, + Daniel Gustafsson) + § + - -This is useful for debugging. - - + + This is useful for debugging. + + - - -Modify some libpq function signatures to use int64_t (Thomas Munro) -§ - + + + Modify some libpq function signatures to use + int64_t (Thomas Munro) + § + - -These previously used pg_int64, which is now deprecated. - - + + These previously used pg_int64, which is now + deprecated. + + - <xref linkend="app-psql"/> + <xref linkend="app-psql"/> - + - - -Allow psql to parse, bind, and close named prepared statements (Anthonin Bonnefoy, Michael Paquier) -§ - + + + Allow psql to parse, bind, and close + named prepared statements (Anthonin Bonnefoy, Michael Paquier) + § + - -This is accomplished with new commands \parse, \bind_named, and \close. - - + + This is accomplished with new commands \parse, + \bind_named, + and \close. 
+ + - - -Add psql backslash commands to allowing issuance of pipeline queries (Anthonin Bonnefoy) -§ -§ -§ - + + + Add psql backslash commands to allow + issuance of pipeline queries (Anthonin Bonnefoy) + § + § + § + - -The new commands are \startpipeline, \syncpipeline, \sendpipeline, \endpipeline, \flushrequest, \flush, and \getresults. - - + + The new commands are \startpipeline, + \syncpipeline, \sendpipeline, + \endpipeline, \flushrequest, + \flush, and \getresults. + + - - -Allow adding pipeline status to the psql prompt and add related state variables (Anthonin Bonnefoy) -§ - + + + Allow adding pipeline status to the psql + prompt and add related state variables (Anthonin Bonnefoy) + § + - -The new prompt character is %P and the new psql variables are PIPELINE_SYNC_COUNT, PIPELINE_COMMAND_COUNT, and PIPELINE_RESULT_COUNT. - - + + The new prompt character is %P and + the new psql variables are PIPELINE_SYNC_COUNT, + PIPELINE_COMMAND_COUNT, + and PIPELINE_RESULT_COUNT. + + - - -Allow adding the connection service name to the psql prompt or access it via psql variable (Michael Banck) -§ - - + + + Allow adding the connection service name to the + psql prompt or access it via + psql variable (Michael Banck) + § + + - - -Add psql option to use expanded mode on all list commands (Dean Rasheed) -§ - + + + Add psql option to use expanded mode on + all list commands (Dean Rasheed) + § + - -Adding backslash suffix x enables this. - - + + Adding backslash suffix x enables this. + + - - -Change psql's to use tabular format and include more information (Álvaro Herrera, Maiquel Grassi, Hunaid Sohail) -§ - - + + + Change psql's to use tabular format + and include more information (Álvaro Herrera, Maiquel Grassi, + Hunaid Sohail) + § + + - - -Add function's leakproof indicator to psql's \df+, \do+, \dAo+, and \dC+ outputs (Yugo Nagata) -§ - - + + + Add function's leakproof indicator + to psql's \df+, + \do+, \dAo+, and + \dC+ outputs (Yugo Nagata) + § + + - - -Add access method details for partitioned relations in \dP+ (Justin Pryzby) -§ - - + + + Add access method details for partitioned relations in \dP+ + (Justin Pryzby) + § + + - - -Add default_version to the psql \dx extension output (Magnus Hagander) -§ - - + + + Add default_version + to the psql \dx + extension output (Magnus Hagander) + § + + - - -Add psql variable to set the default \watch wait time (Daniel Gustafsson) -§ - - + + + Add psql variable to set the default \watch + wait time (Daniel Gustafsson) + § + + - + - + - - Server Applications + + Server Applications - + - - -Change to default to enabling checksums (Greg Sabino Mullane) -§ -§ - + + + Change to default to enabling checksums + (Greg Sabino Mullane) + § + § + - -The new initdb option disables checksums. - - + + The new initdb option + disables checksums. + + - - -Add initdb option to avoid syncing heap/index files (Nathan Bossart) -§ - + + + Add initdb option + to avoid syncing heap/index + files (Nathan Bossart) + § + - -initdb option is still available to avoid syncing any files. - - + + initdb option + is still available to avoid syncing any files. + + - - -Add option to compute only missing optimizer statistics (Corey Huinker, Nathan Bossart) -§ -§ - + + + Add option + to compute only missing + optimizer statistics (Corey Huinker, Nathan Bossart) + § + § + - -This option can only be used by and . - - + + This option can only be used by + and .
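[Editor's note: the psql pipeline meta-commands introduced earlier in this section chain together along these lines; a trivial sketch matching the regression-test usage later in this series.]

    \startpipeline
    SELECT $1 \bind 42 \sendpipeline
    \flushrequest
    \getresults
    \endpipeline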
+ + - - -Add option / to enable hard linking (Israel Barth Rubio, Robert Haas) -§ - + + + Add option + / to enable hard linking + (Israel Barth Rubio, Robert Haas) + § + - -Only some files can be hard linked. This should not be used if the backups will be used independently. - - + + Only some files can be hard linked. This should not be used if the + backups will be used independently. + + - - -Allow to verify tar-format backups (Amul Sul) -§ - - + + + Allow to verify tar-format + backups (Amul Sul) + § + + - - -If 's specifies a database name, use it in output (Masahiko Sawada) -§ - - + + + If 's + specifies a database name, use it in + output (Masahiko Sawada) + § + + - - -Add option to change the default char signedness (Masahiko Sawada) -§ - - + + + Add option + to change the default + char signedness (Masahiko Sawada) + § + + - + @@ -2603,28 +3050,34 @@ Author: Andrew Dunstan 2025-04-04 [1495eff7b] Non text modes for pg_dumpall, correspondingly change pg --> - - -Allow to dump in the same output formats as pg_dump supports (Mahendra Singh Thalor, Andrew Dunstan) -§ - + + + Allow to dump in the same output + formats as pg_dump supports (Mahendra + Singh Thalor, Andrew Dunstan) + § + - -Also modify to handle such dumps. Previously pg_dumpall only supported text format. - - + + Also modify to handle such dumps. + Previously pg_dumpall only supported + text format. + + - - -Add options , , and (Jeff Davis) -§ - - + + + Add options + , , + and (Jeff Davis) + § + + - - -Add pg_dump and option to dump sequence data that would normally be excluded (Nathan Bossart) -§ -§ - - + + + Add pg_dump and option to + dump sequence data that would normally be excluded (Nathan Bossart) + § + § + + - - -Add , , and - options , , , and (Corey Huinker, Jeff Davis) -§ - - + + + Add , , + and options + , , + , and + (Corey Huinker, Jeff Davis) + § + + - - -Add option to disable row level security policy processing in -, , - (Nikolay Samokhvalov) -§ - + + + Add option to disable row level + security policy processing in , + , + (Nikolay Samokhvalov) + § + - -This is useful for migrating to systems with different policies. - - + + This is useful for migrating to systems with different policies. + + @@ -2692,19 +3151,22 @@ Author: Jeff Davis 2025-02-20 [1fd1bd871] Transfer statistics during pg_upgrade. --> - - -Allow pg_upgrade to preserve optimizer statistics (Corey Huinker, Jeff Davis, Nathan Bossart) -§ -§ -§ -§ - + + + Allow pg_upgrade to preserve optimizer + statistics (Corey Huinker, Jeff Davis, Nathan Bossart) + § + § + § + § + - -Extended statistics are not preserved. Also add pg_upgrade option to disable statistics preservation. - - + + Extended statistics are not preserved. Also add + pg_upgrade option + to disable statistics preservation. + + - - -Allow pg_upgrade to process database checks in parallel (Nathan Bossart) -§ -§ -§ -§ -§ -§ -§ -§ -§ -§ -§ - - - -This is controlled by the existing option. - - + + + Allow pg_upgrade to process database + checks in parallel (Nathan Bossart) + § + § + § + § + § + § + § + § + § + § + § + + + + This is controlled by the existing option. + + - - -Add pg_upgrade option to swap directories rather than copy, clone, or link files (Nathan Bossart) -§ - + + + Add pg_upgrade option + to swap directories rather than copy, clone, + or link files (Nathan Bossart) + § + - -This mode is potentially the fastest. - - + + This mode is potentially the fastest. 
+ + - - -Add pg_upgrade option to set the default char signedness of new cluster (Masahiko Sawada) -§ -§ - + + + Add pg_upgrade option + to set the default + char signedness of new cluster (Masahiko Sawada) + § + § + - -This is to handle cases where a pre-PostgreSQL 18 cluster's default CPU signedness does not match the new cluster. - - + + This is to handle cases where a + pre-PostgreSQL 18 cluster's default + CPU signedness does not match the new cluster. + + @@ -2801,60 +3270,68 @@ Author: Amit Kapila 2025-03-28 [fb2ea12f4] pg_createsubscriber: Add '- -all' option. --> - - -Add option to create logical replicas for all databases (Shubham Khanna) -§ - - + + + Add option + to create logical replicas for all databases + (Shubham Khanna) + § + + - - -Add pg_createsubscriber option to remove publications (Shubham Khanna) -§ - - + + + Add pg_createsubscriber option + to remove publications (Shubham Khanna) + § + + - - -Add pg_createsubscriber option to enable prepared transactions (Shubham Khanna) -§ - - + + + Add pg_createsubscriber option + to enable prepared transactions + (Shubham Khanna) + § + + - - -Add option to specify failover slots (Hayato Kuroda) -§ - - + + + Add option + to specify failover slots (Hayato Kuroda) + § + + - - -Allow pg_recvlogical to work without (Hayato Kuroda) -§ - - + + + Allow pg_recvlogical + to work without + (Hayato Kuroda) + § + + @@ -2874,65 +3351,76 @@ Author: Michael Paquier 2024-07-18 [a0a5869a8] Add INJECTION_POINT_CACHED() to run injection points dir --> - - -Separate the loading and running of injection points (Michael Paquier, Heikki Linnakangas) -§ -§ - + + + Separate the loading and running of injection points + (Michael Paquier, Heikki Linnakangas) + § + § + - -Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), and such injection points can be run via INJECTION_POINT_CACHED(). - - + + Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), + and such injection points can be run via INJECTION_POINT_CACHED(). 
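[Editor's note: at the SQL level, the injection_points test extension (built only when injection points are enabled) drives these hooks much as the recovery test later in this series does; a minimal sketch with a placeholder point name.]

    CREATE EXTENSION injection_points;
    SELECT injection_points_attach('some-point', 'wait');
    -- ... exercise the code path under test, then release the waiter:
    SELECT injection_points_wakeup('some-point');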
+ + - - -Support runtime arguments in injection points (Michael Paquier) -§ - - + + + Support runtime arguments in injection points (Michael Paquier) + § + + - - -Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() (Heikki Linnakangas) -§ - - + + + Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() + (Heikki Linnakangas) + § + + - - -Improve the performance of processing long JSON strings using SIMD (Single Instruction Multiple Data) (David Rowley) -§ - - + + + Improve the performance of processing long JSON strings using + SIMD (Single Instruction Multiple Data) (David + Rowley) + § + + - - -Speed up CRC32C calculations using x86 AVX-512 instructions (Raghuveer Devulapalli, Paul Amonson) -§ - - + + + Speed up CRC32C calculations using x86 AVX-512 + instructions (Raghuveer Devulapalli, Paul Amonson) + § + + - - -Add ARM Neon and SVE CPU intrinsics for popcount (integer bit counting) (Chiranmoy Bhattacharya, Devanga Susmitha, Rama Malladi) -§ -§ - - + + + Add ARM Neon and SVE CPU + intrinsics for popcount (integer bit counting) (Chiranmoy + Bhattacharya, Devanga Susmitha, Rama Malladi) + § + § + + - - -Improve the speed of numeric multiplication and division (Joel Jacobson, Dean Rasheed) -§ -§ -§ -§ - - + + + Improve the speed of numeric multiplication and division (Joel + Jacobson, Dean Rasheed) + § + § + § + § + + - - -Add configure option to enable NUMA awareness (Jakub Wartak, Bertrand Drouvot) -§ -§ -§ - + + + Add configure option + to enable NUMA awareness (Jakub Wartak, Bertrand + Drouvot) + § + § + § + - -The function pg_numa_available() reports on NUMA awareness, and system views pg_shmem_allocations_numa and pg_buffercache_numa which report on shared memory distribution across -NUMA nodes. - - + + The function pg_numa_available() + reports on NUMA awareness, and system views pg_shmem_allocations_numa + and pg_buffercache_numa + which report on shared memory distribution across + NUMA nodes. + + - - -Add TOAST table to pg_index to allow for very large expression indexes (Nathan Bossart) -§ - - + + + Add TOAST table to pg_index + to allow for very large expression indexes (Nathan Bossart) + § + + - - -Remove column -pg_attribute.attcacheoff (David Rowley) -§ - - + + + Remove column pg_attribute.attcacheoff + (David Rowley) + § + + - - -Add column pg_class.relallfrozen (Melanie Plageman) -§ - - + + + Add column pg_class.relallfrozen + (Melanie Plageman) + § + + - - -Add amgettreeheight, amconsistentequality, and amconsistentordering to the index access method API (Mark Dilger) -§ -§ - - + + + Add amgettreeheight, + amconsistentequality, and + amconsistentordering to the index access method + API (Mark Dilger) + § + § + + - - -Add GiST support function stratnum() (Paul A. Jungwirth) -§ - - + + + Add GiST support function stratnum() + (Paul A. Jungwirth) + § + + - - -Record the default CPU signedness of char in (Masahiko Sawada) -§ - - + + + Record the default CPU signedness of + char in + (Masahiko Sawada) + § + + - - -Add support for Python "Limited API" in (Peter Eisentraut) -§ -§ - + + + Add support for Python "Limited API" in (Peter Eisentraut) + § + § + - -This helps prevent problems caused by Python 3.x version mismatches. - - + + This helps prevent problems caused by + Python 3.x version mismatches. 
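[Editor's note: the NUMA additions mentioned above are visible from SQL; a sketch, where the pg_buffercache_numa view additionally requires the pg_buffercache extension.]

    SELECT pg_numa_available();
    SELECT * FROM pg_shmem_allocations_numa LIMIT 5;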
+ + - - -Change the minimum supported Python version to 3.6.8 (Jacob Champion) -§ - - + + + Change the minimum supported Python + version to 3.6.8 (Jacob Champion) + § + + - - -Remove support for OpenSSL versions older than 1.1.1 (Daniel Gustafsson) -§ -§ - - + + + Remove support for OpenSSL versions older + than 1.1.1 (Daniel Gustafsson) + § + § + + - - -If LLVM is enabled, require version 14 or later (Thomas Munro) -§ - - + + + If LLVM is enabled, require version 14 + or later (Thomas Munro) + § + + - - -Add macro PG_MODULE_MAGIC_EXT to allow extensions to report their name and version (Andrei Lepikhov) -§ - + + + Add macro PG_MODULE_MAGIC_EXT + to allow extensions to report their name and version (Andrei Lepikhov) + § + - -This information can be access via the new function pg_get_loaded_modules(). - - + + This information can be accessed via the new function pg_get_loaded_modules(). + + - - -Document that SPI_connect()/SPI_connect_ext() always returns success (SPI_OK_CONNECT) (Stepan Neretin) -§ - + + + Document that SPI_connect()/SPI_connect_ext() + always returns success (SPI_OK_CONNECT) (Stepan + Neretin) + § + - -Errors are always reported via ereport(). - - + + Errors are always reported via ereport(). + + - - -Add documentation section about API and ABI compatibility (David Wheeler, Peter Eisentraut) -§ - - + + + Add documentation + section about API and ABI + compatibility (David Wheeler, Peter Eisentraut) + § + + - - -Remove the experimental designation of Meson builds on Windows (Aleksander Alekseev) -§ - - + + + Remove the experimental designation of + Meson builds on Windows (Aleksander Alekseev) + § + + - - -Remove configure options and (Thomas Munro) -§ -§ - + + + Remove configure options and + (Thomas Munro) + § + § + - -Thirty-two-bit atomic operations are now required. - - + + Thirty-two-bit atomic operations are now required. + + - - -Remove support for the HPPA/PA-RISC architecture (Tom Lane) -§ - - + + + Remove support for the + HPPA/PA-RISC architecture + (Tom Lane) + § + + @@ -3228,24 +3761,27 @@ Author: Masahiko Sawada 2024-10-14 [7cdfeee32] Add contrib/pg_logicalinspect. --> - - -Add extension to inspect logical snapshots (Bertrand Drouvot) -§ - - + + + Add extension to inspect logical + snapshots (Bertrand Drouvot) + § + + - - -Add extension which adds debug details to EXPLAIN output (Robert Haas) -§ - - + + + Add extension which adds debug details + to EXPLAIN + output (Robert Haas) + § + + - - -Add output columns to postgres_fdw_get_connections() (Hayato Kuroda, Sagar Dilip Shedge) -§ -§ -§ -§ - + + + Add output columns to postgres_fdw_get_connections() + (Hayato Kuroda, Sagar Dilip Shedge) + § + § + § + § + - -New output column used_in_xact indicates if the foreign data wrapper is being used by a current transaction, closed indicates if it is closed, user_name indicates the -user name, and remote_backend_pid indicates the remote backend process identifier. - - + + New output column used_in_xact indicates + if the foreign data wrapper is being used by a current transaction, + closed indicates if it is closed, + user_name indicates the user name, and + remote_backend_pid indicates the remote + backend process identifier.
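[Editor's note: a sketch of reading the new columns; passing true is assumed to ask the function to additionally check whether each connection is closed.]

    SELECT server_name, used_in_xact, closed, user_name, remote_backend_pid
    FROM postgres_fdw_get_connections(true);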
+ + - - -Allow SCRAM authentication from the client to be passed to - servers (Matheus Alcantara, Peter Eisentraut) -§ - + + + Allow SCRAM + authentication from the client to be passed to servers (Matheus Alcantara, Peter Eisentraut) + § + - -This avoids storing postgres_fdw authentication information in the database, and is enabled with the -postgres_fdw use_scram_passthrough connection option. libpq uses new connection -parameters and . - - + + This avoids storing postgres_fdw + authentication information in the database, and is + enabled with the postgres_fdw use_scram_passthrough + connection option. libpq uses new connection parameters + and . + + - - -Allow SCRAM authentication from the client to be passed to - servers (Matheus Alcantara) -§ - - + + + Allow SCRAM authentication from the client to be + passed to servers (Matheus Alcantara) + § + + - - -Add on_error and log_verbosity options to (Atsushi Torikoshi) -§ - + + + Add on_error and log_verbosity + options to (Atsushi Torikoshi) + § + - -These control how file_fdw handles and reports invalid file rows. - - + + These control how file_fdw handles and + reports invalid file rows. + + - - -Add reject_limit to control the number of invalid rows file_fdw can ignore (Atsushi Torikoshi) -§ - + + + Add reject_limit to control the number of + invalid rows file_fdw can ignore (Atsushi + Torikoshi) + § + - -This is active when ON_ERROR = 'ignore'. - - + + This is active when ON_ERROR = 'ignore'. + + - - -Add configurable variable min_password_length to - (Emanuele Musella, Maurizio Boriani) -§ - + + + Add configurable variable min_password_length to + (Emanuele Musella, Maurizio Boriani) + § + - -This controls the minimum password length. - - + + This controls the minimum password length. + + - - -Have report the number of failed, retried, or skipped transactions in per-script reports (Yugo Nagata) -§ - - + + + Have report the number of failed, retried, + or skipped transactions in per-script reports (Yugo Nagata) + § + + - - -Add server variable weak to control invalid check digit acceptance (Viktor Holmberg) -§ - + + + Add server variable weak + to control invalid check digit acceptance (Viktor Holmberg) + § + - -This was previously only controlled by function isn_weak(). - - + + This was previously only controlled by function isn_weak(). + + - - -Allow values to be sorted to speed index builds (Bernd Helmle, Andrey Borodin) -§ - - + + + Allow values to be sorted to speed + index builds (Bernd Helmle, Andrey Borodin) + § + + - - -Add check function gin_index_check() to verify GIN indexes (Grigory Kryachko, Heikki Linnakangas, Andrey Borodin) -§ - - + + + Add check function gin_index_check() + to verify GIN indexes (Grigory Kryachko, Heikki + Linnakangas, Andrey Borodin) + § + + - - -Add functions pg_buffercache_evict_relation() and pg_buffercache_evict_all() to evict unpinned shared buffers (Nazir Bilal Yavuz) -§ - + + + Add functions pg_buffercache_evict_relation() + and pg_buffercache_evict_all() + to evict unpinned shared buffers (Nazir Bilal Yavuz) + § + - -The existing function pg_buffercache_evict() now returns the buffer flush status. - - + + The existing function pg_buffercache_evict() + now returns the buffer flush status. 
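[Editor's note: sketches of the new contrib entry points above; the relation names are placeholders.]

    SELECT gin_index_check('my_gin_index'::regclass);  -- amcheck
    SELECT * FROM pg_buffercache_evict_all();
    SELECT * FROM pg_buffercache_evict_relation('my_table'::regclass);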
+ + - - -Allow extensions to install custom options (Robert Haas, Sami Imseih) -§ -§ -§ - - + + + Allow extensions to install custom + options (Robert Haas, Sami Imseih) + § + § + § + + - - -Allow extensions to use the server's cumulative statistics API (Michael Paquier) -§ -§ - - + + + Allow extensions to use the server's cumulative statistics + API (Michael Paquier) + § + § + + @@ -3467,60 +4033,71 @@ Author: Michael Paquier 2024-10-28 [6b652e6ce] Set query ID for inner queries of CREATE TABLE AS and DE --> - - -Allow the queries of and to be tracked by pg_stat_statements (Anthonin Bonnefoy) -§ - + + + Allow the queries of + and to be tracked by + pg_stat_statements (Anthonin Bonnefoy) + § + - -They are also now assigned query ids. - - + + They are also now assigned query ids. + + - - -Allow the parameterization of values in pg_stat_statements (Greg Sabino Mullane, Michael Paquier) -§ - + + + Allow the parameterization of values in + pg_stat_statements (Greg Sabino Mullane, + Michael Paquier) + § + - -This reduces the bloat caused by SET statements with differing constants. - - + + This reduces the bloat caused by SET statements + with differing constants. + + - - -Add pg_stat_statements columns to report parallel activity (Guillaume Lelarge) -§ - + + + Add pg_stat_statements + columns to report parallel activity (Guillaume Lelarge) + § + - -The new columns are parallel_workers_to_launch and parallel_workers_launched. - - + + The new columns are + parallel_workers_to_launch and + parallel_workers_launched. + + - - -Add pg_stat_statements.wal_buffers_full to report full WAL buffers (Bertrand Drouvot) -§ - - + + + Add + pg_stat_statements.wal_buffers_full + to report full WAL buffers (Bertrand Drouvot) + § + + @@ -3536,52 +4113,65 @@ Author: Álvaro Herrera 2025-04-05 [749a9e20c] Add modern SHA-2 based password hashes to pgcrypto. --> - - -Add pgcrypto algorithms sha256crypt and sha512crypt (Bernd Helmle) -§ - - + + + Add pgcrypto algorithms sha256crypt + and sha512crypt + (Bernd Helmle) + § + + - - -Add CFB mode to pgcrypto encryption and decryption (Umar Hayat) -§ - - + + + Add CFB mode + to pgcrypto encryption and decryption + (Umar Hayat) + § + + - - -Add function fips_mode() to report the server's FIPS mode (Daniel Gustafsson) -§ - - + + + Add function fips_mode() + to report the server's FIPS mode (Daniel + Gustafsson) + § + + - - -Add pgcrypto server variable builtin_crypto_enabled to allow disabling builtin non-FIPS mode cryptographic functions (Daniel Gustafsson, Joe Conway) -§ - + + + Add pgcrypto server variable builtin_crypto_enabled + to allow disabling builtin non-FIPS mode + cryptographic functions (Daniel Gustafsson, Joe Conway) + § + - -This is useful for guaranteeing FIPS mode behavior. - - + + This is useful for guaranteeing FIPS mode behavior. 
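[Editor's note: for instance, a sketch of the new pgcrypto pieces, using the algorithm names added by this release.]

    SELECT crypt('secret', gen_salt('sha512crypt'));
    SELECT fips_mode();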
+ + From fa638edc74ee4be90e94a45f8489f3be9a926d7e Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 20 Jun 2025 23:53:15 -0400 Subject: [PATCH 129/602] doc PG 18 relnotes: update to current, add one commit --- doc/src/sgml/release-18.sgml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index faf9156e18519..662c7d8890f9b 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -6,7 +6,7 @@ Release date: - 2025-??-??, CURRENT AS OF 2025-05-23 + 2025-??-??, CURRENT AS OF 2025-06-20 @@ -869,13 +869,16 @@ Author: Tom Lane Add server variable to log - lock acquisition failures (Yuki Seino) + lock acquisition failures (Yuki Seino, Fujii Masao) § + § From ea06263c4aa5abadc97a6928c6b2aff0e29698ae Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 21 Jun 2025 12:52:37 -0400 Subject: [PATCH 130/602] Doc: improve documentation about width_bucket(). Specify whether the bucket bounds are inclusive or exclusive, and improve some other vague language. Explain the behavior that occurs when the "low" bound is greater than the "high" bound. Make width_bucket_numeric's comment more like that for width_bucket_float8, in particular noting that infinite bounds are rejected (since they became possible in v14). Reported-by: Ben Peachey Higdon Author: Robert Treat Co-authored-by: Tom Lane Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/2BD74F86-5B89-4AC1-8F13-23CED3546AC1@gmail.com Backpatch-through: 13 --- doc/src/sgml/func.sgml | 18 ++++++++++++++---- src/backend/utils/adt/float.c | 4 ++-- src/backend/utils/adt/numeric.c | 7 ++++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 8d7d9a2f3e8e8..a6d79765c1a73 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1824,13 +1824,23 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in which operand falls in a histogram having count equal-width buckets spanning the range low to high. - Returns 0 + The buckets have inclusive lower bounds and exclusive upper bounds. + Returns 0 for an input less + than low, or count+1 for an input - outside that range. + greater than or equal to high. + If low > high, + the behavior is mirror-reversed, with bucket 1 + now being the one just below low, and the + inclusive bounds now being on the upper side. width_bucket(5.35, 0.024, 10.06, 5) 3 + + + width_bucket(9, 10, 0, 10) + 2 @@ -1842,8 +1852,8 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in Returns the number of the bucket in which operand falls given an array listing the - lower bounds of the buckets. Returns 0 for an - input less than the first lower + inclusive lower bounds of the buckets. + Returns 0 for an input less than the first lower bound. operand and the array elements can be of any type having standard comparison operators. The thresholds array must be diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 6d20ae07ae7b0..ba66a9c4ce63a 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4065,8 +4065,8 @@ float84ge(PG_FUNCTION_ARGS) * in the histogram. width_bucket() returns an integer indicating the * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the - * lower bound is assigned to bucket 0. 
An operand greater than the - * upper bound is assigned to an additional bucket (with number + * lower bound is assigned to bucket 0. An operand greater than or equal + * to the upper bound is assigned to an additional bucket (with number * count+1). We don't allow "NaN" for any of the float8 inputs, and we * don't allow either of the histogram bounds to be +/- infinity. */ diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 40dcbc7b6710b..58ad1a65ef7b1 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -1958,9 +1958,10 @@ generate_series_numeric_support(PG_FUNCTION_ARGS) * in the histogram. width_bucket() returns an integer indicating the * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the - * lower bound is assigned to bucket 0. An operand greater than the - * upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the numeric arguments. + * lower bound is assigned to bucket 0. An operand greater than or equal + * to the upper bound is assigned to an additional bucket (with number + * count+1). We don't allow "NaN" for any of the numeric inputs, and we + * don't allow either of the histogram bounds to be +/- infinity. */ Datum width_bucket_numeric(PG_FUNCTION_ARGS) From 2c0d8b95080e1d51c60d9c6f6a2e4460d6dfaf77 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sun, 22 Jun 2025 14:13:46 +0200 Subject: [PATCH 131/602] meson: Fix meson warning WARNING: You should add the boolean check kwarg to the run_command call. It currently defaults to false, but it will default to true in meson 2.0. Introduced by commit bc46104fc9a. (This only happens in the msvc branch. All the other run_command calls are ok.) Reviewed-by: Andres Freund Discussion: https://www.postgresql.org/message-id/flat/42e13eb0-862a-441e-8d84-4f0fd5f6def0%40eisentraut.org --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index d142e3e408b38..474763ad19f0c 100644 --- a/meson.build +++ b/meson.build @@ -1205,7 +1205,7 @@ if not perlopt.disabled() if cc.get_id() == 'msvc' # prevent binary mismatch between MSVC built plperl and Strawberry or # msys ucrt perl libraries - perl_v = run_command(perl, '-V').stdout() + perl_v = run_command(perl, '-V', check: false).stdout() if not perl_v.contains('USE_THREAD_SAFE_LOCALE') perl_ccflags += ['-DNO_THREAD_SAFE_LOCALE'] endif From 43da394304fba820830da2cef2c0214fe292c037 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Mon, 23 Jun 2025 18:03:56 +0700 Subject: [PATCH 132/602] Properly fix AVX-512 CRC calculation bug The problem that led to the workaround in f83f14881c7 was not in fact a compiler bug, but a failure to zero the upper bits of the vector register containing the initial scalar CRC value. Fix that and revert the workaround. 
Diagnosed-by: Nathan Bossart Diagnosed-by: Raghuveer Devulapalli Tested-by: Andy Fan Tested-by: Soumyadeep Chakraborty Reviewed-by: Nathan Bossart Reviewed-by: Raghuveer Devulapalli Discussion: https://postgr.es/m/PH8PR11MB82866B07AA6758D12F699C00FB70A@PH8PR11MB8286.namprd11.prod.outlook.com --- src/port/pg_crc32c_sse42.c | 2 +- src/port/pg_crc32c_sse42_choose.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 9af3474a6ca95..1a7172553551a 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -123,7 +123,7 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len) __m512i k; k = _mm512_broadcast_i32x4(_mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0)); - x0 = _mm512_xor_si512(_mm512_castsi128_si512(_mm_cvtsi32_si128(crc0)), x0); + x0 = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(crc0)), x0); buf += 64; /* Main loop. */ diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 802e47788c10c..74d2421ba2be9 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -95,9 +95,7 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) __cpuidex(exx, 7, 0); #endif -#if defined(__clang__) && !defined(__OPTIMIZE__) - /* Some versions of clang are broken at -O0 */ -#elif defined(USE_AVX512_CRC32C_WITH_RUNTIME_CHECK) +#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK if (exx[2] & (1 << 10) && /* VPCLMULQDQ */ exx[1] & (1 << 31)) /* AVX512-VL */ pg_comp_crc32c = pg_comp_crc32c_avx512; From ccd5bc93fdfeae22c935f405b0687be5cfa9caa4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 23 Jun 2025 11:50:21 -0400 Subject: [PATCH 133/602] Include _mm512_zextsi128_si512() in AVX-512 configure probes. Commit 43da39430 added a dependency on this intrinsic to our AVX-512 CRC code. It turns out this intrinsic was added to gcc later than the other ones we were using, so that there are platforms where the new code fails to compile. Since only relatively old (pre-gcc-10) compilers are affected, it doesn't seem worth trying to make the AVX-512 CRC code actually work on these platforms. Just add the new intrinsic to the configure probe, so that we'll conclude the code can't be built. 
Author: Tom Lane Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/3350336.1750690281@sss.pgh.pa.us --- config/c-compiler.m4 | 1 + configure | 1 + meson.build | 1 + 3 files changed, 3 insertions(+) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 5f3e1d1faf930..da40bd6a64755 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -602,6 +602,7 @@ AC_CACHE_CHECK([for _mm512_clmulepi64_epi128], [Ac_cachevar], { __m128i z; + x = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(0)), x); y = _mm512_clmulepi64_epi128(x, y, 0); z = _mm_ternarylogic_epi64( _mm512_castsi512_si128(y), diff --git a/configure b/configure index 4f15347cc9503..3d3d3db97a456 100755 --- a/configure +++ b/configure @@ -18227,6 +18227,7 @@ else { __m128i z; + x = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(0)), x); y = _mm512_clmulepi64_epi128(x, y, 0); z = _mm_ternarylogic_epi64( _mm512_castsi512_si128(y), diff --git a/meson.build b/meson.build index 474763ad19f0c..6ffe7b4727556 100644 --- a/meson.build +++ b/meson.build @@ -2465,6 +2465,7 @@ int main(void) { __m128i z; + x = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(0)), x); y = _mm512_clmulepi64_epi128(x, y, 0); z = _mm_ternarylogic_epi64( _mm512_castsi512_si128(y), From 70d8a91f82f19d270facd25bb6292a949773dfce Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 23 Jun 2025 21:27:42 +0300 Subject: [PATCH 134/602] Remove excess assert from InvalidatePossiblyObsoleteSlot() ca307d5cec90 introduced keeping WAL segments by slot's last saved restart LSN. It also added an assertion that the slot's restart LSN never goes backward. However, situations when the restart LSN goes backward have been spotted by buildfarm animals and investigated in the thread. When pg_receivewal starts the replication, it sets the last replayed LSN to the beginning of the segment, which is older than what ReplicationSlotReserveWal() set for the slot. A similar situation can happen to pg_basebackup. When standby reconnects to the primary, it sends the last replayed LSN, which might be older than the last confirmed flush LSN. In both these situations, a concurrent checkpoint may trigger an assert trap. Based on ideas from Vitaly Davydov , Hayato Kuroda (Fujitsu) , Vignesh C , Amit Kapila . Reported-by: Vignesh C Reported-by: Tom Lane Discussion: https://postgr.es/m/CALDaNm3s-jpQTe1MshsvQ8GO%3DTLj233JCdkQ7uZ6pwqRVpxAdw%40mail.gmail.com Reviewed-by: Vignesh C Reviewed-by: Amit Kapila --- src/backend/replication/slot.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index c64f020742f8f..c11e588d63221 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1810,8 +1810,6 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes, */ SpinLockAcquire(&s->mutex); - Assert(s->data.restart_lsn >= s->last_saved_restart_lsn); - restart_lsn = s->data.restart_lsn; /* we do nothing if the slot is already invalid */ From f3ed72ca0765bdd726a31b7fa20219e96baf312c Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 23 Jun 2025 21:33:50 +0300 Subject: [PATCH 135/602] Temporarily remove 046_checkpoint_logical_slot.pl This new test was intended to check the handling of the replication slot's restart lsn fixed in ca307d5cec90. However, it also reveals another issue related to logical decoding. This commit temporarily removes this test to keep the buildfarm and CFbot green and avoid distorting others' work. 
This test will be restored once we investigate and fix the issue. Discussion: https://postgr.es/m/CAAKRu_ZCOzQpEumLFgG_%2Biw3FTa%2BhJ4SRpxzaQBYxxM_ZAzWcA%40mail.gmail.com --- src/test/recovery/meson.build | 1 - .../recovery/t/046_checkpoint_logical_slot.pl | 136 ------------------ 2 files changed, 137 deletions(-) delete mode 100644 src/test/recovery/t/046_checkpoint_logical_slot.pl diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 92429d2840257..28fd9ae8dda0d 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -54,7 +54,6 @@ tests += { 't/043_no_contrecord_switch.pl', 't/044_invalidate_inactive_slots.pl', 't/045_archive_restartpoint.pl', - 't/046_checkpoint_logical_slot.pl', 't/047_checkpoint_physical_slot.pl' ], }, diff --git a/src/test/recovery/t/046_checkpoint_logical_slot.pl b/src/test/recovery/t/046_checkpoint_logical_slot.pl deleted file mode 100644 index d67c5108d7800..0000000000000 --- a/src/test/recovery/t/046_checkpoint_logical_slot.pl +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group -# -# This test verifies the case when the logical slot is advanced during -# checkpoint. The test checks that the logical slot's restart_lsn still refers -# to an existed WAL segment after immediate restart. -# -use strict; -use warnings FATAL => 'all'; - -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; - -use Test::More; - -if ($ENV{enable_injection_points} ne 'yes') -{ - plan skip_all => 'Injection points not supported by this build'; -} - -my ($node, $result); - -$node = PostgreSQL::Test::Cluster->new('mike'); -$node->init; -$node->append_conf('postgresql.conf', "wal_level = 'logical'"); -$node->start; - -# Check if the extension injection_points is available, as it may be -# possible that this script is run with installcheck, where the module -# would not be installed by default. -if (!$node->check_extension('injection_points')) -{ - plan skip_all => 'Extension injection_points not installed'; -} - -$node->safe_psql('postgres', q(CREATE EXTENSION injection_points)); - -# Create the two slots we'll need. -$node->safe_psql('postgres', - q{select pg_create_logical_replication_slot('slot_logical', 'test_decoding')} -); -$node->safe_psql('postgres', - q{select pg_create_physical_replication_slot('slot_physical', true)}); - -# Advance both slots to the current position just to have everything "valid". -$node->safe_psql('postgres', - q{select count(*) from pg_logical_slot_get_changes('slot_logical', null, null)} -); -$node->safe_psql('postgres', - q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())} -); - -# Run checkpoint to flush current state to disk and set a baseline. -$node->safe_psql('postgres', q{checkpoint}); - -# Generate some transactions to get RUNNING_XACTS. -my $xacts = $node->background_psql('postgres'); -$xacts->query_until( - qr/run_xacts/, - q(\echo run_xacts -SELECT 1 \watch 0.1 -\q -)); - -$node->advance_wal(20); - -# Run another checkpoint to set a new restore LSN. -$node->safe_psql('postgres', q{checkpoint}); - -$node->advance_wal(20); - -# Run another checkpoint, this time in the background, and make it wait -# on the injection point) so that the checkpoint stops right before -# removing old WAL segments. 
-note('starting checkpoint'); - -my $checkpoint = $node->background_psql('postgres'); -$checkpoint->query_safe( - q(select injection_points_attach('checkpoint-before-old-wal-removal','wait')) -); -$checkpoint->query_until( - qr/starting_checkpoint/, - q(\echo starting_checkpoint -checkpoint; -\q -)); - -# Wait until the checkpoint stops right before removing WAL segments. -note('waiting for injection_point'); -$node->wait_for_event('checkpointer', 'checkpoint-before-old-wal-removal'); -note('injection_point is reached'); - -# Try to advance the logical slot, but make it stop when it moves to the next -# WAL segment (this has to happen in the background, too). -my $logical = $node->background_psql('postgres'); -$logical->query_safe( - q{select injection_points_attach('logical-replication-slot-advance-segment','wait');} -); -$logical->query_until( - qr/get_changes/, - q( -\echo get_changes -select count(*) from pg_logical_slot_get_changes('slot_logical', null, null) \watch 1 -\q -)); - -# Wait until the slot's restart_lsn points to the next WAL segment. -note('waiting for injection_point'); -$node->wait_for_event('client backend', - 'logical-replication-slot-advance-segment'); -note('injection_point is reached'); - -# OK, we're in the right situation: time to advance the physical slot, which -# recalculates the required LSN, and then unblock the checkpoint, which -# removes the WAL still needed by the logical slot. -$node->safe_psql('postgres', - q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())} -); - -# Continue the checkpoint. -$node->safe_psql('postgres', - q{select injection_points_wakeup('checkpoint-before-old-wal-removal')}); - -# Abruptly stop the server (1 second should be enough for the checkpoint -# to finish; it would be better). -$node->stop('immediate'); - -$node->start; - -eval { - $node->safe_psql('postgres', - q{select count(*) from pg_logical_slot_get_changes('slot_logical', null, null);} - ); -}; -is($@, '', "Logical slot still valid"); - -done_testing(); From fc39b286ad7262a4aac8ff9a34f244763bed7a53 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 24 Jun 2025 13:12:46 +0900 Subject: [PATCH 136/602] psql: Rename meta-command \close to \close_prepared \close has been introduced in d55322b0da60 to be able to close a prepared statement using the extended protocol in psql. Per discussion, the name "close" is ambiguous. At the SQL level, CLOSE is used to close a cursor. At protocol level, the close message can be used to either close a statement or a portal. This patch renames \close to \close_prepared to avoid any ambiguity and make it clear that this is used to close a prepared statement. This new name has been chosen based on the feedback from the author and the reviewers. 
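[Editor's note: concretely, the renamed meta-command is used just like before; a sketch mirroring the regression tests below.]

    SELECT $1 \parse stmt1
    \bind_named stmt1 42 \g
    \close_prepared stmt1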
Author: Anthonin Bonnefoy Reviewed-by: Peter Eisentraut Reviewed-by: Jelte Fennema-Nio Discussion: https://postgr.es/m/3e694442-0df5-4f92-a08f-c5d4c4346b85@eisentraut.org --- doc/src/sgml/ref/psql-ref.sgml | 8 ++++---- doc/src/sgml/release-18.sgml | 2 +- src/bin/psql/command.c | 12 ++++++------ src/bin/psql/common.c | 2 +- src/bin/psql/help.c | 3 ++- src/bin/psql/tab-complete.in.c | 2 +- src/test/regress/expected/psql.out | 14 +++++++------- src/test/regress/expected/psql_pipeline.out | 6 +++--- src/test/regress/sql/psql.sql | 12 ++++++------ src/test/regress/sql/psql_pipeline.sql | 6 +++--- 10 files changed, 34 insertions(+), 33 deletions(-) diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 570ef21d1fce3..95f4cac2467e3 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1067,8 +1067,8 @@ INSERT INTO tbls1 VALUES ($1, $2) \parse stmt1 - - \close prepared_statement_name + + \close_prepared prepared_statement_name @@ -1081,7 +1081,7 @@ INSERT INTO tbls1 VALUES ($1, $2) \parse stmt1 Example: SELECT $1 \parse stmt1 -\close stmt1 +\close_prepared stmt1 @@ -3710,7 +3710,7 @@ testdb=> \setenv LESS -imx4F All queries executed while a pipeline is ongoing use the extended query protocol. Queries are appended to the pipeline when ending with a semicolon. The meta-commands \bind, - \bind_named, \close or + \bind_named, \close_prepared or \parse can be used in an ongoing pipeline. While a pipeline is ongoing, \sendpipeline will append the current query buffer to the pipeline. Other meta-commands like diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml index 662c7d8890f9b..66a6817a2be0f 100644 --- a/doc/src/sgml/release-18.sgml +++ b/doc/src/sgml/release-18.sgml @@ -2746,7 +2746,7 @@ Author: Michael Paquier \bind_named, and \close. + linkend="app-psql-meta-command-close-prepared">\close_prepared. 
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 83e84a778411a..9fcd2db832656 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -67,8 +67,8 @@ static backslashResult exec_command_C(PsqlScanState scan_state, bool active_bran static backslashResult exec_command_connect(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_cd(PsqlScanState scan_state, bool active_branch, const char *cmd); -static backslashResult exec_command_close(PsqlScanState scan_state, bool active_branch, - const char *cmd); +static backslashResult exec_command_close_prepared(PsqlScanState scan_state, + bool active_branch, const char *cmd); static backslashResult exec_command_conninfo(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_copy(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_copyright(PsqlScanState scan_state, bool active_branch); @@ -330,8 +330,8 @@ exec_command(const char *cmd, status = exec_command_connect(scan_state, active_branch); else if (strcmp(cmd, "cd") == 0) status = exec_command_cd(scan_state, active_branch, cmd); - else if (strcmp(cmd, "close") == 0) - status = exec_command_close(scan_state, active_branch, cmd); + else if (strcmp(cmd, "close_prepared") == 0) + status = exec_command_close_prepared(scan_state, active_branch, cmd); else if (strcmp(cmd, "conninfo") == 0) status = exec_command_conninfo(scan_state, active_branch); else if (pg_strcasecmp(cmd, "copy") == 0) @@ -728,10 +728,10 @@ exec_command_cd(PsqlScanState scan_state, bool active_branch, const char *cmd) } /* - * \close -- close a previously prepared statement + * \close_prepared -- close a previously prepared statement */ static backslashResult -exec_command_close(PsqlScanState scan_state, bool active_branch, const char *cmd) +exec_command_close_prepared(PsqlScanState scan_state, bool active_branch, const char *cmd) { backslashResult status = PSQL_CMD_SKIP_LINE; diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index b53cd8ab69880..d2c0a49c46c04 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -2628,7 +2628,7 @@ clean_extended_state(void) switch (pset.send_mode) { - case PSQL_SEND_EXTENDED_CLOSE: /* \close */ + case PSQL_SEND_EXTENDED_CLOSE: /* \close_prepared */ free(pset.stmtName); break; case PSQL_SEND_EXTENDED_PARSE: /* \parse */ diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index db6adec8b692b..a2e009ab9bea7 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -331,7 +331,8 @@ slashUsage(unsigned short int pager) HELP0(" \\bind [PARAM]... 
set query parameters\n"); HELP0(" \\bind_named STMT_NAME [PARAM]...\n" " set query parameters for an existing prepared statement\n"); - HELP0(" \\close STMT_NAME close an existing prepared statement\n"); + HELP0(" \\close_prepared STMT_NAME\n" + " close an existing prepared statement\n"); HELP0(" \\endpipeline exit pipeline mode\n"); HELP0(" \\flush flush output data to the server\n"); HELP0(" \\flushrequest send request to the server to flush its output buffer\n"); diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 2c0b4f28c14dd..908eef97c6e28 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -1875,7 +1875,7 @@ psql_completion(const char *text, int start, int end) static const char *const backslash_commands[] = { "\\a", "\\bind", "\\bind_named", - "\\connect", "\\conninfo", "\\C", "\\cd", "\\close", "\\copy", + "\\connect", "\\conninfo", "\\C", "\\cd", "\\close_prepared", "\\copy", "\\copyright", "\\crosstabview", "\\d", "\\da", "\\dA", "\\dAc", "\\dAf", "\\dAo", "\\dAp", "\\db", "\\dc", "\\dconfig", "\\dC", "\\dd", "\\ddp", "\\dD", diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index cf48ae6d0c2ee..236eba2540e9d 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -160,12 +160,12 @@ LINE 1: SELECT $1, $2 foo4 | bar4 (1 row) --- \close (extended query protocol) -\close -\close: missing required argument -\close '' -\close stmt2 -\close stmt2 +-- \close_prepared (extended query protocol) +\close_prepared +\close_prepared: missing required argument +\close_prepared '' +\close_prepared stmt2 +\close_prepared stmt2 SELECT name, statement FROM pg_prepared_statements ORDER BY name; name | statement -------+---------------- @@ -4666,7 +4666,7 @@ bar 'bar' "bar" \C arg1 \c arg1 arg2 arg3 arg4 \cd arg1 - \close stmt1 + \close_prepared stmt1 \conninfo \copy arg1 arg2 arg3 arg4 arg5 arg6 \copyright diff --git a/src/test/regress/expected/psql_pipeline.out b/src/test/regress/expected/psql_pipeline.out index e78e6bfa0ad35..a0816fb10b68e 100644 --- a/src/test/regress/expected/psql_pipeline.out +++ b/src/test/regress/expected/psql_pipeline.out @@ -564,7 +564,7 @@ SELECT $1 \bind \sendpipeline SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a \flushrequest \getresults ERROR: bind message supplies 0 parameters, but prepared statement "" requires 1 @@ -572,7 +572,7 @@ ERROR: bind message supplies 0 parameters, but prepared statement "" requires 1 SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a -- Sync allows pipeline to recover. \syncpipeline \getresults @@ -580,7 +580,7 @@ Pipeline aborted, command did not run SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a \flushrequest \getresults ?column? 
diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql
index 1a8a83462f022..e2e3124543978 100644
--- a/src/test/regress/sql/psql.sql
+++ b/src/test/regress/sql/psql.sql
@@ -68,11 +68,11 @@ SELECT $1, $2 \parse stmt3
 -- Multiple \g calls mean multiple executions
 \bind_named stmt2 'foo3' \g
 \bind_named stmt3 'foo4' 'bar4' \g
--- \close (extended query protocol)
-\close
-\close ''
-\close stmt2
-\close stmt2
+-- \close_prepared (extended query protocol)
+\close_prepared
+\close_prepared ''
+\close_prepared stmt2
+\close_prepared stmt2
 SELECT name, statement FROM pg_prepared_statements ORDER BY name;
 
 -- \bind (extended query protocol)
@@ -1035,7 +1035,7 @@ select \if false \\ (bogus \else \\ 42 \endif \\ forty_two;
 	\C arg1
 	\c arg1 arg2 arg3 arg4
 	\cd arg1
-	\close stmt1
+	\close_prepared stmt1
 	\conninfo
 	\copy arg1 arg2 arg3 arg4 arg5 arg6
 	\copyright
diff --git a/src/test/regress/sql/psql_pipeline.sql b/src/test/regress/sql/psql_pipeline.sql
index 5945eca1ef76c..6788dceee2e90 100644
--- a/src/test/regress/sql/psql_pipeline.sql
+++ b/src/test/regress/sql/psql_pipeline.sql
@@ -306,21 +306,21 @@ SELECT $1 \bind \sendpipeline
 SELECT $1 \bind 1 \sendpipeline
 SELECT $1 \parse a
 \bind_named a 1 \sendpipeline
-\close a
+\close_prepared a
 \flushrequest
 \getresults
 
 -- Pipeline is aborted.
 SELECT $1 \bind 1 \sendpipeline
 SELECT $1 \parse a
 \bind_named a 1 \sendpipeline
-\close a
+\close_prepared a
 -- Sync allows pipeline to recover.
 \syncpipeline
 \getresults
 SELECT $1 \bind 1 \sendpipeline
 SELECT $1 \parse a
 \bind_named a 1 \sendpipeline
-\close a
+\close_prepared a
 \flushrequest
 \getresults
 \endpipeline

From 6531f36283f05219464dd2084a17aab5747f7de8 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Tue, 24 Jun 2025 09:51:07 +0530
Subject: [PATCH 137/602] Fix missing comment update in 1462aad2e4.

Remove the part of the comment that says we don't allow toggling the
two_phase option of a subscription, as that has been supported since
commit 1462aad2e4.

Author: Hayato Kuroda
Author: Amit Kapila
Discussion: https://postgr.es/m/OSCPR01MB1496656725F3951AEE8749EBDF579A@OSCPR01MB14966.jpnprd01.prod.outlook.com
---
 src/backend/replication/logical/worker.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index a23262957acb5..fd11805a44cf9 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -109,13 +109,6 @@
  * If ever a user needs to be aware of the tri-state value, they can fetch it
  * from the pg_subscription catalog (see column subtwophasestate).
  *
- * We don't allow to toggle two_phase option of a subscription because it can
- * lead to an inconsistent replica. Consider, initially, it was on and we have
- * received some prepare then we turn it off, now at commit time the server
- * will send the entire transaction data along with the commit. With some more
- * analysis, we can allow changing this option from off to on but not sure if
- * that alone would be useful.
- *
  * Finally, to avoid problems mentioned in previous paragraphs from any
  * subsequent (not READY) tablesyncs (need to toggle two_phase option from 'on'
  * to 'off' and then again back to 'on') there is a restriction for

From 0cb5145a32c1a867a157e18493e24930338f5d6f Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Tue, 24 Jun 2025 14:21:10 +0900
Subject: [PATCH 138/602] doc: Fix incorrect UUID index entry in function
 documentation.

Previously, the UUID functions documentation defined the "UUID" index entry to link to the UUID data type page, even though that entry already exists there. Instead, the UUID functions page should define its own index entry linking to itself. This commit updates the UUID index entry in the UUID functions documentation to point to the correct section, improving navigation and avoiding duplication. Back-patch to all supported versions. Author: Fujii Masao Reviewed-by: Masahiko Sawada Reviewed-by: Daniel Gustafsson Discussion: https://postgr.es/m/f33e0493-5773-4296-87c5-7ce459054cfe@oss.nttdata.com Backpatch-through: 13 --- doc/src/sgml/func.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index a6d79765c1a73..224d4fe5a9f95 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14384,7 +14384,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple UUID Functions - + UUID generating From 49fe1c83ecf3474776ea9d0db47ae5644d29b67b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 24 Jun 2025 11:30:49 +0200 Subject: [PATCH 139/602] Fix virtual generated column type checking for ALTER TABLE Virtual generated columns have some special checks in CheckAttributeType(), mainly to check that domains are not used. But this check was only applied during CREATE TABLE, not during ALTER TABLE. This fixes that. Reported-by: jian he Discussion: https://www.postgresql.org/message-id/CACJufxE0KHR__-h=zHXbhSNZXMMs4LYo4-dbj8H3YoStYBok1Q@mail.gmail.com --- src/backend/commands/tablecmds.c | 4 ++-- src/test/regress/expected/generated_virtual.out | 6 ++++++ src/test/regress/sql/generated_virtual.sql | 5 +++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index ea96947d81305..074ddb6b9cd17 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -7374,7 +7374,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, /* make sure datatype is legal for a column */ CheckAttributeType(NameStr(attribute->attname), attribute->atttypid, attribute->attcollation, list_make1_oid(rel->rd_rel->reltype), - 0); + (attribute->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0)); InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL); @@ -14426,7 +14426,7 @@ ATPrepAlterColumnType(List **wqueue, /* make sure datatype is legal for a column */ CheckAttributeType(colName, targettype, targetcollid, list_make1_oid(rel->rd_rel->reltype), - 0); + (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? 
CHKATYPE_IS_VIRTUAL : 0));
 
 	if (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
 	{
diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out
index 6300e7c1d96e1..ab35a77477445 100644
--- a/src/test/regress/expected/generated_virtual.out
+++ b/src/test/regress/expected/generated_virtual.out
@@ -800,6 +800,12 @@ CREATE TABLE gtest24r (a int PRIMARY KEY, b gtestdomain1range GENERATED ALWAYS A
 ERROR:  virtual generated column "b" cannot have a domain type
 --INSERT INTO gtest24r (a) VALUES (4);  -- ok
 --INSERT INTO gtest24r (a) VALUES (6);  -- error
+CREATE TABLE gtest24at (a int PRIMARY KEY);
+ALTER TABLE gtest24at ADD COLUMN b gtestdomain1 GENERATED ALWAYS AS (a * 2) VIRTUAL;  -- error
+ERROR:  virtual generated column "b" cannot have a domain type
+CREATE TABLE gtest24ata (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL);
+ALTER TABLE gtest24ata ALTER COLUMN b TYPE gtestdomain1;  -- error
+ERROR:  virtual generated column "b" cannot have a domain type
 CREATE DOMAIN gtestdomainnn AS int CHECK (VALUE IS NOT NULL);
 CREATE TABLE gtest24nn (a int, b gtestdomainnn GENERATED ALWAYS AS (a * 2) VIRTUAL);
 ERROR:  virtual generated column "b" cannot have a domain type
diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql
index b4eedeee2fb27..9011c9d26745f 100644
--- a/src/test/regress/sql/generated_virtual.sql
+++ b/src/test/regress/sql/generated_virtual.sql
@@ -453,6 +453,11 @@ CREATE TABLE gtest24r (a int PRIMARY KEY, b gtestdomain1range GENERATED ALWAYS A
 --INSERT INTO gtest24r (a) VALUES (4);  -- ok
 --INSERT INTO gtest24r (a) VALUES (6);  -- error
 
+CREATE TABLE gtest24at (a int PRIMARY KEY);
+ALTER TABLE gtest24at ADD COLUMN b gtestdomain1 GENERATED ALWAYS AS (a * 2) VIRTUAL;  -- error
+CREATE TABLE gtest24ata (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL);
+ALTER TABLE gtest24ata ALTER COLUMN b TYPE gtestdomain1;  -- error
+
 CREATE DOMAIN gtestdomainnn AS int CHECK (VALUE IS NOT NULL);
 CREATE TABLE gtest24nn (a int, b gtestdomainnn GENERATED ALWAYS AS (a * 2) VIRTUAL);
 --INSERT INTO gtest24nn (a) VALUES (4);  -- ok

From 054beebb7c9fb94ed17bda9381dbd204f32adf42 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson
Date: Tue, 24 Jun 2025 11:49:37 +0200
Subject: [PATCH 140/602] doc: Remove dead link to NewbieDoc Docbook Guide

The link returns 404 and no replacement is available in the project
on Sourceforge where the content once was. Since we already link to
resources for both beginner and experienced docs hackers, remove the
dead link.

Backpatch to all supported versions as the link was added in 8.1.

Author: Daniel Gustafsson
Reviewed-by: Magnus Hagander
Reviewed-by: Michael Paquier
Reported-by: jian he
Discussion: https://postgr.es/m/CACJufxH=YzQPDOe+2WuYZ7seD-BOyjCBmP6JiErpoSiVZWDRnw@mail.gmail.com
Backpatch-through: 13
---
 doc/src/sgml/docguide.sgml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index db4bcce56eac6..7b61b4841aa03 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -60,9 +60,7 @@
    maintained by the OASIS group.
    The official DocBook site has good introductory and reference documentation and
-   a complete O'Reilly book for your online reading pleasure. The
-   
-   NewbieDoc Docbook Guide is very helpful for beginners.
+   a complete O'Reilly book for your online reading pleasure.
The FreeBSD Documentation Project also uses DocBook and has some good information, including a number of style guidelines that might be From 303ba0573ce656b98620133cd17418dcd217318f Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Tue, 24 Jun 2025 09:20:16 -0400 Subject: [PATCH 141/602] Test that vacuum removes tuples older than OldestXmin If vacuum fails to prune a tuple killed before OldestXmin, it will decide to freeze its xmax and later error out in pre-freeze checks. Add a test reproducing this scenario to the recovery suite which creates a table on a primary, updates the table to generate dead tuples for vacuum, and then, during the vacuum, uses a replica to force GlobalVisState->maybe_needed on the primary to move backwards and precede the value of OldestXmin set at the beginning of vacuuming the table. This test is coverage for a case fixed in 83c39a1f7f3. The test was originally committed to master in aa607980aee but later reverted in efcbb76efe4 due to test instability. The test requires multiple index passes. In Postgres 17+, vacuum uses a TID store for the dead TIDs that is very space efficient. With the old minimum maintenance_work_mem of 1 MB, it required a large number of dead rows to generate enough dead TIDs to force multiple index vacuuming passes. Once the source code changes were made to allow a minimum maintenance_work_mem value of 64kB, the test could be made much faster and more stable. Author: Melanie Plageman Reviewed-by: John Naylor Reviewed-by: Peter Geoghegan Discussion: https://postgr.es/m/CAAKRu_ZJBkidusDut6i%3DbDCiXzJEp93GC1%2BNFaZt4eqanYF3Kw%40mail.gmail.com Backpatch-through: 17 --- src/test/recovery/meson.build | 3 +- .../recovery/t/048_vacuum_horizon_floor.pl | 278 ++++++++++++++++++ 2 files changed, 280 insertions(+), 1 deletion(-) create mode 100644 src/test/recovery/t/048_vacuum_horizon_floor.pl diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 28fd9ae8dda0d..6e78ff1a030b3 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -54,7 +54,8 @@ tests += { 't/043_no_contrecord_switch.pl', 't/044_invalidate_inactive_slots.pl', 't/045_archive_restartpoint.pl', - 't/047_checkpoint_physical_slot.pl' + 't/047_checkpoint_physical_slot.pl', + 't/048_vacuum_horizon_floor.pl' ], }, } diff --git a/src/test/recovery/t/048_vacuum_horizon_floor.pl b/src/test/recovery/t/048_vacuum_horizon_floor.pl new file mode 100644 index 0000000000000..d48a6ef7e0f24 --- /dev/null +++ b/src/test/recovery/t/048_vacuum_horizon_floor.pl @@ -0,0 +1,278 @@ +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use Test::More; + +# Test that vacuum prunes away all dead tuples killed before OldestXmin +# +# This test creates a table on a primary, updates the table to generate dead +# tuples for vacuum, and then, during the vacuum, uses the replica to force +# GlobalVisState->maybe_needed on the primary to move backwards and precede +# the value of OldestXmin set at the beginning of vacuuming the table. + +# Set up nodes +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 'physical'); + +# io_combine_limit is set to 1 to avoid pinning more than one buffer at a time +# to ensure test determinism. 
+$node_primary->append_conf( + 'postgresql.conf', qq[ +hot_standby_feedback = on +autovacuum = off +log_min_messages = INFO +maintenance_work_mem = 64 +io_combine_limit = 1 +]); +$node_primary->start; + +my $node_replica = PostgreSQL::Test::Cluster->new('standby'); + +$node_primary->backup('my_backup'); +$node_replica->init_from_backup($node_primary, 'my_backup', + has_streaming => 1); + +$node_replica->start; + +my $test_db = "test_db"; +$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db"); + +# Save the original connection info for later use +my $orig_conninfo = $node_primary->connstr(); + +my $table1 = "vac_horizon_floor_table"; + +# Long-running Primary Session A +my $psql_primaryA = + $node_primary->background_psql($test_db, on_error_stop => 1); + +# Long-running Primary Session B +my $psql_primaryB = + $node_primary->background_psql($test_db, on_error_stop => 1); + +# Our test relies on two rounds of index vacuuming for reasons elaborated +# later. To trigger two rounds of index vacuuming, we must fill up the +# TIDStore with dead items partway through a vacuum of the table. The number +# of rows is just enough to ensure we exceed maintenance_work_mem on all +# supported platforms, while keeping test runtime as short as we can. +my $nrows = 2000; + +# Because vacuum's first pass, pruning, is where we use the GlobalVisState to +# check tuple visibility, GlobalVisState->maybe_needed must move backwards +# during pruning before checking the visibility for a tuple which would have +# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but +# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed. +# +# We must not only force the horizon on the primary to move backwards but also +# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState +# is forced to update during index vacuuming. +# +# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the +# end of a round of index vacuuming, updating the backend's GlobalVisState +# and, in our case, moving maybe_needed backwards. +# +# Then vacuum's first (pruning) pass will continue and pruning will find our +# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to +# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin. +# +# Thus, we must force at least two rounds of index vacuuming to ensure that +# some tuple visibility checks will happen after a round of index vacuuming. +# To accomplish this, we set maintenance_work_mem to its minimum value and +# insert and delete enough rows that we force at least one round of index +# vacuuming before getting to a dead tuple which was killed after the standby +# is disconnected. +$node_primary->safe_psql($test_db, qq[ + CREATE TABLE ${table1}(col1 int) + WITH (autovacuum_enabled=false, fillfactor=10); + INSERT INTO $table1 VALUES(7); + INSERT INTO $table1 SELECT generate_series(1, $nrows) % 3; + CREATE INDEX on ${table1}(col1); + DELETE FROM $table1 WHERE col1 = 0; + INSERT INTO $table1 VALUES(7); +]); + +# We will later move the primary forward while the standby is disconnected. +# For now, however, there is no reason not to wait for the standby to catch +# up. +my $primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn); + +# Test that the WAL receiver is up and running. 
+$node_replica->poll_query_until($test_db, qq[
+	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');
+
+# Set primary_conninfo to something invalid on the replica and reload the
+# config. Once the config is reloaded, the startup process will force the WAL
+# receiver to restart and it will be unable to reconnect because of the
+# invalid connection information.
+$node_replica->safe_psql($test_db, qq[
+		ALTER SYSTEM SET primary_conninfo = '';
+		SELECT pg_reload_conf();
+	]);
+
+# Wait until the WAL receiver has shut down and been unable to start up again.
+$node_replica->poll_query_until($test_db, qq[
+	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');
+
+# Now insert and update a tuple which will be visible to the vacuum on the
+# primary but which will have xmax newer than the oldest xmin on the standby
+# that was recently disconnected.
+my $res = $psql_primaryA->query_safe(
+	qq[
+		INSERT INTO $table1 VALUES (99);
+		UPDATE $table1 SET col1 = 100 WHERE col1 = 99;
+		SELECT 'after_update';
+	]
+);
+
+# Make sure the UPDATE finished
+like($res, qr/^after_update$/m, "UPDATE occurred on primary session A");
+
+# Open a cursor on the primary whose pin will keep VACUUM from getting a
+# cleanup lock on the first page of the relation. We want VACUUM to be able to
+# start, calculate initial values for OldestXmin and GlobalVisState and then
+# be unable to proceed with pruning our dead tuples. This will allow us to
+# reconnect the standby and push the horizon back before we start actual
+# pruning and vacuuming.
+my $primary_cursor1 = "vac_horizon_floor_cursor1";
+
+# The first value inserted into the table was a 7, so FETCH FORWARD should
+# return a 7. That's how we know the cursor has a pin.
+# Disable index scans so the cursor pins heap pages and not index pages.
+$res = $psql_primaryB->query_safe(
+	qq[
+		BEGIN;
+		SET enable_bitmapscan = off;
+		SET enable_indexscan = off;
+		SET enable_indexonlyscan = off;
+		DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7;
+		FETCH $primary_cursor1;
+	]
+);
+
+is($res, 7, qq[Cursor query returned $res. Expected value 7.]);
+
+# Get the PID of the session which will run the VACUUM FREEZE so that we can
+# use it to filter pg_stat_activity later.
+my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();");
+
+# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs()
+# and establish values of OldestXmin and GlobalVisState which are newer than
+# all of our dead tuples. Then it will be unable to get a cleanup lock to
+# start pruning, so it will hang.
+#
+# We use VACUUM FREEZE because it will wait for a cleanup lock instead of
+# skipping the page pinned by the cursor. Note that this works because the
+# target tuple's xmax precedes OldestXmin, which ensures that
+# lazy_scan_noprune() will return false and we will wait for the cleanup
+# lock.
+#
+# Disable any prefetching, parallelism, or other concurrent I/O by vacuum. The
+# pages of the heap must be processed in order by a single worker to ensure
+# test stability (PARALLEL 0 shouldn't be necessary but guards against the
+# possibility of parallel heap vacuuming).
+$psql_primaryA->{stdin} .= qq[
+		SET maintenance_io_concurrency = 0;
+		VACUUM (VERBOSE, FREEZE, PARALLEL 0) $table1;
+		\\echo VACUUM
+		];
+
+# Make sure the VACUUM command makes it to the server.
+$psql_primaryA->{run}->pump_nb();
+
+# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is
+# just waiting on the lock to start vacuuming. We don't want the standby to
+# re-establish a connection to the primary and push the horizon back until
+# we've saved initial values in GlobalVisState and calculated OldestXmin.
+$node_primary->poll_query_until($test_db,
+	qq[
+	SELECT count(*) >= 1 FROM pg_stat_activity
+		WHERE pid = $vacuum_pid
+		AND wait_event = 'BufferPin';
+	],
+	't');
+
+# Ensure the WAL receiver is still not active on the replica.
+$node_replica->poll_query_until($test_db, qq[
+	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f');
+
+# Allow the WAL receiver connection to re-establish.
+$node_replica->safe_psql(
+	$test_db, qq[
+		ALTER SYSTEM SET primary_conninfo = '$orig_conninfo';
+		SELECT pg_reload_conf();
+	]);
+
+# Ensure the new WAL receiver has connected.
+$node_replica->poll_query_until($test_db, qq[
+	SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't');
+
+# Once the WAL sender is shown on the primary, the replica should have
+# connected with the primary and pushed the horizon backward. Primary Session
+# A won't see that until the VACUUM FREEZE proceeds and does its first round
+# of index vacuuming.
+$node_primary->poll_query_until($test_db, qq[
+	SELECT EXISTS (SELECT * FROM pg_stat_replication);] , 't');
+
+# Move the cursor forward to the next 7. We inserted the 7 much later, so
+# advancing the cursor should allow vacuum to proceed vacuuming most pages of
+# the relation. Because we set maintenance_work_mem sufficiently low, we
+# expect that a round of index vacuuming has happened and that the vacuum is
+# now waiting for the cursor to release its pin on the last page of the
+# relation.
+$res = $psql_primaryB->query_safe("FETCH $primary_cursor1");
+is($res, 7,
+	qq[Cursor query returned $res from second fetch. Expected value 7.]);
+
+# Prevent the test from incorrectly passing by confirming that we did indeed
+# do a pass of index vacuuming.
+$node_primary->poll_query_until($test_db, qq[
+	SELECT index_vacuum_count > 0
+	FROM pg_stat_progress_vacuum
+	WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass;
+	] , 't');
+
+# Commit the transaction with the open cursor so that the VACUUM can finish.
+$psql_primaryB->query_until(
+	qr/^commit$/m,
+	qq[
+		COMMIT;
+		\\echo commit
+	]
+);
+
+# VACUUM proceeds with pruning and does a visibility check on each tuple. In
+# older versions of Postgres, pruning found our final dead tuple
+# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new
+# value of maybe_needed. Then heap_prepare_freeze_tuple() would decide the
+# tuple xmax should be frozen because it precedes OldestXmin. Vacuum would
+# then error out in heap_pre_freeze_checks() with "cannot freeze committed
+# xmax". This was fixed by changing pruning to treat all
+# HEAPTUPLE_RECENTLY_DEAD tuples with xmaxes preceding OldestXmin as
+# HEAPTUPLE_DEAD and to remove them.
+
+# With the fix, VACUUM should finish successfully, incrementing the table
+# vacuum_count.
+$node_primary->poll_query_until($test_db,
+	qq[
+	SELECT vacuum_count > 0
+	FROM pg_stat_all_tables WHERE relname = '${table1}';
+	]
+	, 't');
+
+$primary_lsn = $node_primary->lsn('flush');
+
+# Make sure something causes us to flush
+$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);");
+
+# Nothing on the replica should cause a recovery conflict, so this should
+# finish successfully.
+$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn); + +## Shut down psqls +$psql_primaryA->quit; +$psql_primaryB->quit; + +$node_replica->stop(); +$node_primary->stop(); + +done_testing(); From debad29d22152d7fe4c4e671090e20238647c460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Tue, 24 Jun 2025 19:36:12 +0200 Subject: [PATCH 142/602] Improve jumble squashing through CoerceViaIO and RelabelType There's no principled reason for query jumbling to only remove the first layer of RelabelType and CoerceViaIO. Change it to see through as many layers as there are. --- .../pg_stat_statements/expected/squashing.out | 40 +++++++++++-------- contrib/pg_stat_statements/sql/squashing.sql | 9 +++-- src/backend/nodes/queryjumblefuncs.c | 27 ++++++++----- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b935d464ecff..e978564ad7299 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -645,7 +645,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) --- Multiple CoerceViaIO wrapping a constant. Will not squash +-- Multiple CoerceViaIO are squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -661,10 +661,10 @@ SELECT WHERE 1 = ANY(ARRAY[1::text::int::text::int, 1::text::int::text::int]); (1 row) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls --------------------------------------------------------------------------+------- - SELECT WHERE $1 IN ($2::text::int::text::int, $3::text::int::text::int) | 2 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +----------------------------------------------------+------- + SELECT WHERE $1 IN ($2 /*, ... */) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) -- @@ -676,7 +676,7 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t; t (1 row) --- if there is only one level of RelabelType, the list will be squashable +-- However many layers of RelabelType there are, the list will be squashable. SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); id | data @@ -689,8 +689,6 @@ SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9:: {1,2,3,4,5,6,7,8,9} (1 row) --- if there is at least one element with multiple levels of RelabelType, --- the list will not be squashable SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid::int::oid); id | data ----+------ @@ -701,15 +699,25 @@ SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::oid, 2::oid::int::oid]); ----+------ (0 rows) +-- RelabelType together with CoerceViaIO is also squashable +SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::oid::text::int::oid, 2::oid::int::oid]); + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::text::int::oid, 2::oid::int::oid]); + id | data +----+------ +(0 rows) + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ---------------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN +| 1 - ($1 /*, ... */) | - SELECT * FROM test_squash WHERE id IN ($1::oid, $2::oid::int::oid) | 2 - SELECT ARRAY[$1 /*, ... 
*/] | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 -(4 rows) + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN +| 5 + ($1 /*, ... */) | + SELECT ARRAY[$1 /*, ... */] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) -- -- edge cases diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index bd3243ec9cd85..946e149831c94 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -234,7 +234,7 @@ SELECT * FROM test_squash_jsonb WHERE data = ANY(ARRAY (SELECT '"10"')::jsonb]); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- Multiple CoerceViaIO wrapping a constant. Will not squash +-- Multiple CoerceViaIO are squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT WHERE 1 IN (1::text::int::text::int, 1::text::int::text::int); SELECT WHERE 1 = ANY(ARRAY[1::text::int::text::int, 1::text::int::text::int]); @@ -245,14 +245,15 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; -- SELECT pg_stat_statements_reset() IS NOT NULL AS t; --- if there is only one level of RelabelType, the list will be squashable +-- However many layers of RelabelType there are, the list will be squashable. SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid); SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid]; --- if there is at least one element with multiple levels of RelabelType, --- the list will not be squashable SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid::int::oid); SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::oid, 2::oid::int::oid]); +-- RelabelType together with CoerceViaIO is also squashable +SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::oid::text::int::oid, 2::oid::int::oid]); +SELECT * FROM test_squash WHERE id = ANY(ARRAY[1::text::int::oid, 2::oid::int::oid]); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; -- diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index fb33e6931ada3..62e3a677cd191 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -414,7 +414,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len) * Subroutine for _jumbleElements: Verify a few simple cases where we can * deduce that the expression is a constant: * - * - Ignore a possible wrapping RelabelType and CoerceViaIO. + * - See through any wrapping RelabelType and CoerceViaIO layers. * - If it's a FuncExpr, check that the function is a builtin * cast and its arguments are Const. * - Otherwise test if the expression is a simple Const. 
@@ -422,14 +422,22 @@ RecordConstLocation(JumbleState *jstate, int location, int len) static bool IsSquashableConstant(Node *element) { - if (IsA(element, RelabelType)) - element = (Node *) ((RelabelType *) element)->arg; - - if (IsA(element, CoerceViaIO)) - element = (Node *) ((CoerceViaIO *) element)->arg; - +restart: switch (nodeTag(element)) { + case T_RelabelType: + /* Unwrap RelabelType */ + element = (Node *) ((RelabelType *) element)->arg; + goto restart; + + case T_CoerceViaIO: + /* Unwrap CoerceViaIO */ + element = (Node *) ((CoerceViaIO *) element)->arg; + goto restart; + + case T_Const: + return true; + case T_FuncExpr: { FuncExpr *func = (FuncExpr *) element; @@ -468,11 +476,8 @@ IsSquashableConstant(Node *element) } default: - if (!IsA(element, Const)) - return false; + return false; } - - return true; } /* From c2da1a5d6325a92d834c9cb036f65d362e4bfc3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Tue, 24 Jun 2025 19:36:32 +0200 Subject: [PATCH 143/602] Make query jumbling also squash PARAM_EXTERN params Commit 62d712ecfd94 made query jumbling squash lists of Consts as a single element, but there's no reason not to treat PARAM_EXTERN parameters the same. For these purposes, these values are indeed constants for any particular execution of a query. In particular, this should make list squashing more useful for applications using extended query protocol, which would use parameters extensively. A complication arises: if a query has both external parameters and squashable lists, then the parameter number used as placeholder for the squashed list might be inconsistent with regards to the parameter numbers used by the query literal. To reduce the surprise factor, all parameters are renumbered starting from 1 in that case. Author: Sami Imseih Author: Dmitry Dolgov <9erthalion6@gmail.com> Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/CAA5RZ0tRXoPG2y6bMgBCWNDt0Tn=unRerbzYM=oW0syi1=C1OA@mail.gmail.com --- .../pg_stat_statements/expected/extended.out | 60 +++++-- .../pg_stat_statements/expected/squashing.out | 26 +-- .../pg_stat_statements/pg_stat_statements.c | 10 ++ contrib/pg_stat_statements/sql/extended.sql | 11 +- contrib/pg_stat_statements/sql/squashing.sql | 4 +- src/backend/nodes/queryjumblefuncs.c | 160 +++++++++++------- src/include/nodes/primnodes.h | 6 +- src/include/nodes/queryjumble.h | 19 ++- 8 files changed, 198 insertions(+), 98 deletions(-) diff --git a/contrib/pg_stat_statements/expected/extended.out b/contrib/pg_stat_statements/expected/extended.out index 7da308ba84f4f..1bfd0c1ca242f 100644 --- a/contrib/pg_stat_statements/expected/extended.out +++ b/contrib/pg_stat_statements/expected/extended.out @@ -69,13 +69,13 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; (4 rows) -- Various parameter numbering patterns +-- Unique query IDs with parameter numbers switched. SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- t (1 row) --- Unique query IDs with parameter numbers switched. SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g -- (0 rows) @@ -96,7 +96,24 @@ SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g -- (0 rows) +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------+------- + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE $1::int IN ($2 /*, ... 
*/) | 1 + SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 + SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(6 rows) + -- Two groups of two queries with the same query ID. +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g -- (1 row) @@ -114,15 +131,34 @@ SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ---------------------------------------------------------------+------- - SELECT WHERE $1::int IN ($2::int, $3::int) | 1 - SELECT WHERE $2::int IN ($1::int, $3::int) | 2 - SELECT WHERE $2::int IN ($1::int, $3::int) | 2 - SELECT WHERE $2::int IN ($3::int, $1::int) | 1 - SELECT WHERE $3::int IN ($1::int, $2::int) | 1 - SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 - SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 -(8 rows) + query | calls +----------------------------------------------------+------- + SELECT WHERE $1::int IN ($2 /*, ... */) | 2 + SELECT WHERE $1::int IN ($2 /*, ... */) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +-- no squashable list, the parameters id's are kept as-is +SELECT WHERE $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g +-- +(1 row) + +-- squashable list, so the parameter IDs will be re-assigned +SELECT WHERE 1 IN (1, 2, 3) AND $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g +-- +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------+------- + SELECT WHERE $1 IN ($2 /*, ... */) AND $3 = $4 AND $5 = $6 | 1 + SELECT WHERE $3 = $1 AND $2 = $4 | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index e978564ad7299..f952f47ef7be1 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -103,7 +103,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) --- external parameters will not be squashed +-- external parameters will be squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -123,14 +123,14 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ----------------------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | 1 - SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +----------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1 /*, ... 
*/]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                    |     1
 (3 rows)
 
--- neither are prepared statements
+-- prepared statements will also be squashed
 -- the IN and ARRAY forms of this statement will have the same queryId
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
@@ -155,12 +155,12 @@ EXECUTE p1(1, 2, 3, 4, 5);
 DEALLOCATE p1;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                            query                            | calls
-------------------------------------------------------------+-------
- DEALLOCATE $1                                               |     2
- PREPARE p1(int, int, int, int, int) AS                     +|     2
-  SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | 
- SELECT pg_stat_statements_reset() IS NOT NULL AS t          |     1
+                         query                         | calls
+-------------------------------------------------------+-------
+ DEALLOCATE $1                                          |     2
+ PREPARE p1(int, int, int, int, int) AS                +|     2
+  SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t     |     1
 (3 rows)
 
 -- More conditions in the query
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ecc7f2fb2663f..5597fcaaa053d 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2841,6 +2841,16 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		int			off,	/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
 
+		/*
+		 * If we have an external param at this location, but no lists are
+		 * being squashed across the query, then we skip here; this will make
+		 * us print the characters found in the original query that represent
+		 * the parameter in the next iteration (or after the loop is done),
+		 * which is a bit odd but seems to work okay in most cases.
+		 */
+		if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
+			continue;
+
 		off = jstate->clocations[i].location;
 
 		/* Adjust recorded location if we're dealing with partial string */
diff --git a/contrib/pg_stat_statements/sql/extended.sql b/contrib/pg_stat_statements/sql/extended.sql
index a366658a53a72..9a6518e2f0487 100644
--- a/contrib/pg_stat_statements/sql/extended.sql
+++ b/contrib/pg_stat_statements/sql/extended.sql
@@ -21,17 +21,26 @@ SELECT $1 \bind 'unnamed_val1' \g
 SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- Various parameter numbering patterns
-SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 -- Unique query IDs with parameter numbers switched.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g
 SELECT WHERE ($2::int, 10) IN ((11, $3::int), ($1::int, 12)) \bind '1' '2' '3' \g
 SELECT WHERE $1::int IN ($2::int, $3::int) \bind '1' '2' '3' \g
 SELECT WHERE $2::int IN ($3::int, $1::int) \bind '1' '2' '3' \g
 SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- Two groups of two queries with the same query ID.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g SELECT WHERE '4'::int IN ($1::int, '5'::int) \bind '2' \g SELECT WHERE $2::int IN ($1::int, '1'::int) \bind '1' '2' \g SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + +-- no squashable list, the parameters id's are kept as-is +SELECT WHERE $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g +-- squashable list, so the parameter IDs will be re-assigned +SELECT WHERE 1 IN (1, 2, 3) AND $3 = $1 AND $2 = $4 \bind 1 2 1 2 \g SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index 946e149831c94..53138d125a92c 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -32,7 +32,7 @@ SELECT WHERE 1 IN (1, int4(1), int4(2), 2); SELECT WHERE 1 = ANY (ARRAY[1, int4(1), int4(2), 2]); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- external parameters will not be squashed +-- external parameters will be squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) \bind 1 2 3 4 5 ; @@ -40,7 +40,7 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind ; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- neither are prepared statements +-- prepared statements will also be squashed -- the IN and ARRAY forms of this statement will have the same queryId SELECT pg_stat_statements_reset() IS NOT NULL AS t; PREPARE p1(int, int, int, int, int) AS diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 62e3a677cd191..31f971519772d 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -21,6 +21,11 @@ * tree(s) generated from the query. The executor can then use this value * to blame query costs on the proper queryId. * + * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed" + * and contribute only once to the jumble. This has the effect that queries + * that differ only on the length of such lists have the same queryId. 
+ * + * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -61,11 +66,13 @@ static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size); static void FlushPendingNulls(JumbleState *jstate); static void RecordConstLocation(JumbleState *jstate, + bool extern_param, int location, int len); static void _jumbleNode(JumbleState *jstate, Node *node); +static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); +static void _jumbleParam(JumbleState *jstate, Node *node); static void _jumbleA_Const(JumbleState *jstate, Node *node); -static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); static void _jumbleRangeTblEntry_eref(JumbleState *jstate, RangeTblEntry *rte, @@ -185,6 +192,7 @@ InitJumble(void) jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; jstate->pending_nulls = 0; + jstate->has_squashed_lists = false; #ifdef USE_ASSERT_CHECKING jstate->total_jumble_len = 0; #endif @@ -207,6 +215,10 @@ DoJumble(JumbleState *jstate, Node *node) if (jstate->pending_nulls > 0) FlushPendingNulls(jstate); + /* Squashed list found, reset highest_extern_param_id */ + if (jstate->has_squashed_lists) + jstate->highest_extern_param_id = 0; + /* Process the jumble buffer and produce the hash value */ return DatumGetInt64(hash_any_extended(jstate->jumble, jstate->jumble_len, @@ -376,14 +388,14 @@ FlushPendingNulls(JumbleState *jstate) * Record the location of some kind of constant within a query string. * These are not only bare constants but also expressions that ultimately * constitute a constant, such as those inside casts and simple function - * calls. + * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param. * * If length is -1, it indicates a single such constant element. If * it's a positive integer, it indicates the length of a squashable * list of them. */ static void -RecordConstLocation(JumbleState *jstate, int location, int len) +RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -406,6 +418,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len) Assert(len > -1 || len == -1); jstate->clocations[jstate->clocations_count].length = len; jstate->clocations[jstate->clocations_count].squashed = (len > -1); + jstate->clocations[jstate->clocations_count].extern_param = extern_param; jstate->clocations_count++; } } @@ -417,7 +430,8 @@ RecordConstLocation(JumbleState *jstate, int location, int len) * - See through any wrapping RelabelType and CoerceViaIO layers. * - If it's a FuncExpr, check that the function is a builtin * cast and its arguments are Const. - * - Otherwise test if the expression is a simple Const. + * - Otherwise test if the expression is a simple Const or a + * PARAM_EXTERN param. */ static bool IsSquashableConstant(Node *element) @@ -438,6 +452,9 @@ IsSquashableConstant(Node *element) case T_Const: return true; + case T_Param: + return castNode(Param, element)->paramkind == PARAM_EXTERN; + case T_FuncExpr: { FuncExpr *func = (FuncExpr *) element; @@ -487,8 +504,8 @@ IsSquashableConstant(Node *element) * Return value indicates if squashing is possible. 
 *
 * Note that this function searches only for explicit Const nodes with
- * possibly very simple decorations on top, and does not try to simplify
- * expressions.
+ * possibly very simple decorations on top and PARAM_EXTERN parameters,
+ * and does not try to simplify expressions.
  */
 static bool
 IsSquashableConstantList(List *elements)
 {
@@ -513,7 +530,7 @@ IsSquashableConstantList(List *elements)
 #define JUMBLE_ELEMENTS(list, node) \
 	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location, -1)
+	RecordConstLocation(jstate, false, expr->location, -1)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -540,42 +557,6 @@ do { \
 
 #include "queryjumblefuncs.funcs.c"
 
-/*
- * We try to jumble lists of expressions as one individual item regardless
- * of how many elements are in the list. This is know as squashing, which
- * results in different queries jumbling to the same query_id, if the only
- * difference is the number of elements in the list.
- *
- * We allow constants to be squashed. To normalize such queries, we use
- * the start and end locations of the list of elements in a list.
- */
-static void
-_jumbleElements(JumbleState *jstate, List *elements, Node *node)
-{
-	bool		normalize_list = false;
-
-	if (IsSquashableConstantList(elements))
-	{
-		if (IsA(node, ArrayExpr))
-		{
-			ArrayExpr  *aexpr = (ArrayExpr *) node;
-
-			if (aexpr->list_start > 0 && aexpr->list_end > 0)
-			{
-				RecordConstLocation(jstate,
-									aexpr->list_start + 1,
-									(aexpr->list_end - aexpr->list_start) - 1);
-				normalize_list = true;
-			}
-		}
-	}
-
-	if (!normalize_list)
-	{
-		_jumbleNode(jstate, (Node *) elements);
-	}
-}
-
 static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
@@ -617,26 +598,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
 			break;
 	}
 
-	/* Special cases to handle outside the automated code */
-	switch (nodeTag(expr))
-	{
-		case T_Param:
-			{
-				Param	   *p = (Param *) node;
-
-				/*
-				 * Update the highest Param id seen, in order to start
-				 * normalization correctly.
-				 */
-				if (p->paramkind == PARAM_EXTERN &&
-					p->paramid > jstate->highest_extern_param_id)
-					jstate->highest_extern_param_id = p->paramid;
-			}
-			break;
-		default:
-			break;
-	}
-
 	/* Ensure we added something to the jumble buffer */
 	Assert(jstate->total_jumble_len > prev_jumble_len);
 }
@@ -672,6 +633,79 @@ _jumbleList(JumbleState *jstate, Node *node)
 	}
 }
 
+/*
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is known as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize
+ * such queries, we use the start and end locations of the list of elements in
+ * a list.
+ */
+static void
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
+{
+	bool		normalize_list = false;
+
+	if (IsSquashableConstantList(elements))
+	{
+		if (IsA(node, ArrayExpr))
+		{
+			ArrayExpr  *aexpr = (ArrayExpr *) node;
+
+			if (aexpr->list_start > 0 && aexpr->list_end > 0)
+			{
+				RecordConstLocation(jstate,
+									false,
+									aexpr->list_start + 1,
+									(aexpr->list_end - aexpr->list_start) - 1);
+				normalize_list = true;
+				jstate->has_squashed_lists = true;
+			}
+		}
+	}
+
+	if (!normalize_list)
+	{
+		_jumbleNode(jstate, (Node *) elements);
+	}
+}
+
+/*
+ * We store the highest param ID of extern params. 
This can later be used
+ * to start the numbering of the placeholder for squashed lists.
+ */
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+	Param	   *expr = (Param *) node;
+
+	JUMBLE_FIELD(paramkind);
+	JUMBLE_FIELD(paramid);
+	JUMBLE_FIELD(paramtype);
+	/* paramtypmod and paramcollid are ignored */
+
+	if (expr->paramkind == PARAM_EXTERN)
+	{
+		/*
+		 * At this point, only external parameter locations outside of
+		 * squashable lists will be recorded.
+		 */
+		RecordConstLocation(jstate, true, expr->location, -1);
+
+		/*
+		 * Update the highest Param id seen, in order to start normalization
+		 * correctly.
+		 *
+		 * Note: This value is reset at the end of jumbling if there exists a
+		 * squashable list. See the comment in the definition of JumbleState.
+		 */
+		if (expr->paramid > jstate->highest_extern_param_id)
+			jstate->highest_extern_param_id = expr->paramid;
+	}
+}
+
 static void
 _jumbleA_Const(JumbleState *jstate, Node *node)
 {
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 01510b01b649b..6dfca3cb35ba5 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -389,14 +389,16 @@ typedef enum ParamKind
 
 typedef struct Param
 {
+	pg_node_attr(custom_query_jumble)
+
 	Expr		xpr;
 	ParamKind	paramkind;		/* kind of parameter. See above */
 	int			paramid;		/* numeric ID for parameter */
 	Oid			paramtype;		/* pg_type OID of parameter's datatype */
 	/* typmod value, if known */
-	int32		paramtypmod pg_node_attr(query_jumble_ignore);
+	int32		paramtypmod;
 	/* OID of collation, or InvalidOid if none */
-	Oid			paramcollid pg_node_attr(query_jumble_ignore);
+	Oid			paramcollid;
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
 } Param;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index da7c7abed2e6a..dcb36dcb44f2f 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -24,11 +24,11 @@ typedef struct LocationLen
 	int			location;		/* start offset in query text */
 	int			length;			/* length in bytes, or -1 to ignore */
 
-	/*
-	 * Indicates that this location represents the beginning or end of a run
-	 * of squashed constants.
-	 */
+	/* Does this location represent a squashed list? */
 	bool		squashed;
+
+	/* Is this location a PARAM_EXTERN parameter? */
+	bool		extern_param;
 } LocationLen;
 
 /*
@@ -52,9 +52,18 @@ typedef struct JumbleState
 	/* Current number of valid entries in clocations array */
 	int			clocations_count;
 
-	/* highest Param id we've seen, in order to start normalization correctly */
+	/*
+	 * ID of the highest PARAM_EXTERN parameter we've seen in the query; used
+	 * to start normalization correctly.  However, if there are any squashed
+	 * lists in the query, we disregard query-supplied parameter numbers and
+	 * renumber everything.  This is to avoid possible gaps caused by
+	 * squashing in case any params are in squashed lists.
+	 */
 	int			highest_extern_param_id;
 
+	/* Whether squashable lists are present */
+	bool		has_squashed_lists;
+
 	/*
 	 * Count of the number of NULL nodes seen since last appending a value.
 	 * These are flushed out to the jumble buffer before subsequent appends
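
To illustrate the combined effect of the two squashing commits above, a
small pg_stat_statements session sketched from the regression output
shown earlier (the table name test_squash comes from those tests): lists
of constants and lists of PARAM_EXTERN parameters, whatever their
length, now normalize to a single squashed placeholder.

    SELECT * FROM test_squash WHERE id IN (1, 2, 3);
    SELECT * FROM test_squash WHERE id IN (1, 2, 3, 4, 5, 6, 7);
    SELECT * FROM test_squash WHERE id IN ($1, $2, $3) \bind 1 2 3 \g

    -- All three accumulate under one normalized entry:
    -- SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)
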
This could happen if (1) WaitForReplicationWorkerAttach absorbed a process latch wakeup that was meant to cause ApplyLauncherMain to do work, or (2) logicalrep_worker_launch reported failure, either because of resource limits or because the new worker terminated immediately. In case (2), the expected behavior is that we retry the launch after wal_retrieve_retry_interval, but that didn't reliably happen. It's not clear how often such conditions would occur in the field, but in our subscription test suite they are somewhat common, especially in tests that exercise cases that cause quick worker failure. That causes the tests to take substantially longer than they ought to do on typical setups. To fix (1), make WaitForReplicationWorkerAttach re-set the latch before returning if it cleared it while looping. To fix (2), ensure that we reduce wait_time to no more than wal_retrieve_retry_interval when logicalrep_worker_launch reports failure. In passing, fix a couple of perhaps-hypothetical race conditions, e.g. examining worker->in_use without a lock. Backpatch to v16. Problem (2) didn't exist before commit 5a3a95385 because the previous code always set wait_time to wal_retrieve_retry_interval when launching a worker, regardless of success or failure of the launch. That behavior also greatly mitigated problem (1), so I'm not excited about adapting the remainder of the patch to the substantially-different code in older branches. Author: Tom Lane Reviewed-by: Amit Kapila Reviewed-by: Ashutosh Bapat Discussion: https://postgr.es/m/817604.1750723007@sss.pgh.pa.us Backpatch-through: 16 --- src/backend/replication/logical/launcher.c | 40 ++++++++++++++++----- src/backend/replication/logical/tablesync.c | 19 ++++++---- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 1c3c051403dd6..14d8efbd25bf5 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -175,12 +175,14 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, uint16 generation, BackgroundWorkerHandle *handle) { - BgwHandleStatus status; - int rc; + bool result = false; + bool dropped_latch = false; for (;;) { + BgwHandleStatus status; pid_t pid; + int rc; CHECK_FOR_INTERRUPTS(); @@ -189,8 +191,9 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, /* Worker either died or has started. Return false if died. */ if (!worker->in_use || worker->proc) { + result = worker->in_use; LWLockRelease(LogicalRepWorkerLock); - return worker->in_use; + break; } LWLockRelease(LogicalRepWorkerLock); @@ -205,7 +208,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, if (generation == worker->generation) logicalrep_worker_cleanup(worker); LWLockRelease(LogicalRepWorkerLock); - return false; + break; /* result is already false */ } /* @@ -220,8 +223,18 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, { ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); + dropped_latch = true; } } + + /* + * If we had to clear a latch event in order to wait, be sure to restore + * it before exiting. Otherwise caller may miss events. 
+ */ + if (dropped_latch) + SetLatch(MyLatch); + + return result; } /* @@ -1194,10 +1207,21 @@ ApplyLauncherMain(Datum main_arg) (elapsed = TimestampDifferenceMilliseconds(last_start, now)) >= wal_retrieve_retry_interval) { ApplyLauncherSetWorkerStartTime(sub->oid, now); - logicalrep_worker_launch(WORKERTYPE_APPLY, - sub->dbid, sub->oid, sub->name, - sub->owner, InvalidOid, - DSM_HANDLE_INVALID); + if (!logicalrep_worker_launch(WORKERTYPE_APPLY, + sub->dbid, sub->oid, sub->name, + sub->owner, InvalidOid, + DSM_HANDLE_INVALID)) + { + /* + * We get here either if we failed to launch a worker + * (perhaps for resource-exhaustion reasons) or if we + * launched one but it immediately quit. Either way, it + * seems appropriate to try again after + * wal_retrieve_retry_interval. + */ + wait_time = Min(wait_time, + wal_retrieve_retry_interval); + } } else { diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 8e1e8762f6258..c90f23ee5b0b2 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -603,14 +603,19 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) TimestampDifferenceExceeds(hentry->last_start_time, now, wal_retrieve_retry_interval)) { - logicalrep_worker_launch(WORKERTYPE_TABLESYNC, - MyLogicalRepWorker->dbid, - MySubscription->oid, - MySubscription->name, - MyLogicalRepWorker->userid, - rstate->relid, - DSM_HANDLE_INVALID); + /* + * Set the last_start_time even if we fail to start + * the worker, so that we won't retry until + * wal_retrieve_retry_interval has elapsed. + */ hentry->last_start_time = now; + (void) logicalrep_worker_launch(WORKERTYPE_TABLESYNC, + MyLogicalRepWorker->dbid, + MySubscription->oid, + MySubscription->name, + MyLogicalRepWorker->userid, + rstate->relid, + DSM_HANDLE_INVALID); } } } From 84c4e10e130f411a89a2f9fd9184075ef02b0eaf Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 25 Jun 2025 08:59:25 +0900 Subject: [PATCH 145/602] doc: Add secondary index entries for vacuum-related parameters. For parameters that exist as both configuration and storage options, the documentation typically includes secondary index entries to help users distinguish and locate the relevant references easily. However, such index entries were missing for vacuum_truncate and vacuum_max_eager_freeze_failure_rate, both introduced in v18. This commit adds appropriate secondary index terms for these parameters to ensure consistency with other parameters and improve usability of the documentation index. 
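As an illustrative sketch (not part of this patch; the table name is hypothetical), the dual role that these index entries need to distinguish looks like this in SQL:

    SET vacuum_truncate = off;                                   -- the configuration parameter
    CREATE TABLE vac_demo (a int) WITH (vacuum_truncate = off);  -- the storage parameter
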
Author: Fujii Masao Discussion: https://postgr.es/m/e95c899a-2aeb-45b7-8fd3-7a27dcdb475b@oss.nttdata.com --- doc/src/sgml/config.sgml | 6 ++++-- doc/src/sgml/ref/create_table.sgml | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index b265cc89c9d46..0ac519706a4d6 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9340,7 +9340,8 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; vacuum_truncate (boolean) - vacuum_truncate configuration parameter + vacuum_truncate + configuration parameter @@ -9544,7 +9545,8 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; vacuum_max_eager_freeze_failure_rate (floating point) - vacuum_max_eager_freeze_failure_rate configuration parameter + vacuum_max_eager_freeze_failure_rate + configuration parameter diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index a581691818278..9d92e0a35516f 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1694,7 +1694,8 @@ WITH ( MODULUS numeric_literal, REM vacuum_truncate, toast.vacuum_truncate (boolean) - vacuum_truncate storage parameter + vacuum_truncate + storage parameter From a9c2bde9295574fc79ce0dea1a2b481c8804c1a5 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 25 Jun 2025 09:01:13 +0900 Subject: [PATCH 146/602] doc: Mention ANALYZE VERBOSE in track_cost_delay_timing description. The documentation for track_cost_delay_timing describes where cost-based vacuum delay timing information is displayed when the setting is enabled. While this information is also shown in the output of ANALYZE VERBOSE, that was previously omitted from the list. This commit updates the documentation to include ANALYZE VERBOSE in the list, clarifying that it also reports cost-based delay timing information. Author: Fujii Masao Discussion: https://postgr.es/m/e95c899a-2aeb-45b7-8fd3-7a27dcdb475b@oss.nttdata.com --- doc/src/sgml/config.sgml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0ac519706a4d6..f4ba58bb5c3df 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8602,7 +8602,8 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; timing information is displayed in pg_stat_progress_vacuum, pg_stat_progress_analyze, - in the output of when the + in the output of and + when the VERBOSE option is used, and by autovacuum for auto-vacuums and auto-analyzes when is set. From 82015fd9bdc0827c33245a0eef620f854d53b720 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 25 Jun 2025 09:02:31 +0900 Subject: [PATCH 147/602] doc: Fix type description of io_workers GUC for consistency. The documentation previously described the type of the io_workers GUC parameter as "int". However, the documentation consistently uses "integer" for parameters of this type. This commit updates the type description of io_workers to "integer" for consistency with other GUC parameter descriptions. 
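As a quick illustrative check (not part of this patch), the parameter is indeed integer-valued:

    SHOW io_workers;   -- returns an integer, e.g. the default of 3
    -- only consulted when io_method is set to 'worker'
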
Author: Fujii Masao Discussion: https://postgr.es/m/e95c899a-2aeb-45b7-8fd3-7a27dcdb475b@oss.nttdata.com --- doc/src/sgml/config.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f4ba58bb5c3df..59a0874528a3a 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2788,7 +2788,7 @@ include_dir 'conf.d' - io_workers (int) + io_workers (integer) io_workers configuration parameter From 661643dedad97f8b924991fdc739b1f47b0fb60b Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 25 Jun 2025 10:03:46 +0900 Subject: [PATCH 148/602] Avoid scribbling of VACUUM options This fixes two issues with the handling of VacuumParams in vacuum_rel(). This code path modifies the passed-in VacuumParams for the "truncate" and "index_cleanup" options of the relation being worked on, which leads to incorrect options being used in the following two scenarios, because a VacuumParams pointer is shared across multiple relations: - Multiple relations in a single VACUUM command. - TOAST relations vacuumed with their main relation. The problem is avoided by providing the two callers of vacuum_rel() with copies of VacuumParams, taken before the "truncate" and "index_cleanup" options are updated. The refactoring of the VACUUM option and parameters done in 0d831389749a did not introduce an issue, but it has encouraged the problem we are dealing with in this commit, with b84dbc8eb80b for "truncate" and a96c41feec6b for "index_cleanup", which were added a couple of years after the initial refactoring. HEAD will be improved with a different patch that hardens the uses of VacuumParams across the tree. This cannot be backpatched as it introduces an ABI breakage. The backend portion of the patch was authored by Nathan, while I have implemented the tests. The tests rely on injection points to check the option values, making them faster and more reliable than the tests originally proposed by Shihao, and they also provide more coverage. This part can only be backpatched down to v17. Reported-by: Shihao Zhong Author: Nathan Bossart Co-authored-by: Michael Paquier Discussion: https://postgr.es/m/CAGRkXqTo+aK=GTy5pSc-9cy8H2F2TJvcrZ-zXEiNJj93np1UUw@mail.gmail.com Backpatch-through: 13 --- src/backend/commands/vacuum.c | 39 +++++- src/test/modules/injection_points/Makefile | 2 +- .../injection_points/expected/vacuum.out | 122 ++++++++++++++++++ src/test/modules/injection_points/meson.build | 1 + .../modules/injection_points/sql/vacuum.sql | 47 +++++++ 5 files changed, 206 insertions(+), 5 deletions(-) create mode 100644 src/test/modules/injection_points/expected/vacuum.out create mode 100644 src/test/modules/injection_points/sql/vacuum.sql diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 33a33bf6b1cfa..02993d320dafc 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -56,6 +56,7 @@ #include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/guc_hooks.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -634,7 +635,15 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, if (params->options & VACOPT_VACUUM) { - if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy)) + VacuumParams params_copy; + + /* + * vacuum_rel() scribbles on the parameters, so give it a copy + * to avoid affecting other relations.
+ */ + memcpy(¶ms_copy, params, sizeof(VacuumParams)); + + if (!vacuum_rel(vrel->oid, vrel->relation, ¶ms_copy, bstrategy)) continue; } @@ -2008,9 +2017,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, Oid save_userid; int save_sec_context; int save_nestlevel; + VacuumParams toast_vacuum_params; Assert(params != NULL); + /* + * This function scribbles on the parameters, so make a copy early to + * avoid affecting the TOAST table (if we do end up recursing to it). + */ + memcpy(&toast_vacuum_params, params, sizeof(VacuumParams)); + /* Begin a transaction for vacuuming this relation */ StartTransactionCommand(); @@ -2191,6 +2207,15 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, } } +#ifdef USE_INJECTION_POINTS + if (params->index_cleanup == VACOPTVALUE_AUTO) + INJECTION_POINT("vacuum-index-cleanup-auto", NULL); + else if (params->index_cleanup == VACOPTVALUE_DISABLED) + INJECTION_POINT("vacuum-index-cleanup-disabled", NULL); + else if (params->index_cleanup == VACOPTVALUE_ENABLED) + INJECTION_POINT("vacuum-index-cleanup-enabled", NULL); +#endif + /* * Check if the vacuum_max_eager_freeze_failure_rate table storage * parameter was specified. This overrides the GUC value. @@ -2221,6 +2246,15 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, params->truncate = VACOPTVALUE_DISABLED; } +#ifdef USE_INJECTION_POINTS + if (params->truncate == VACOPTVALUE_AUTO) + INJECTION_POINT("vacuum-truncate-auto", NULL); + else if (params->truncate == VACOPTVALUE_DISABLED) + INJECTION_POINT("vacuum-truncate-disabled", NULL); + else if (params->truncate == VACOPTVALUE_ENABLED) + INJECTION_POINT("vacuum-truncate-enabled", NULL); +#endif + /* * Remember the relation's TOAST relation for later, if the caller asked * us to process it. In VACUUM FULL, though, the toast table is @@ -2299,15 +2333,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ if (toast_relid != InvalidOid) { - VacuumParams toast_vacuum_params; - /* * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise, * set toast_parent so that the privilege checks are done on the main * relation. NB: This is only safe to do because we hold a session * lock on the main relation that prevents concurrent deletion. 
*/ - memcpy(&toast_vacuum_params, params, sizeof(VacuumParams)); toast_vacuum_params.options |= VACOPT_PROCESS_MAIN; toast_vacuum_params.toast_parent = relid; diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index e680991f8d4f0..fc82cd67f6cd6 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -11,7 +11,7 @@ EXTENSION = injection_points DATA = injection_points--1.0.sql PGFILEDESC = "injection_points - facility for injection points" -REGRESS = injection_points hashagg reindex_conc +REGRESS = injection_points hashagg reindex_conc vacuum REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress ISOLATION = basic inplace syscache-update-pruned diff --git a/src/test/modules/injection_points/expected/vacuum.out b/src/test/modules/injection_points/expected/vacuum.out new file mode 100644 index 0000000000000..58df59fa927e3 --- /dev/null +++ b/src/test/modules/injection_points/expected/vacuum.out @@ -0,0 +1,122 @@ +-- Tests for VACUUM +CREATE EXTENSION injection_points; +SELECT injection_points_set_local(); + injection_points_set_local +---------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-index-cleanup-auto', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-index-cleanup-disabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-index-cleanup-enabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-truncate-auto', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-truncate-disabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-truncate-enabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +-- Check state of index_cleanup and truncate in VACUUM. +CREATE TABLE vac_tab_on_toast_off(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=true, toast.vacuum_index_cleanup=false, + vacuum_truncate=true, toast.vacuum_truncate=false); +CREATE TABLE vac_tab_off_toast_on(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=false, toast.vacuum_index_cleanup=true, + vacuum_truncate=false, toast.vacuum_truncate=true); +-- Multiple relations should use their options in isolation. +VACUUM vac_tab_on_toast_off, vac_tab_off_toast_on; +NOTICE: notice triggered for injection point vacuum-index-cleanup-enabled +NOTICE: notice triggered for injection point vacuum-truncate-enabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-disabled +NOTICE: notice triggered for injection point vacuum-truncate-disabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-disabled +NOTICE: notice triggered for injection point vacuum-truncate-disabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-enabled +NOTICE: notice triggered for injection point vacuum-truncate-enabled +-- Check "auto" case of index_cleanup and "truncate" controlled by +-- its GUC. 
+CREATE TABLE vac_tab_auto(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=auto, toast.vacuum_index_cleanup=auto); +SET vacuum_truncate = false; +VACUUM vac_tab_auto; +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-disabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-disabled +SET vacuum_truncate = true; +VACUUM vac_tab_auto; +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-enabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-enabled +RESET vacuum_truncate; +DROP TABLE vac_tab_auto; +DROP TABLE vac_tab_on_toast_off; +DROP TABLE vac_tab_off_toast_on; +-- Cleanup +SELECT injection_points_detach('vacuum-index-cleanup-auto'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-index-cleanup-disabled'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-index-cleanup-enabled'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-truncate-auto'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-truncate-disabled'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-truncate-enabled'); + injection_points_detach +------------------------- + +(1 row) + +DROP EXTENSION injection_points; diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index d61149712fd7d..ce778ccf9ac45 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -37,6 +37,7 @@ tests += { 'injection_points', 'hashagg', 'reindex_conc', + 'vacuum', ], 'regress_args': ['--dlpath', meson.build_root() / 'src/test/regress'], # The injection points are cluster-wide, so disable installcheck diff --git a/src/test/modules/injection_points/sql/vacuum.sql b/src/test/modules/injection_points/sql/vacuum.sql new file mode 100644 index 0000000000000..23760dd0f380a --- /dev/null +++ b/src/test/modules/injection_points/sql/vacuum.sql @@ -0,0 +1,47 @@ +-- Tests for VACUUM + +CREATE EXTENSION injection_points; + +SELECT injection_points_set_local(); +SELECT injection_points_attach('vacuum-index-cleanup-auto', 'notice'); +SELECT injection_points_attach('vacuum-index-cleanup-disabled', 'notice'); +SELECT injection_points_attach('vacuum-index-cleanup-enabled', 'notice'); +SELECT injection_points_attach('vacuum-truncate-auto', 'notice'); +SELECT injection_points_attach('vacuum-truncate-disabled', 'notice'); +SELECT injection_points_attach('vacuum-truncate-enabled', 'notice'); + +-- Check state of index_cleanup and truncate in VACUUM. +CREATE TABLE vac_tab_on_toast_off(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=true, toast.vacuum_index_cleanup=false, + vacuum_truncate=true, toast.vacuum_truncate=false); +CREATE TABLE vac_tab_off_toast_on(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=false, toast.vacuum_index_cleanup=true, + vacuum_truncate=false, toast.vacuum_truncate=true); +-- Multiple relations should use their options in isolation. 
+VACUUM vac_tab_on_toast_off, vac_tab_off_toast_on; + +-- Check "auto" case of index_cleanup and "truncate" controlled by +-- its GUC. +CREATE TABLE vac_tab_auto(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=auto, toast.vacuum_index_cleanup=auto); +SET vacuum_truncate = false; +VACUUM vac_tab_auto; +SET vacuum_truncate = true; +VACUUM vac_tab_auto; +RESET vacuum_truncate; + +DROP TABLE vac_tab_auto; +DROP TABLE vac_tab_on_toast_off; +DROP TABLE vac_tab_off_toast_on; + +-- Cleanup +SELECT injection_points_detach('vacuum-index-cleanup-auto'); +SELECT injection_points_detach('vacuum-index-cleanup-disabled'); +SELECT injection_points_detach('vacuum-index-cleanup-enabled'); +SELECT injection_points_detach('vacuum-truncate-auto'); +SELECT injection_points_detach('vacuum-truncate-disabled'); +SELECT injection_points_detach('vacuum-truncate-enabled'); +DROP EXTENSION injection_points; From 69e5cdc47fa1fbd97c8b8c7abe6fb0b9a9822acb Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Wed, 25 Jun 2025 10:25:15 +0530 Subject: [PATCH 149/602] Doc: Improve documentation of stream abort. Protocol v4 introduces parallel streaming, which allows Stream Abort messages to include additional abort information such as LSN and timestamp. However, the current documentation only states, "This field is available since protocol version 4," which may misleadingly suggest that the fields are always present when using protocol v4. This patch clarifies that the abort LSN and timestamp are included only when parallel streaming is enabled, even under protocol v4. Author: Anthonin Bonnefoy Reviewed-by: Amit Kapila Backpatch-through: 16, where it was introduced Discussion: https://postgr.es/m/CAO6_XqoKteQR1AnaR8iPcegbBE+HkAc2-g12rxN04yOt4-2ORg@mail.gmail.com --- doc/src/sgml/protocol.sgml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 137ffc8d0b7eb..82fe3f93761dc 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -7292,8 +7292,8 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" Int64 (XLogRecPtr) - The LSN of the abort. This field is available since protocol version - 4. + The LSN of the abort operation, present only when streaming is set to parallel. + This field is available since protocol version 4. @@ -7302,9 +7302,9 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" Int64 (TimestampTz) - Abort timestamp of the transaction. The value is in number - of microseconds since PostgreSQL epoch (2000-01-01). This field is - available since protocol version 4. + Abort timestamp of the transaction, present only when streaming is set to + parallel. The value is in number of microseconds since PostgreSQL epoch (2000-01-01). + This field is available since protocol version 4. From 0cd69b3d7ef357f2b43258de5831c4de0bd51dec Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 25 Jun 2025 09:55:04 +0200 Subject: [PATCH 150/602] Restrict virtual columns to use built-in functions and types Just like selecting from a view is exploitable (CVE-2024-7348), selecting from a table with virtual generated columns is exploitable. Users who are concerned about this can avoid selecting from views, but telling them to avoid selecting from tables is less practical. To address this, this changes it so that generation expressions for virtual generated columns are restricted to using built-in functions and types, and the columns are restricted to having a built-in type. 
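As an illustrative sketch of the new behavior (modeled on the regression tests below; the function and table names are hypothetical):

    CREATE FUNCTION f(a int) RETURNS int IMMUTABLE LANGUAGE SQL AS $$ SELECT a * 3 $$;
    CREATE TABLE vt1 (a int, b int GENERATED ALWAYS AS (f(a)) VIRTUAL);   -- now rejected
    CREATE TABLE vt2 (a int, b int GENERATED ALWAYS AS (abs(a)) VIRTUAL); -- built-in, still allowed
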
We assume that built-in functions and types cannot be exploited for this purpose. In the future, this could be expanded by some new mechanism to declare other functions and types as safe or trusted for this purpose, but that is to be designed. (An alternative approach might have been to expand the restrict_nonsystem_relation_kind GUC to handle this, like the fix for CVE-2024-7348. But that is kind of an ugly approach. That fix had to fit in the constraints of fixing an ancient vulnerability in all branches. Since virtual generated columns are new, we're free from the constraints of the past, and we can and should use cleaner options.) Reported-by: Feike Steenbergen Reviewed-by: jian he Discussion: https://www.postgresql.org/message-id/flat/CAK_s-G2Q7de8Q0qOYUR%3D_CTB5FzzVBm5iZjOp%2BmeVWpMpmfO0w%40mail.gmail.com --- doc/src/sgml/ddl.sgml | 10 ++ doc/src/sgml/ref/create_table.sgml | 9 ++ src/backend/catalog/heap.c | 93 +++++++++++++++++++ src/include/catalog/catversion.h | 2 +- .../regress/expected/generated_virtual.out | 43 +++++---- src/test/regress/expected/publication.out | 12 ++- src/test/regress/sql/generated_virtual.sql | 24 +++-- src/test/regress/sql/publication.sql | 5 +- 8 files changed, 163 insertions(+), 35 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 96936bcd3ae3e..65bc070d2e5fa 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -419,6 +419,16 @@ CREATE TABLE people ( tableoid. + + + A virtual generated column cannot have a user-defined type, and the + generation expression of a virtual generated column must not reference + user-defined functions or types, that is, it can only use built-in + functions or types. This applies also indirectly, such as for functions + or types that underlie operators or casts. (This restriction does not + exist for stored generated columns.) + + A generated column cannot have a column default or an identity definition. diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 9d92e0a35516f..dc000e913c143 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -924,6 +924,15 @@ WITH ( MODULUS numeric_literal, REM not other generated columns. Any functions and operators used must be immutable. References to other tables are not allowed. + + + A virtual generated column cannot have a user-defined type, and the + generation expression of a virtual generated column must not reference + user-defined functions or types, that is, it can only use built-in + functions or types. This applies also indirectly, such as for functions + or types that underlie operators or casts. (This restriction does not + exist for stored generated columns.) + diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 10f43c51c5af0..649d3966e8e21 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -664,6 +664,15 @@ CheckAttributeType(const char *attname, flags); } + /* + * For consistency with check_virtual_generated_security(). + */ + if ((flags & CHKATYPE_IS_VIRTUAL) && atttypid >= FirstUnpinnedObjectId) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("virtual generated column \"%s\" cannot have a user-defined type", attname), + errdetail("Virtual generated columns that make use of user-defined types are not yet supported.")); + /* * This might not be strictly invalid per SQL standard, but it is pretty * useless, and it cannot be dumped, so we must disallow it. 
@@ -3215,6 +3224,86 @@ check_nested_generated(ParseState *pstate, Node *node) check_nested_generated_walker(node, pstate); } +/* + * Check security of virtual generated column expression. + * + * Just like selecting from a view is exploitable (CVE-2024-7348), selecting + * from a table with virtual generated columns is exploitable. Users who are + * concerned about this can avoid selecting from views, but telling them to + * avoid selecting from tables is less practical. + * + * To address this, generation expressions for virtual generated columns + * are restricted to using built-in functions and types. We + * assume that built-in functions and types cannot be exploited for this + * purpose. Note the overall security also requires that all functions in use + * are immutable. (For example, there are some built-in non-immutable functions + * that can run arbitrary SQL.) The immutability is checked elsewhere, since + * that is a property that needs to hold independent of security + * considerations. + * + * In the future, this could be expanded by some new mechanism to declare + * other functions and types as safe or trusted for this purpose, but that is + * to be designed. + */ + +/* + * Callback for check_functions_in_node() that determines whether a function + * is user-defined. + */ +static bool +contains_user_functions_checker(Oid func_id, void *context) +{ + return (func_id >= FirstUnpinnedObjectId); +} + +/* + * Checks for all the things we don't want in the generation expressions of + * virtual generated columns for security reasons. Errors out if it finds + * one. + */ +static bool +check_virtual_generated_security_walker(Node *node, void *context) +{ + ParseState *pstate = context; + + if (node == NULL) + return false; + + if (!IsA(node, List)) + { + if (check_functions_in_node(node, contains_user_functions_checker, NULL)) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("generation expression uses user-defined function"), + errdetail("Virtual generated columns that make use of user-defined functions are not yet supported."), + parser_errposition(pstate, exprLocation(node))); + + /* + * check_functions_in_node() doesn't check some node types (see + * comment there). We handle CoerceToDomain and MinMaxExpr by + * checking for built-in types. The other listed node types cannot + * call user-definable SQL-visible functions. + * + * We furthermore need this type check to handle built-in, immutable + * polymorphic functions such as array_eq(). + */ + if (exprType(node) >= FirstUnpinnedObjectId) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("generation expression uses user-defined type"), + errdetail("Virtual generated columns that make use of user-defined types are not yet supported."), + parser_errposition(pstate, exprLocation(node))); + } + + return expression_tree_walker(node, check_virtual_generated_security_walker, context); +} + +static void +check_virtual_generated_security(ParseState *pstate, Node *node) +{ + check_virtual_generated_security_walker(node, pstate); +} + /* * Take a raw default and convert it to a cooked format ready for * storage.
@@ -3254,6 +3343,10 @@ cookDefault(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("generation expression is not immutable"))); + + /* Check security of expressions for virtual generated column */ + if (attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) + check_virtual_generated_security(pstate, expr); } else { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 67fbe9c929276..d63db42ed7b37 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202506121 +#define CATALOG_VERSION_NO 202506251 #endif diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index ab35a77477445..47cbd3a82fe2d 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -553,15 +553,11 @@ CREATE TABLE gtest4 ( a int, b double_int GENERATED ALWAYS AS ((a * 2, a * 3)) VIRTUAL ); -INSERT INTO gtest4 VALUES (1), (6); -SELECT * FROM gtest4; - a | b ----+--------- - 1 | (2,3) - 6 | (12,18) -(2 rows) - -DROP TABLE gtest4; +ERROR: virtual generated column "b" cannot have a user-defined type +DETAIL: Virtual generated columns that make use of user-defined types are not yet supported. +--INSERT INTO gtest4 VALUES (1), (6); +--SELECT * FROM gtest4; +--DROP TABLE gtest4; DROP TYPE double_int; -- using tableoid is allowed CREATE TABLE gtest_tableoid ( @@ -604,9 +600,13 @@ INSERT INTO gtest11 VALUES (1, 10), (2, 20); GRANT SELECT (a, c) ON gtest11 TO regress_user11; CREATE FUNCTION gf1(a int) RETURNS int AS $$ SELECT a * 3 $$ IMMUTABLE LANGUAGE SQL; REVOKE ALL ON FUNCTION gf1(int) FROM PUBLIC; -CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -INSERT INTO gtest12 VALUES (1, 10), (2, 20); -GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; +CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -- fails, user-defined function +ERROR: generation expression uses user-defined function +LINE 1: ...nt PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VI... + ^ +DETAIL: Virtual generated columns that make use of user-defined functions are not yet supported. +--INSERT INTO gtest12 VALUES (1, 10), (2, 20); +--GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; SET ROLE regress_user11; SELECT a, b FROM gtest11; -- not allowed ERROR: permission denied for table gtest11 @@ -619,15 +619,12 @@ SELECT a, c FROM gtest11; -- allowed SELECT gf1(10); -- not allowed ERROR: permission denied for function gf1 -INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) -SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed -ERROR: permission denied for function gf1 +--INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) +--SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed RESET ROLE; -DROP FUNCTION gf1(int); -- fail -ERROR: cannot drop function gf1(integer) because other objects depend on it -DETAIL: column c of table gtest12 depends on function gf1(integer) -HINT: Use DROP ... CASCADE to drop the dependent objects too. 
-DROP TABLE gtest11, gtest12; +--DROP FUNCTION gf1(int); -- fail +DROP TABLE gtest11; +--DROP TABLE gtest12; DROP FUNCTION gf1(int); DROP USER regress_user11; -- check constraints @@ -811,6 +808,12 @@ CREATE TABLE gtest24nn (a int, b gtestdomainnn GENERATED ALWAYS AS (a * 2) VIRTU ERROR: virtual generated column "b" cannot have a domain type --INSERT INTO gtest24nn (a) VALUES (4); -- ok --INSERT INTO gtest24nn (a) VALUES (NULL); -- error +-- using user-defined type not yet supported +CREATE TABLE gtest24xxx (a gtestdomain1, b gtestdomain1, c int GENERATED ALWAYS AS (greatest(a, b)) VIRTUAL); -- error +ERROR: generation expression uses user-defined type +LINE 1: ...main1, b gtestdomain1, c int GENERATED ALWAYS AS (greatest(a... + ^ +DETAIL: Virtual generated columns that make use of user-defined types are not yet supported. -- typed tables (currently not supported) CREATE TYPE gtest_type AS (f1 integer, f2 text, f3 bigint); CREATE TABLE gtest28 OF gtest_type (f1 WITH OPTIONS GENERATED ALWAYS AS (f2 *2) VIRTUAL); diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 4de96c04f9de4..f1025fc0f198d 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -524,10 +524,16 @@ Tables from schemas: "testpub_rf_schema2" -- fail - virtual generated column uses user-defined function +-- (Actually, this already fails at CREATE TABLE rather than at CREATE +-- PUBLICATION, but let's keep the test in case the former gets +-- relaxed sometime.) CREATE TABLE testpub_rf_tbl6 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * testpub_rf_func2()) VIRTUAL); +ERROR: generation expression uses user-defined function +LINE 1: ...RIMARY KEY, x int, y int GENERATED ALWAYS AS (x * testpub_rf... + ^ +DETAIL: Virtual generated columns that make use of user-defined functions are not yet supported. CREATE PUBLICATION testpub7 FOR TABLE testpub_rf_tbl6 WHERE (y > 100); -ERROR: invalid publication WHERE expression -DETAIL: User-defined or built-in mutable functions are not allowed. 
+ERROR: relation "testpub_rf_tbl6" does not exist -- test that SET EXPRESSION is rejected, because it could affect a row filter SET client_min_messages = 'ERROR'; CREATE TABLE testpub_rf_tbl7 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * 111) VIRTUAL); @@ -541,7 +547,7 @@ DROP TABLE testpub_rf_tbl2; DROP TABLE testpub_rf_tbl3; DROP TABLE testpub_rf_tbl4; DROP TABLE testpub_rf_tbl5; -DROP TABLE testpub_rf_tbl6; +--DROP TABLE testpub_rf_tbl6; DROP TABLE testpub_rf_schema1.testpub_rf_tbl5; DROP TABLE testpub_rf_schema2.testpub_rf_tbl6; DROP SCHEMA testpub_rf_schema1; diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql index 9011c9d26745f..c731d12376341 100644 --- a/src/test/regress/sql/generated_virtual.sql +++ b/src/test/regress/sql/generated_virtual.sql @@ -253,10 +253,10 @@ CREATE TABLE gtest4 ( a int, b double_int GENERATED ALWAYS AS ((a * 2, a * 3)) VIRTUAL ); -INSERT INTO gtest4 VALUES (1), (6); -SELECT * FROM gtest4; +--INSERT INTO gtest4 VALUES (1), (6); +--SELECT * FROM gtest4; -DROP TABLE gtest4; +--DROP TABLE gtest4; DROP TYPE double_int; -- using tableoid is allowed @@ -290,20 +290,21 @@ GRANT SELECT (a, c) ON gtest11 TO regress_user11; CREATE FUNCTION gf1(a int) RETURNS int AS $$ SELECT a * 3 $$ IMMUTABLE LANGUAGE SQL; REVOKE ALL ON FUNCTION gf1(int) FROM PUBLIC; -CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -INSERT INTO gtest12 VALUES (1, 10), (2, 20); -GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; +CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -- fails, user-defined function +--INSERT INTO gtest12 VALUES (1, 10), (2, 20); +--GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; SET ROLE regress_user11; SELECT a, b FROM gtest11; -- not allowed SELECT a, c FROM gtest11; -- allowed SELECT gf1(10); -- not allowed -INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) -SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed +--INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) +--SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed RESET ROLE; -DROP FUNCTION gf1(int); -- fail -DROP TABLE gtest11, gtest12; +--DROP FUNCTION gf1(int); -- fail +DROP TABLE gtest11; +--DROP TABLE gtest12; DROP FUNCTION gf1(int); DROP USER regress_user11; @@ -463,6 +464,9 @@ CREATE TABLE gtest24nn (a int, b gtestdomainnn GENERATED ALWAYS AS (a * 2) VIRTU --INSERT INTO gtest24nn (a) VALUES (4); -- ok --INSERT INTO gtest24nn (a) VALUES (NULL); -- error +-- using user-defined type not yet supported +CREATE TABLE gtest24xxx (a gtestdomain1, b gtestdomain1, c int GENERATED ALWAYS AS (greatest(a, b)) VIRTUAL); -- error + -- typed tables (currently not supported) CREATE TYPE gtest_type AS (f1 integer, f2 text, f3 bigint); CREATE TABLE gtest28 OF gtest_type (f1 WITH OPTIONS GENERATED ALWAYS AS (f2 *2) VIRTUAL); diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 68001de4000fd..c9e309190dfa6 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -262,6 +262,9 @@ ALTER PUBLICATION testpub6 SET TABLES IN SCHEMA testpub_rf_schema2, TABLE testpu RESET client_min_messages; \dRp+ testpub6 -- fail - virtual generated column uses user-defined function +-- (Actually, this 
already fails at CREATE TABLE rather than at CREATE +-- PUBLICATION, but let's keep the test in case the former gets +-- relaxed sometime.) CREATE TABLE testpub_rf_tbl6 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * testpub_rf_func2()) VIRTUAL); CREATE PUBLICATION testpub7 FOR TABLE testpub_rf_tbl6 WHERE (y > 100); -- test that SET EXPRESSION is rejected, because it could affect a row filter @@ -276,7 +279,7 @@ DROP TABLE testpub_rf_tbl2; DROP TABLE testpub_rf_tbl3; DROP TABLE testpub_rf_tbl4; DROP TABLE testpub_rf_tbl5; -DROP TABLE testpub_rf_tbl6; +--DROP TABLE testpub_rf_tbl6; DROP TABLE testpub_rf_schema1.testpub_rf_tbl5; DROP TABLE testpub_rf_schema2.testpub_rf_tbl6; DROP SCHEMA testpub_rf_schema1; From 60dda7bbc45f30e2ba50ecf20dfb9d944a589a38 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 25 Jun 2025 10:33:23 +0200 Subject: [PATCH 151/602] pg_createsubscriber: Rename option --remove to --clean After discussion, the name --remove was suboptimally chosen. --clean has more precedent in other PostgreSQL tools. Reviewed-by: Hayato Kuroda (Fujitsu) Discussion: https://www.postgresql.org/message-id/84be7ff3-2763-4c0f-ac1e-ca9862077f41@eisentraut.org --- doc/src/sgml/ref/pg_createsubscriber.sgml | 59 +++++++++---------- src/bin/pg_basebackup/pg_createsubscriber.c | 34 +++++------ .../t/040_pg_createsubscriber.pl | 6 +- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/doc/src/sgml/ref/pg_createsubscriber.sgml b/doc/src/sgml/ref/pg_createsubscriber.sgml index 4b1d08d5f16da..bb9cc72576c4a 100644 --- a/doc/src/sgml/ref/pg_createsubscriber.sgml +++ b/doc/src/sgml/ref/pg_createsubscriber.sgml @@ -169,36 +169,6 @@ PostgreSQL documentation - - - - - - Remove all objects of the specified type from specified databases on the - target server. - - - - - - publications: - The FOR ALL TABLES publications established for this - subscriber are always removed; specifying this object type causes all - other publications replicated from the source server to be dropped as - well. - - - - - - The objects selected to be dropped are individually logged, including during - a . There is no opportunity to affect or stop the - dropping of the selected objects, so consider taking a backup of them - using pg_dump. - - - - @@ -259,6 +229,35 @@ PostgreSQL documentation + + + + + Drop all objects of the specified type from specified databases on the + target server. + + + + + + publications: + The FOR ALL TABLES publications established for this + subscriber are always dropped; specifying this object type causes all + other publications replicated from the source server to be dropped as + well. + + + + + + The objects selected to be dropped are individually logged, including during + a . There is no opportunity to affect or stop the + dropping of the selected objects, so consider taking a backup of them + using pg_dump. 
+ + + + diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c index c43c0cbbba5a6..11f71c0380181 100644 --- a/src/bin/pg_basebackup/pg_createsubscriber.c +++ b/src/bin/pg_basebackup/pg_createsubscriber.c @@ -46,7 +46,7 @@ struct CreateSubscriberOptions SimpleStringList replslot_names; /* list of replication slot names */ int recovery_timeout; /* stop recovery after this time */ bool all_dbs; /* all option */ - SimpleStringList objecttypes_to_remove; /* list of object types to remove */ + SimpleStringList objecttypes_to_clean; /* list of object types to cleanup */ }; /* per-database publication/subscription info */ @@ -71,8 +71,8 @@ struct LogicalRepInfos { struct LogicalRepInfo *dbinfo; bool two_phase; /* enable-two-phase option */ - bits32 objecttypes_to_remove; /* flags indicating which object types - * to remove on subscriber */ + bits32 objecttypes_to_clean; /* flags indicating which object types + * to clean up on subscriber */ }; static void cleanup_objects_atexit(void); @@ -253,13 +253,13 @@ usage(void) printf(_(" -n, --dry-run dry run, just show what would be done\n")); printf(_(" -p, --subscriber-port=PORT subscriber port number (default %s)\n"), DEFAULT_SUB_PORT); printf(_(" -P, --publisher-server=CONNSTR publisher connection string\n")); - printf(_(" -R, --remove=OBJECTTYPE remove all objects of the specified type from specified\n" - " databases on the subscriber; accepts: \"%s\"\n"), "publications"); printf(_(" -s, --socketdir=DIR socket directory to use (default current dir.)\n")); printf(_(" -t, --recovery-timeout=SECS seconds to wait for recovery to end\n")); printf(_(" -T, --enable-two-phase enable two-phase commit for all subscriptions\n")); printf(_(" -U, --subscriber-username=NAME user name for subscriber connection\n")); printf(_(" -v, --verbose output verbose messages\n")); + printf(_(" --clean=OBJECTTYPE drop all objects of the specified type from specified\n" + " databases on the subscriber; accepts: \"%s\"\n"), "publications"); printf(_(" --config-file=FILENAME use specified main server configuration\n" " file when running target cluster\n")); printf(_(" --publication=NAME publication name\n")); @@ -1730,7 +1730,7 @@ static void check_and_drop_publications(PGconn *conn, struct LogicalRepInfo *dbinfo) { PGresult *res; - bool drop_all_pubs = dbinfos.objecttypes_to_remove & OBJECTTYPE_PUBLICATIONS; + bool drop_all_pubs = dbinfos.objecttypes_to_clean & OBJECTTYPE_PUBLICATIONS; Assert(conn != NULL); @@ -2026,7 +2026,6 @@ main(int argc, char **argv) {"dry-run", no_argument, NULL, 'n'}, {"subscriber-port", required_argument, NULL, 'p'}, {"publisher-server", required_argument, NULL, 'P'}, - {"remove", required_argument, NULL, 'R'}, {"socketdir", required_argument, NULL, 's'}, {"recovery-timeout", required_argument, NULL, 't'}, {"enable-two-phase", no_argument, NULL, 'T'}, @@ -2038,6 +2037,7 @@ main(int argc, char **argv) {"publication", required_argument, NULL, 2}, {"replication-slot", required_argument, NULL, 3}, {"subscription", required_argument, NULL, 4}, + {"clean", required_argument, NULL, 5}, {NULL, 0, NULL, 0} }; @@ -2109,7 +2109,7 @@ main(int argc, char **argv) get_restricted_token(); - while ((c = getopt_long(argc, argv, "ad:D:np:P:R:s:t:TU:v", + while ((c = getopt_long(argc, argv, "ad:D:np:P:s:t:TU:v", long_options, &option_index)) != -1) { switch (c) @@ -2139,12 +2139,6 @@ main(int argc, char **argv) case 'P': opt.pub_conninfo_str = pg_strdup(optarg); break; - case 'R': - if 
(!simple_string_list_member(&opt.objecttypes_to_remove, optarg)) - simple_string_list_append(&opt.objecttypes_to_remove, optarg); - else - pg_fatal("object type \"%s\" specified more than once for -R/--remove", optarg); - break; case 's': opt.socket_dir = pg_strdup(optarg); canonicalize_path(opt.socket_dir); @@ -2191,6 +2185,12 @@ main(int argc, char **argv) else pg_fatal("subscription \"%s\" specified more than once for --subscription", optarg); break; + case 5: + if (!simple_string_list_member(&opt.objecttypes_to_clean, optarg)) + simple_string_list_append(&opt.objecttypes_to_clean, optarg); + else + pg_fatal("object type \"%s\" specified more than once for --clean", optarg); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -2334,13 +2334,13 @@ main(int argc, char **argv) } /* Verify the object types specified for removal from the subscriber */ - for (SimpleStringListCell *cell = opt.objecttypes_to_remove.head; cell; cell = cell->next) + for (SimpleStringListCell *cell = opt.objecttypes_to_clean.head; cell; cell = cell->next) { if (pg_strcasecmp(cell->val, "publications") == 0) - dbinfos.objecttypes_to_remove |= OBJECTTYPE_PUBLICATIONS; + dbinfos.objecttypes_to_clean |= OBJECTTYPE_PUBLICATIONS; else { - pg_log_error("invalid object type \"%s\" specified for -R/--remove", cell->val); + pg_log_error("invalid object type \"%s\" specified for --clean", cell->val); pg_log_error_hint("The valid value is: \"%s\"", "publications"); exit(1); } diff --git a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl index df4924023fdf2..229fef5b3b52b 100644 --- a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl +++ b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl @@ -331,7 +331,7 @@ sub generate_db $node_p->wait_for_replay_catchup($node_s); # Create user-defined publications, wait for streaming replication to sync them -# to the standby, then verify that '--remove' +# to the standby, then verify that '--clean' # removes them. $node_p->safe_psql( $db1, qq( @@ -446,7 +446,7 @@ sub generate_db # Run pg_createsubscriber on node S. --verbose is used twice # to show more information. 
# In passing, also test the --enable-two-phase option and -# --remove option +# --clean option command_ok( [ 'pg_createsubscriber', @@ -463,7 +463,7 @@ sub generate_db '--database' => $db1, '--database' => $db2, '--enable-two-phase', - '--remove' => 'publications', + '--clean' => 'publications', ], 'run pg_createsubscriber on node S'); From 62a47aea1d8d8ea36e63fe6dd3d9891452a3f968 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 25 Jun 2025 12:44:03 +0200 Subject: [PATCH 152/602] doc: Some copy-editing around constraint validation and enforcement Author: Robert Treat Reviewed-by: jian he Discussion: https://www.postgresql.org/message-id/flat/CACJufxFo4yTwzbSZrP%2BzQiR6_M00skoZMFaUnNJCdY6he%3DuQfA%40mail.gmail.com --- doc/src/sgml/ref/alter_table.sgml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index d63f3a621acc6..d16969916835d 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -460,8 +460,8 @@ WITH ( MODULUS numeric_literal, REM This form adds a new constraint to a table using the same constraint syntax as CREATE TABLE, plus the option NOT - VALID, which is currently only allowed for foreign key, - CHECK constraints and not-null constraints. + VALID, which is currently only allowed for foreign-key, + CHECK, and not-null constraints. @@ -469,7 +469,7 @@ WITH ( MODULUS numeric_literal, REM existing rows in the table satisfy the new constraint. But if the NOT VALID option is used, this potentially-lengthy scan is skipped. The constraint will still be - enforced against subsequent inserts or updates (that is, they'll fail + applied against subsequent inserts or updates (that is, they'll fail unless there is a matching row in the referenced table, in the case of foreign keys, or they'll fail unless the new row matches the specified check condition). But the @@ -591,7 +591,7 @@ WITH ( MODULUS numeric_literal, REM This form validates a foreign key, check, or not-null constraint that was previously created as NOT VALID, by scanning the table to ensure there are no rows for which the constraint is not - satisfied. If the constraint is not enforced, an error is thrown. + satisfied. If the constraint was set to NOT ENFORCED, an error is thrown. Nothing happens if the constraint is already marked valid. (See below for an explanation of the usefulness of this command.) @@ -1466,11 +1466,11 @@ WITH ( MODULUS numeric_literal, REM - Adding an enforced CHECK or NOT NULL + Adding a CHECK or NOT NULL constraint requires scanning the table to verify that existing rows meet the constraint, but does not require a table rewrite. If a CHECK - constraint is added as NOT ENFORCED, the validation will - not be performed. + constraint is added as NOT ENFORCED, no verification will + be performed. @@ -1485,7 +1485,7 @@ WITH ( MODULUS numeric_literal, REM - Scanning a large table to verify a new foreign key or check constraint + Scanning a large table to verify new foreign-key, check, or not-null constraints can take a long time, and other updates to the table are locked out until the ALTER TABLE ADD CONSTRAINT command is committed. 
The main purpose of the NOT VALID From 5069fef1cfae271ca62e254b16dc831145bc5a4f Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Thu, 26 Jun 2025 12:17:12 +0900 Subject: [PATCH 153/602] Expand virtual generated columns for ALTER COLUMN TYPE For the subcommand ALTER COLUMN TYPE of the ALTER TABLE command, the USING expression may reference virtual generated columns. These columns must be expanded before the expression is fed through expression_planner and the expression-execution machinery. Failing to do so can result in incorrect rewrite decisions, and can also lead to "ERROR: unexpected virtual generated column reference". Reported-by: Alexander Lakhin Reviewed-by: jian he Discussion: https://postgr.es/m/b5f96b24-ccac-47fd-9e20-14681b894f36@gmail.com --- src/backend/commands/tablecmds.c | 3 ++ .../regress/expected/generated_virtual.out | 36 ++++++++++--------- src/test/regress/sql/generated_virtual.sql | 10 ++++-- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 074ddb6b9cd17..1c3ad74e7b9e9 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14484,6 +14484,9 @@ ATPrepAlterColumnType(List **wqueue, /* Fix collations after all else */ assign_expr_collations(pstate, transform); + /* Expand virtual generated columns in the expr. */ + transform = expand_generated_columns_in_expr(transform, rel, 1); + /* Plan the expr now so we can accurately assess the need to rewrite. */ transform = (Node *) expression_planner((Expr *) transform); diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index 47cbd3a82fe2d..46713f06797e5 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -1479,7 +1479,8 @@ create table gtest32 ( a int primary key, b int generated always as (a * 2), c int generated always as (10 + 10), - d int generated always as (coalesce(a, 100)) + d int generated always as (coalesce(a, 100)), + e int ); insert into gtest32 values (1), (2); analyze gtest32; @@ -1563,41 +1564,44 @@ select t2.* from gtest32 t1 left join gtest32 t2 on false; QUERY PLAN ------------------------------------------------------ Nested Loop Left Join - Output: a, (a * 2), (20), (COALESCE(a, 100)) + Output: a, (a * 2), (20), (COALESCE(a, 100)), e Join Filter: false -> Seq Scan on generated_virtual_tests.gtest32 t1 - Output: t1.a, t1.b, t1.c, t1.d + Output: t1.a, t1.b, t1.c, t1.d, t1.e -> Result - Output: a, 20, COALESCE(a, 100) + Output: a, e, 20, COALESCE(a, 100) One-Time Filter: false (8 rows) select t2.* from gtest32 t1 left join gtest32 t2 on false; - a | b | c | d ----+---+---+--- - | | | - | | | + a | b | c | d | e +---+---+---+---+--- + | | | | + | | | | (2 rows) explain (verbose, costs off) -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; QUERY PLAN ----------------------------------------------------- HashAggregate - Output: a, ((a * 2)), (20), (COALESCE(a, 100)) + Output: a, ((a * 2)), (20), (COALESCE(a, 100)), e Hash Key: t.a Hash Key: (t.a * 2) Hash Key: 20 Hash Key: COALESCE(t.a, 100) + Hash Key: t.e Filter: ((20) = 20) -> Seq Scan on generated_virtual_tests.gtest32 t - Output: a, (a * 2), 20, COALESCE(a, 100) -(9 rows) + Output: a, (a * 2), 20, COALESCE(a, 100), e +(10 rows) -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; - a | b 
| c | d ----+---+----+--- - | | 20 | +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; + a | b | c | d | e +---+---+----+---+--- + | | 20 | | (1 row) +-- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded +alter table gtest32 alter column e type bigint using b; drop table gtest32; diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql index c731d12376341..6fa986515b9e3 100644 --- a/src/test/regress/sql/generated_virtual.sql +++ b/src/test/regress/sql/generated_virtual.sql @@ -797,7 +797,8 @@ create table gtest32 ( a int primary key, b int generated always as (a * 2), c int generated always as (10 + 10), - d int generated always as (coalesce(a, 100)) + d int generated always as (coalesce(a, 100)), + e int ); insert into gtest32 values (1), (2); @@ -838,7 +839,10 @@ select t2.* from gtest32 t1 left join gtest32 t2 on false; select t2.* from gtest32 t1 left join gtest32 t2 on false; explain (verbose, costs off) -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; + +-- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded +alter table gtest32 alter column e type bigint using b; drop table gtest32; From eca624c6591be14a1a03f0b2d374043cdf276431 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 26 Jun 2025 09:36:05 +0100 Subject: [PATCH 154/602] doc: Fix indentation of MERGE synopsis. The convention in the documentation for other SQL commands is to indent continuation lines and sub-clauses in the "Synopsis" section by 4 spaces, so do the same for MERGE. Author: Dean Rasheed Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/CAEZATCV+9tR9+WM-SCcdBEZ3x7WVxUpADD5jX9WeGX97z4LCGA@mail.gmail.com Backpatch-through: 15 --- doc/src/sgml/ref/merge.sgml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/src/sgml/ref/merge.sgml b/doc/src/sgml/ref/merge.sgml index ecbcd8345d874..e76ebd157e52a 100644 --- a/doc/src/sgml/ref/merge.sgml +++ b/doc/src/sgml/ref/merge.sgml @@ -23,37 +23,37 @@ PostgreSQL documentation [ WITH with_query [, ...] ] MERGE INTO [ ONLY ] target_table_name [ * ] [ [ AS ] target_alias ] -USING data_source ON join_condition -when_clause [...] -[ RETURNING [ WITH ( { OLD | NEW } AS output_alias [, ...] ) ] - { * | output_expression [ [ AS ] output_name ] } [, ...] ] + USING data_source ON join_condition + when_clause [...] + [ RETURNING [ WITH ( { OLD | NEW } AS output_alias [, ...] ) ] + { * | output_expression [ [ AS ] output_name ] } [, ...] 
] where data_source is: -{ [ ONLY ] source_table_name [ * ] | ( source_query ) } [ [ AS ] source_alias ] + { [ ONLY ] source_table_name [ * ] | ( source_query ) } [ [ AS ] source_alias ] and when_clause is: -{ WHEN MATCHED [ AND condition ] THEN { merge_update | merge_delete | DO NOTHING } | - WHEN NOT MATCHED BY SOURCE [ AND condition ] THEN { merge_update | merge_delete | DO NOTHING } | - WHEN NOT MATCHED [ BY TARGET ] [ AND condition ] THEN { merge_insert | DO NOTHING } } + { WHEN MATCHED [ AND condition ] THEN { merge_update | merge_delete | DO NOTHING } | + WHEN NOT MATCHED BY SOURCE [ AND condition ] THEN { merge_update | merge_delete | DO NOTHING } | + WHEN NOT MATCHED [ BY TARGET ] [ AND condition ] THEN { merge_insert | DO NOTHING } } and merge_insert is: -INSERT [( column_name [, ...] )] -[ OVERRIDING { SYSTEM | USER } VALUE ] -{ VALUES ( { expression | DEFAULT } [, ...] ) | DEFAULT VALUES } + INSERT [( column_name [, ...] )] + [ OVERRIDING { SYSTEM | USER } VALUE ] + { VALUES ( { expression | DEFAULT } [, ...] ) | DEFAULT VALUES } and merge_update is: -UPDATE SET { column_name = { expression | DEFAULT } | - ( column_name [, ...] ) = [ ROW ] ( { expression | DEFAULT } [, ...] ) | - ( column_name [, ...] ) = ( sub-SELECT ) - } [, ...] + UPDATE SET { column_name = { expression | DEFAULT } | + ( column_name [, ...] ) = [ ROW ] ( { expression | DEFAULT } [, ...] ) | + ( column_name [, ...] ) = ( sub-SELECT ) + } [, ...] and merge_delete is: -DELETE + DELETE From 3ba9639e39ed8cf2d9dd30f6b8a3b3e9fffb9a64 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 26 Jun 2025 10:13:00 +0100 Subject: [PATCH 155/602] doc: Updates for RETURNING OLD/NEW. Fix a couple of sentences in the documentation that were missed in commit 80feb727c8. Author: Dean Rasheed Reviewed-by: Robert Treat Discussion: https://postgr.es/m/CAEZATCUcqADJuapZSjPf2b6hFJ6AGOUwefRvh8Ht3UZoqqw69Q@mail.gmail.com --- doc/src/sgml/ref/merge.sgml | 7 ++++--- doc/src/sgml/ref/update.sgml | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/merge.sgml b/doc/src/sgml/ref/merge.sgml index e76ebd157e52a..c2e181066a4e1 100644 --- a/doc/src/sgml/ref/merge.sgml +++ b/doc/src/sgml/ref/merge.sgml @@ -106,10 +106,11 @@ MERGE INTO [ ONLY ] target_table_namemerge_action() - function can be computed. When an INSERT or + function can be computed. By default, when an INSERT or UPDATE action is performed, the new values of the target - table's columns are used. When a DELETE is performed, - the old values of the target table's columns are used. The syntax of the + table's columns are used, and when a DELETE is performed, + the old values of the target table's columns are used, but it is also + possible to explicitly request old and new values. The syntax of the RETURNING list is identical to that of the output list of SELECT. diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml index 12ec5ba070939..40cca06394636 100644 --- a/doc/src/sgml/ref/update.sgml +++ b/doc/src/sgml/ref/update.sgml @@ -57,7 +57,8 @@ UPDATE [ ONLY ] table_name [ * ] [ to compute and return value(s) based on each row actually updated. Any expression using the table's columns, and/or columns of other tables mentioned in FROM, can be computed. - The new (post-update) values of the table's columns are used. + By default, the new (post-update) values of the table's columns are used, + but it is also possible to request the old (pre-update) values. 
The syntax of the RETURNING list is identical to that of the output list of SELECT. From 81ce602d48e8b9cbc3c3a7d5b9d9ddbea7789c02 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 26 Jun 2025 20:25:34 +0900 Subject: [PATCH 156/602] Make CREATE TABLE LIKE copy comments on NOT NULL constraints when requested. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 14e87ffa5c5 introduced support for adding comments to NOT NULL constraints. However, CREATE TABLE LIKE INCLUDING COMMENTS did not copy these comments to the new table. This was an oversight in that commit. This commit corrects the behavior by ensuring CREATE TABLE LIKE to also copy the comments on NOT NULL constraints when INCLUDING COMMENTS is specified. Author: Jian He Co-authored-by: Álvaro Herrera Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/127debef-e558-4784-9e24-0d5eaf91e2d1@oss.nttdata.com --- src/backend/parser/parse_utilcmd.c | 22 ++++++++++++++ .../regress/expected/create_table_like.out | 30 ++++++++++++++++--- src/test/regress/sql/create_table_like.sql | 16 ++++++++-- 3 files changed, 62 insertions(+), 6 deletions(-) diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 62015431fdf1a..afcf54169c3b3 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1279,6 +1279,28 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla lst = RelationGetNotNullConstraints(RelationGetRelid(relation), false, true); cxt->nnconstraints = list_concat(cxt->nnconstraints, lst); + + /* Copy comments on not-null constraints */ + if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS) + { + foreach_node(Constraint, nnconstr, lst) + { + if ((comment = GetComment(get_relation_constraint_oid(RelationGetRelid(relation), + nnconstr->conname, false), + ConstraintRelationId, + 0)) != NULL) + { + CommentStmt *stmt = makeNode(CommentStmt); + + stmt->objtype = OBJECT_TABCONSTRAINT; + stmt->object = (Node *) list_make3(makeString(cxt->relation->schemaname), + makeString(cxt->relation->relname), + makeString(nnconstr->conname)); + stmt->comment = comment; + cxt->alist = lappend(cxt->alist, stmt); + } + } + } } /* diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out index bf34289e9842b..29a779c2e9072 100644 --- a/src/test/regress/expected/create_table_like.out +++ b/src/test/regress/expected/create_table_like.out @@ -332,9 +332,10 @@ COMMENT ON CONSTRAINT ctlt1_a_check ON ctlt1 IS 't1_a_check'; COMMENT ON INDEX ctlt1_pkey IS 'index pkey'; COMMENT ON INDEX ctlt1_b_key IS 'index b_key'; ALTER TABLE ctlt1 ALTER COLUMN a SET STORAGE MAIN; -CREATE TABLE ctlt2 (c text); +CREATE TABLE ctlt2 (c text NOT NULL); ALTER TABLE ctlt2 ALTER COLUMN c SET STORAGE EXTERNAL; COMMENT ON COLUMN ctlt2.c IS 'C'; +COMMENT ON CONSTRAINT ctlt2_c_not_null ON ctlt2 IS 't2_c_not_null'; CREATE TABLE ctlt3 (a text CHECK (length(a) < 5), c text CHECK (length(c) < 7)); ALTER TABLE ctlt3 ALTER COLUMN c SET STORAGE EXTERNAL; ALTER TABLE ctlt3 ALTER COLUMN a SET STORAGE MAIN; @@ -351,9 +352,10 @@ CREATE TABLE ctlt12_storage (LIKE ctlt1 INCLUDING STORAGE, LIKE ctlt2 INCLUDING --------+------+-----------+----------+---------+----------+--------------+------------- a | text | | not null | | main | | b | text | | | | extended | | - c | text | | | | external | | + c | text | | not null | | external | | Not-null constraints: "ctlt1_a_not_null" NOT NULL "a" + "ctlt2_c_not_null" 
NOT NULL "c" CREATE TABLE ctlt12_comments (LIKE ctlt1 INCLUDING COMMENTS, LIKE ctlt2 INCLUDING COMMENTS); \d+ ctlt12_comments @@ -362,9 +364,16 @@ CREATE TABLE ctlt12_comments (LIKE ctlt1 INCLUDING COMMENTS, LIKE ctlt2 INCLUDIN --------+------+-----------+----------+---------+----------+--------------+------------- a | text | | not null | | extended | | A b | text | | | | extended | | B - c | text | | | | extended | | C + c | text | | not null | | extended | | C Not-null constraints: "ctlt1_a_not_null" NOT NULL "a" + "ctlt2_c_not_null" NOT NULL "c" + +SELECT conname, description FROM pg_description, pg_constraint c WHERE classoid = 'pg_constraint'::regclass AND objoid = c.oid AND c.conrelid = 'ctlt12_comments'::regclass; + conname | description +------------------+--------------- + ctlt2_c_not_null | t2_c_not_null +(1 row) CREATE TABLE ctlt1_inh (LIKE ctlt1 INCLUDING CONSTRAINTS INCLUDING COMMENTS) INHERITS (ctlt1); NOTICE: merging column "a" with inherited definition @@ -529,7 +538,9 @@ NOTICE: drop cascades to table inhe -- LIKE must respect NO INHERIT property of constraints CREATE TABLE noinh_con_copy (a int CHECK (a > 0) NO INHERIT, b int not null, c int not null no inherit); -CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS); +COMMENT ON CONSTRAINT noinh_con_copy_b_not_null ON noinh_con_copy IS 'not null b'; +COMMENT ON CONSTRAINT noinh_con_copy_c_not_null ON noinh_con_copy IS 'not null c no inherit'; +CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS INCLUDING COMMENTS); \d+ noinh_con_copy1 Table "public.noinh_con_copy1" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ -543,6 +554,17 @@ Not-null constraints: "noinh_con_copy_b_not_null" NOT NULL "b" "noinh_con_copy_c_not_null" NOT NULL "c" NO INHERIT +SELECT conname, description +FROM pg_description, pg_constraint c +WHERE classoid = 'pg_constraint'::regclass +AND objoid = c.oid AND c.conrelid = 'noinh_con_copy1'::regclass +ORDER BY conname COLLATE "C"; + conname | description +---------------------------+----------------------- + noinh_con_copy_b_not_null | not null b + noinh_con_copy_c_not_null | not null c no inherit +(2 rows) + -- fail, as partitioned tables don't allow NO INHERIT constraints CREATE TABLE noinh_con_copy1_parted (LIKE noinh_con_copy INCLUDING ALL) PARTITION BY LIST (a); diff --git a/src/test/regress/sql/create_table_like.sql b/src/test/regress/sql/create_table_like.sql index 6e21722aaeb95..bf8702116a74b 100644 --- a/src/test/regress/sql/create_table_like.sql +++ b/src/test/regress/sql/create_table_like.sql @@ -143,9 +143,10 @@ COMMENT ON INDEX ctlt1_pkey IS 'index pkey'; COMMENT ON INDEX ctlt1_b_key IS 'index b_key'; ALTER TABLE ctlt1 ALTER COLUMN a SET STORAGE MAIN; -CREATE TABLE ctlt2 (c text); +CREATE TABLE ctlt2 (c text NOT NULL); ALTER TABLE ctlt2 ALTER COLUMN c SET STORAGE EXTERNAL; COMMENT ON COLUMN ctlt2.c IS 'C'; +COMMENT ON CONSTRAINT ctlt2_c_not_null ON ctlt2 IS 't2_c_not_null'; CREATE TABLE ctlt3 (a text CHECK (length(a) < 5), c text CHECK (length(c) < 7)); ALTER TABLE ctlt3 ALTER COLUMN c SET STORAGE EXTERNAL; @@ -162,6 +163,7 @@ CREATE TABLE ctlt12_storage (LIKE ctlt1 INCLUDING STORAGE, LIKE ctlt2 INCLUDING \d+ ctlt12_storage CREATE TABLE ctlt12_comments (LIKE ctlt1 INCLUDING COMMENTS, LIKE ctlt2 INCLUDING COMMENTS); \d+ ctlt12_comments +SELECT conname, description FROM pg_description, pg_constraint c WHERE classoid = 'pg_constraint'::regclass AND objoid = c.oid AND c.conrelid = 'ctlt12_comments'::regclass; CREATE 
TABLE ctlt1_inh (LIKE ctlt1 INCLUDING CONSTRAINTS INCLUDING COMMENTS) INHERITS (ctlt1); \d+ ctlt1_inh SELECT description FROM pg_description, pg_constraint c WHERE classoid = 'pg_constraint'::regclass AND objoid = c.oid AND c.conrelid = 'ctlt1_inh'::regclass; @@ -197,9 +199,19 @@ DROP TABLE ctlt1, ctlt2, ctlt3, ctlt4, ctlt12_storage, ctlt12_comments, ctlt1_in -- LIKE must respect NO INHERIT property of constraints CREATE TABLE noinh_con_copy (a int CHECK (a > 0) NO INHERIT, b int not null, c int not null no inherit); -CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS); + +COMMENT ON CONSTRAINT noinh_con_copy_b_not_null ON noinh_con_copy IS 'not null b'; +COMMENT ON CONSTRAINT noinh_con_copy_c_not_null ON noinh_con_copy IS 'not null c no inherit'; + +CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS INCLUDING COMMENTS); \d+ noinh_con_copy1 +SELECT conname, description +FROM pg_description, pg_constraint c +WHERE classoid = 'pg_constraint'::regclass +AND objoid = c.oid AND c.conrelid = 'noinh_con_copy1'::regclass +ORDER BY conname COLLATE "C"; + -- fail, as partitioned tables don't allow NO INHERIT constraints CREATE TABLE noinh_con_copy1_parted (LIKE noinh_con_copy INCLUDING ALL) PARTITION BY LIST (a); From 47fb87563bf3cca2244840241dde2eb93830559b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 26 Jun 2025 18:24:12 +0200 Subject: [PATCH 157/602] pg_dump: include comments on valid not-null constraints, too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were missing collecting comments for not-null constraints that are dumped inline with the table definition (i.e., valid ones), because they aren't represented by a separately dumpable object. Fix by creating separate TocEntries for the comments. Co-Authored-By: Jian He Co-Authored-By: Álvaro Herrera Reported-By: Fujii Masao Reviewed-By: Fujii Masao Discussion: https://postgr.es/m/d50ff977-c728-4e9e-8488-fc2688e08754@oss.nttdata.com --- doc/src/sgml/ref/pg_dump.sgml | 2 +- src/bin/pg_dump/pg_dump.c | 92 ++++++++++++++++++++--- src/bin/pg_dump/pg_dump.h | 1 + src/bin/pg_dump/t/002_pg_dump.pl | 32 +++++++- src/test/regress/expected/constraints.out | 2 + src/test/regress/sql/constraints.sql | 3 + 6 files changed, 118 insertions(+), 14 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 0d9270116549a..edbb377a5eda1 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1281,7 +1281,7 @@ PostgreSQL documentation materialized views, and foriegn tables. Post-data items include definitions of indexes, triggers, rules, statistics for indexes, and constraints other than validated check - constraints. + and not-null constraints. Pre-data items include all other data definition items. 
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index db944ec223071..1937997ea674d 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -350,7 +350,9 @@ static void buildMatViewRefreshDependencies(Archive *fout); static void getTableDataFKConstraints(void); static void determineNotNullFlags(Archive *fout, PGresult *res, int r, TableInfo *tbinfo, int j, - int i_notnull_name, int i_notnull_invalidoid, + int i_notnull_name, + int i_notnull_comment, + int i_notnull_invalidoid, int i_notnull_noinherit, int i_notnull_islocal, PQExpBuffer *invalidnotnulloids); @@ -9006,6 +9008,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) int i_attalign; int i_attislocal; int i_notnull_name; + int i_notnull_comment; int i_notnull_noinherit; int i_notnull_islocal; int i_notnull_invalidoid; @@ -9089,7 +9092,8 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) /* * Find out any NOT NULL markings for each column. In 18 and up we read - * pg_constraint to obtain the constraint name. notnull_noinherit is set + * pg_constraint to obtain the constraint name, and for valid constraints + * also pg_description to obtain its comment. notnull_noinherit is set * according to the NO INHERIT property. For versions prior to 18, we * store an empty string as the name when a constraint is marked as * attnotnull (this cues dumpTableSchema to print the NOT NULL clause @@ -9097,7 +9101,8 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) * * For invalid constraints, we need to store their OIDs for processing * elsewhere, so we bring the pg_constraint.oid value when the constraint - * is invalid, and NULL otherwise. + * is invalid, and NULL otherwise. Their comments are handled not here + * but by collectComments, because they're their own dumpable object. * * We track in notnull_islocal whether the constraint was defined directly * in this table or via an ancestor, for binary upgrade. flagInhAttrs @@ -9107,6 +9112,8 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) if (fout->remoteVersion >= 180000) appendPQExpBufferStr(q, "co.conname AS notnull_name,\n" + "CASE WHEN co.convalidated THEN pt.description" + " ELSE NULL END AS notnull_comment,\n" "CASE WHEN NOT co.convalidated THEN co.oid " "ELSE NULL END AS notnull_invalidoid,\n" "co.connoinherit AS notnull_noinherit,\n" @@ -9114,6 +9121,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) else appendPQExpBufferStr(q, "CASE WHEN a.attnotnull THEN '' ELSE NULL END AS notnull_name,\n" + "NULL AS notnull_comment,\n" "NULL AS notnull_invalidoid,\n" "false AS notnull_noinherit,\n" "a.attislocal AS notnull_islocal,\n"); @@ -9157,15 +9165,16 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) /* * In versions 18 and up, we need pg_constraint for explicit NOT NULL - * entries. Also, we need to know if the NOT NULL for each column is - * backing a primary key. + * entries and pg_description to get their comments. 
*/ if (fout->remoteVersion >= 180000) appendPQExpBufferStr(q, " LEFT JOIN pg_catalog.pg_constraint co ON " "(a.attrelid = co.conrelid\n" " AND co.contype = 'n' AND " - "co.conkey = array[a.attnum])\n"); + "co.conkey = array[a.attnum])\n" + " LEFT JOIN pg_catalog.pg_description pt ON " + "(pt.classoid = co.tableoid AND pt.objoid = co.oid)\n"); appendPQExpBufferStr(q, "WHERE a.attnum > 0::pg_catalog.int2\n" @@ -9189,6 +9198,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) i_attalign = PQfnumber(res, "attalign"); i_attislocal = PQfnumber(res, "attislocal"); i_notnull_name = PQfnumber(res, "notnull_name"); + i_notnull_comment = PQfnumber(res, "notnull_comment"); i_notnull_invalidoid = PQfnumber(res, "notnull_invalidoid"); i_notnull_noinherit = PQfnumber(res, "notnull_noinherit"); i_notnull_islocal = PQfnumber(res, "notnull_islocal"); @@ -9257,6 +9267,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) tbinfo->attfdwoptions = (char **) pg_malloc(numatts * sizeof(char *)); tbinfo->attmissingval = (char **) pg_malloc(numatts * sizeof(char *)); tbinfo->notnull_constrs = (char **) pg_malloc(numatts * sizeof(char *)); + tbinfo->notnull_comment = (char **) pg_malloc(numatts * sizeof(char *)); tbinfo->notnull_invalid = (bool *) pg_malloc(numatts * sizeof(bool)); tbinfo->notnull_noinh = (bool *) pg_malloc(numatts * sizeof(bool)); tbinfo->notnull_islocal = (bool *) pg_malloc(numatts * sizeof(bool)); @@ -9288,11 +9299,14 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) determineNotNullFlags(fout, res, r, tbinfo, j, i_notnull_name, + i_notnull_comment, i_notnull_invalidoid, i_notnull_noinherit, i_notnull_islocal, &invalidnotnulloids); + tbinfo->notnull_comment[j] = PQgetisnull(res, r, i_notnull_comment) ? + NULL : pg_strdup(PQgetvalue(res, r, i_notnull_comment)); tbinfo->attoptions[j] = pg_strdup(PQgetvalue(res, r, i_attoptions)); tbinfo->attcollation[j] = atooid(PQgetvalue(res, r, i_attcollation)); tbinfo->attcompression[j] = *(PQgetvalue(res, r, i_attcompression)); @@ -9704,8 +9718,9 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) * 4) The column has a constraint with a known name; in that case * notnull_constrs carries that name and dumpTableSchema will print * "CONSTRAINT the_name NOT NULL". However, if the name is the default - * (table_column_not_null), there's no need to print that name in the dump, - * so notnull_constrs is set to the empty string and it behaves as case 2. + * (table_column_not_null) and there's no comment on the constraint, + * there's no need to print that name in the dump, so notnull_constrs + * is set to the empty string and it behaves as case 2. * * In a child table that inherits from a parent already containing NOT NULL * constraints and the columns in the child don't have their own NOT NULL @@ -9732,6 +9747,7 @@ static void determineNotNullFlags(Archive *fout, PGresult *res, int r, TableInfo *tbinfo, int j, int i_notnull_name, + int i_notnull_comment, int i_notnull_invalidoid, int i_notnull_noinherit, int i_notnull_islocal, @@ -9805,11 +9821,13 @@ determineNotNullFlags(Archive *fout, PGresult *res, int r, { /* * In binary upgrade of inheritance child tables, must have a - * constraint name that we can UPDATE later. + * constraint name that we can UPDATE later; same if there's a + * comment on the constraint. 
*/ - if (dopt->binary_upgrade && - !tbinfo->ispartition && - !tbinfo->notnull_islocal) + if ((dopt->binary_upgrade && + !tbinfo->ispartition && + !tbinfo->notnull_islocal) || + !PQgetisnull(res, r, i_notnull_comment)) { tbinfo->notnull_constrs[j] = pstrdup(PQgetvalue(res, r, i_notnull_name)); @@ -17686,6 +17704,56 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) if (tbinfo->dobj.dump & DUMP_COMPONENT_SECLABEL) dumpTableSecLabel(fout, tbinfo, reltypename); + /* + * Dump comments for not-null constraints that aren't to be dumped + * separately (those are processed by collectComments/dumpComment). + */ + if (!fout->dopt->no_comments && dopt->dumpSchema && + fout->remoteVersion >= 180000) + { + PQExpBuffer comment = NULL; + PQExpBuffer tag = NULL; + + for (j = 0; j < tbinfo->numatts; j++) + { + if (tbinfo->notnull_constrs[j] != NULL && + tbinfo->notnull_comment[j] != NULL) + { + if (comment == NULL) + { + comment = createPQExpBuffer(); + tag = createPQExpBuffer(); + } + else + { + resetPQExpBuffer(comment); + resetPQExpBuffer(tag); + } + + appendPQExpBuffer(comment, "COMMENT ON CONSTRAINT %s ON %s IS ", + fmtId(tbinfo->notnull_constrs[j]), qualrelname); + appendStringLiteralAH(comment, tbinfo->notnull_comment[j], fout); + appendPQExpBufferStr(comment, ";\n"); + + appendPQExpBuffer(tag, "CONSTRAINT %s ON %s", + fmtId(tbinfo->notnull_constrs[j]), qrelname); + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = tag->data, + .namespace = tbinfo->dobj.namespace->dobj.name, + .owner = tbinfo->rolname, + .description = "COMMENT", + .section = SECTION_NONE, + .createStmt = comment->data, + .deps = &(tbinfo->dobj.dumpId), + .nDeps = 1)); + } + } + + destroyPQExpBuffer(comment); + destroyPQExpBuffer(tag); + } + /* Dump comments on inlined table constraints */ for (j = 0; j < tbinfo->ncheck; j++) { diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 7417eab6aefa6..39eef1d6617f4 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -365,6 +365,7 @@ typedef struct _tableInfo * there isn't one on this column. 
If * empty string, unnamed constraint * (pre-v17) */ + char **notnull_comment; /* comment thereof */ bool *notnull_invalid; /* true for NOT NULL NOT VALID */ bool *notnull_noinh; /* NOT NULL is NO INHERIT */ bool *notnull_islocal; /* true if NOT NULL has local definition */ diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 386e21e0c596a..e1cfa99874ec4 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -1191,7 +1191,9 @@ ) INHERITS (dump_test.test_table_nn, dump_test.test_table_nn_2); ALTER TABLE dump_test.test_table_nn ADD CONSTRAINT nn NOT NULL col1 NOT VALID; ALTER TABLE dump_test.test_table_nn_chld1 VALIDATE CONSTRAINT nn; - ALTER TABLE dump_test.test_table_nn_chld2 VALIDATE CONSTRAINT nn;', + ALTER TABLE dump_test.test_table_nn_chld2 VALIDATE CONSTRAINT nn; + COMMENT ON CONSTRAINT nn ON dump_test.test_table_nn IS \'nn comment is valid\'; + COMMENT ON CONSTRAINT nn ON dump_test.test_table_nn_chld2 IS \'nn_chld2 comment is valid\';', regexp => qr/^ \QALTER TABLE dump_test.test_table_nn\E \n^\s+ \QADD CONSTRAINT nn NOT NULL col1 NOT VALID;\E @@ -1205,6 +1207,34 @@ }, }, + # This constraint is invalid therefore it goes in SECTION_POST_DATA + 'COMMENT ON CONSTRAINT ON test_table_nn' => { + regexp => qr/^ + \QCOMMENT ON CONSTRAINT nn ON dump_test.test_table_nn IS\E + /xm, + like => { + %full_runs, %dump_test_schema_runs, section_post_data => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + + # This constraint is valid therefore it goes in SECTION_PRE_DATA + 'COMMENT ON CONSTRAINT ON test_table_chld2' => { + regexp => qr/^ + \QCOMMENT ON CONSTRAINT nn ON dump_test.test_table_nn_chld2 IS\E + /xm, + like => { + %full_runs, %dump_test_schema_runs, section_pre_data => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + 'CONSTRAINT NOT NULL / NOT VALID (child1)' => { regexp => qr/^ \QCREATE TABLE dump_test.test_table_nn_chld1 (\E\n diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out index ad6aaab738538..b5592617d9755 100644 --- a/src/test/regress/expected/constraints.out +++ b/src/test/regress/expected/constraints.out @@ -1659,6 +1659,8 @@ EXECUTE get_nnconstraint_info('{constr_parent3, constr_child3}'); constr_parent3 | constr_parent3_a_not_null | t | t | 0 (2 rows) +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_parent2 IS 'this constraint is invalid'; +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_child2 IS 'this constraint is valid'; DEALLOCATE get_nnconstraint_info; -- end NOT NULL NOT VALID -- Comments diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql index 337baab7ced93..12668f0e0ce0f 100644 --- a/src/test/regress/sql/constraints.sql +++ b/src/test/regress/sql/constraints.sql @@ -997,6 +997,9 @@ create table constr_parent3 (a int not null); create table constr_child3 () inherits (constr_parent2, constr_parent3); EXECUTE get_nnconstraint_info('{constr_parent3, constr_child3}'); +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_parent2 IS 'this constraint is invalid'; +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_child2 IS 'this constraint is valid'; + DEALLOCATE get_nnconstraint_info; -- end NOT NULL NOT VALID From 48c80aba7538d7c515d0c89f4d11f88974fee851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 26 Jun 2025 18:25:05 +0200 Subject: [PATCH 158/602] docs: fix typo --- 
doc/src/sgml/ref/pg_dump.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index edbb377a5eda1..2ae084b5fa6fc 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1278,7 +1278,7 @@ PostgreSQL documentation The data section contains actual table data, large-object contents, sequence values, and statistics for tables, - materialized views, and foriegn tables. + materialized views, and foreign tables. Post-data items include definitions of indexes, triggers, rules, statistics for indexes, and constraints other than validated check and not-null constraints. From a3994ec6acb27545300ce1e336e4d119d8000ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 26 Jun 2025 18:33:48 +0200 Subject: [PATCH 159/602] Fix typo in comment Introduced by c2da1a5d6325 Reported-by: Michael Paquier Discussion: https://postgr.es/m/aFt4qeRwrV-3qNix@paquier.xyz --- contrib/pg_stat_statements/pg_stat_statements.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 5597fcaaa053d..e7857f81ec057 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2844,9 +2844,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, /* * If we have an external param at this location, but no lists are * being squashed across the query, then we skip here; this will make - * us print print the characters found in the original query that - * represent the parameter in the next iteration (or after the loop is - * done), which is a bit odd but seems to work okay in most cases. + * us print the characters found in the original query that represent + * the parameter in the next iteration (or after the loop is done), + * which is a bit odd but seems to work okay in most cases. */ if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists) continue; From 060f420a03a8d8186423e7d64302b72e01365c20 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 26 Jun 2025 14:25:45 -0400 Subject: [PATCH 160/602] Simplify vacuum VM update logging counters We can simplify the VM counters added in dc6acfd910b8 to lazy_vacuum_heap_page() and lazy_scan_new_or_empty(). We won't invoke lazy_vacuum_heap_page() unless there are dead line pointers, so we know the page can't be all-visible. In lazy_scan_new_or_empty(), we only update the VM if the page-level hint PD_ALL_VISIBLE is clear, and the VM bit cannot be set if the page level bit is clear because a subsequent page update would fail to clear the visibility map bit. Simplify the logic for determining which log counters to increment based on this knowledge. Doing so is worthwhile because the old logic was confusing and misguided. 
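
To illustrate (not part of the patch itself), the invariant that lets
us drop the old_vmbits bookkeeping could be spelled out as a
hypothetical assertion ahead of the visibilitymap_set() call; the
variable names here are assumptions borrowed from the surrounding code:

	/*
	 * Dead line pointers were just removed, so the page cannot have been
	 * all-visible, and therefore its bits in the VM must still be clear.
	 */
	Assert(visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) == 0);

Given that invariant, every page marked here is newly all-visible, so
the counters can be incremented unconditionally.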
Author: Melanie Plageman Reviewed-by: Nazir Bilal Yavuz Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/flat/CAAKRu_a9w_n2mwY%3DG4LjfWTvRTJtjbfvnYAKi4WjO8QXHHrA0g%40mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 53 +++++++++------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 09416450af962..8a42e17aec210 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1872,8 +1872,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, */ if (!PageIsAllVisible(page)) { - uint8 old_vmbits; - START_CRIT_SECTION(); /* mark buffer dirty before writing a WAL record */ @@ -1893,24 +1891,16 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, log_newpage_buffer(buf, true); PageSetAllVisible(page); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, - InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_VISIBLE | - VISIBILITYMAP_ALL_FROZEN); + visibilitymap_set(vacrel->rel, blkno, buf, + InvalidXLogRecPtr, + vmbuffer, InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | + VISIBILITYMAP_ALL_FROZEN); END_CRIT_SECTION(); - /* - * If the page wasn't already set all-visible and/or all-frozen in - * the VM, count it as newly set for logging. - */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - vacrel->vm_new_visible_frozen_pages++; - } - else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0) - vacrel->vm_new_frozen_pages++; + /* Count the newly all-frozen pages for logging */ + vacrel->vm_new_visible_pages++; + vacrel->vm_new_visible_frozen_pages++; } freespace = PageGetHeapFreeSpace(page); @@ -2915,7 +2905,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid, &all_frozen)) { - uint8 old_vmbits; uint8 flags = VISIBILITYMAP_ALL_VISIBLE; if (all_frozen) @@ -2925,25 +2914,15 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, } PageSetAllVisible(page); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer, - InvalidXLogRecPtr, - vmbuffer, visibility_cutoff_xid, - flags); + visibilitymap_set(vacrel->rel, blkno, buffer, + InvalidXLogRecPtr, + vmbuffer, visibility_cutoff_xid, + flags); - /* - * If the page wasn't already set all-visible and/or all-frozen in the - * VM, count it as newly set for logging. - */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - if (all_frozen) - vacrel->vm_new_visible_frozen_pages++; - } - - else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && - all_frozen) - vacrel->vm_new_frozen_pages++; + /* Count the newly set VM page for logging */ + vacrel->vm_new_visible_pages++; + if (all_frozen) + vacrel->vm_new_visible_frozen_pages++; } /* Revert to the previous phase information for error traceback */ From 483f7246f39b3af250fed1e613d962b85b568861 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 26 Jun 2025 15:03:48 -0400 Subject: [PATCH 161/602] Remove unused check in heap_xlog_insert() 8e03eb92e9a reverted the commit 39b66a91bd which allowed freezing in the heap_insert() code path but forgot to remove the corresponding check in heap_xlog_insert(). This code is extraneous but not harmful. However, cleaning it up makes it very clear that, as of now, we do not support any freezing of pages in the heap_insert() path. 
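
For context, the only insert path that sets XLH_INSERT_ALL_FROZEN_SET
is the multi-insert one used by COPY FREEZE, along the lines of this
sketch (the table and file names are hypothetical):

	BEGIN;
	CREATE TABLE tab (a int, b text);
	COPY tab FROM '/tmp/data.csv' WITH (FORMAT csv, FREEZE);
	COMMIT;

Plain INSERT statements go through heap_insert() and can never set the
flag, which is exactly what the new assertion documents.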
Author: Melanie Plageman
Reviewed-by: Tomas Vondra
Discussion: https://postgr.es/m/flat/CAAKRu_Zp4Pi-t51OFWm1YZ-cctDfBhHCMZ%3DEx6PKxv0o8y2GvA%40mail.gmail.com
Backpatch-through: 14
---
 src/backend/access/heap/heapam_xlog.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 30f4c2d3c6719..eb4bd3d6ae3a3 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -438,6 +438,9 @@ heap_xlog_insert(XLogReaderState *record)
 	ItemPointerSetBlockNumber(&target_tid, blkno);
 	ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
 
+	/* No freezing in the heap_insert() code path */
+	Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
+
 	/*
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
@@ -508,10 +511,6 @@ heap_xlog_insert(XLogReaderState *record)
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
 			PageClearAllVisible(page);
 
-		/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
-		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
-			PageSetAllVisible(page);
-
 		MarkBufferDirty(buffer);
 	}
 	if (BufferIsValid(buffer))

From 95e12d4d9b228855af8a2a34ca28c33924d4edd1 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 26 Jun 2025 22:02:16 +0200
Subject: [PATCH 162/602] Correct misleading error messages

Commit 7d6d2c4bbd7 dropped opcintype from the index AM strategy
translation API. But some error messages about failed lookups still
mentioned it, even though it was not used for the lookup.

Fix by removing opcintype from the error messages as well.
---
 src/backend/commands/indexcmds.c | 4 ++--
 src/backend/commands/tablecmds.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index f2898fee5fcd5..6f753ab6d7a0d 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -2469,8 +2469,8 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype,
 				 cmptype == COMPARE_EQ ? errmsg("could not identify an equality operator for type %s", format_type_be(opcintype)) :
 				 cmptype == COMPARE_OVERLAP ? errmsg("could not identify an overlaps operator for type %s", format_type_be(opcintype)) :
 				 cmptype == COMPARE_CONTAINED_BY ? errmsg("could not identify a contained-by operator for type %s", format_type_be(opcintype)) : 0,
-				 errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
-						   cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+				 errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+						   cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
 
 	/*
 	 * We parameterize rhstype so foreign keys can ask for a <@ operator
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 1c3ad74e7b9e9..e2b94c8c6098b 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -10330,8 +10330,8 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
 					for_overlaps ?
errmsg("could not identify an overlaps operator for foreign key") : errmsg("could not identify an equality operator for foreign key"), - errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".", - cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid))); + errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".", + cmptype, get_opfamily_name(opfamily, false), get_am_name(amid))); /* * There had better be a primary equality operator for the index. From 7fb3c38e7d7d12a742e1e7600879570251e1886a Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 26 Jun 2025 22:13:53 +0200 Subject: [PATCH 163/602] libpq: Message style improvements --- src/interfaces/libpq/fe-connect.c | 2 +- src/interfaces/libpq/fe-protocol3.c | 7 ++++--- src/interfaces/libpq/fe-secure-openssl.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index ccb01aad36109..51a9c41658455 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -2141,7 +2141,7 @@ pqConnectOptions2(PGconn *conn) if (conn->min_pversion > conn->max_pversion) { conn->status = CONNECTION_BAD; - libpq_append_conn_error(conn, "min_protocol_version is greater than max_protocol_version"); + libpq_append_conn_error(conn, "\"%s\" is greater than \"%s\"", "min_protocol_version", "max_protocol_version"); return false; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index beb1c889aad73..1599de757d130 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1434,7 +1434,7 @@ pqGetNegotiateProtocolVersion3(PGconn *conn) /* 3.1 never existed, we went straight from 3.0 to 3.2 */ if (their_version == PG_PROTOCOL(3, 1)) { - libpq_append_conn_error(conn, "received invalid protocol negotiation message: server requests downgrade to non-existent 3.1 protocol version"); + libpq_append_conn_error(conn, "received invalid protocol negotiation message: server requested downgrade to non-existent 3.1 protocol version"); goto failure; } @@ -1452,9 +1452,10 @@ pqGetNegotiateProtocolVersion3(PGconn *conn) if (their_version < conn->min_pversion) { - libpq_append_conn_error(conn, "server only supports protocol version %d.%d, but min_protocol_version was set to %d.%d", + libpq_append_conn_error(conn, "server only supports protocol version %d.%d, but \"%s\" was set to %d.%d", PG_PROTOCOL_MAJOR(their_version), PG_PROTOCOL_MINOR(their_version), + "min_protocol_version", PG_PROTOCOL_MAJOR(conn->min_pversion), PG_PROTOCOL_MINOR(conn->min_pversion)); @@ -1476,7 +1477,7 @@ pqGetNegotiateProtocolVersion3(PGconn *conn) } if (strncmp(conn->workBuffer.data, "_pq_.", 5) != 0) { - libpq_append_conn_error(conn, "received invalid protocol negotiation message: server reported unsupported parameter name without a _pq_. 
prefix (\"%s\")", conn->workBuffer.data); + libpq_append_conn_error(conn, "received invalid protocol negotiation message: server reported unsupported parameter name without a \"%s\" prefix (\"%s\")", "_pq_.", conn->workBuffer.data); goto failure; } libpq_append_conn_error(conn, "received invalid protocol negotiation message: server reported an unsupported parameter that was not requested (\"%s\")", conn->workBuffer.data); diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index 78f9e84eb353b..b08b3a6901b77 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -711,7 +711,7 @@ SSL_CTX_keylog_cb(const SSL *ssl, const char *line) if (fd == -1) { - libpq_append_conn_error(conn, "could not open ssl keylog file \"%s\": %s", + libpq_append_conn_error(conn, "could not open SSL key logging file \"%s\": %s", conn->sslkeylogfile, pg_strerror(errno)); return; } @@ -719,7 +719,7 @@ SSL_CTX_keylog_cb(const SSL *ssl, const char *line) /* line is guaranteed by OpenSSL to be NUL terminated */ rc = write(fd, line, strlen(line)); if (rc < 0) - libpq_append_conn_error(conn, "could not write to ssl keylog file \"%s\": %s", + libpq_append_conn_error(conn, "could not write to SSL key logging file \"%s\": %s", conn->sslkeylogfile, pg_strerror(errno)); else rc = write(fd, "\n", 1); From 94e2e150ec72a3b37e3847be99c4aca3320c38f9 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 27 Jun 2025 09:31:23 +0900 Subject: [PATCH 164/602] Correct list of files in src/backend/lib/README binaryheap.c and stringinfo.c have been moved to src/common/ by respectively 5af0263afd7b and 26aaf97b683d, and the README patched here still mentioned these two files as available in src/backend/lib/. Author: Aleksander Alekseev Discussion: https://postgr.es/m/CAJ7c6TPg-=tC+fzq0tGTtmL7r79-aWeCmpwAyQiGu0N+sKGj8Q@mail.gmail.com --- src/backend/lib/README | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/backend/lib/README b/src/backend/lib/README index f2fb591237dba..c28cbe356f0b3 100644 --- a/src/backend/lib/README +++ b/src/backend/lib/README @@ -1,8 +1,6 @@ This directory contains a general purpose data structures, for use anywhere in the backend: -binaryheap.c - a binary heap - bipartite_match.c - Hopcroft-Karp maximum cardinality algorithm for bipartite graphs bloomfilter.c - probabilistic, space-efficient set membership testing @@ -21,8 +19,6 @@ pairingheap.c - a pairing heap rbtree.c - a red-black tree -stringinfo.c - an extensible string type - Aside from the inherent characteristics of the data structures, there are a few practical differences between the binary heap and the pairing heap. The From 7195c804bd12f47a9f1b2df9c2e1794bb04c5987 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Fri, 27 Jun 2025 11:49:00 +0300 Subject: [PATCH 165/602] Fix CheckPointReplicationSlots() with max_replication_slots == 0 ca307d5cec90 made CheckPointReplicationSlots() unconditionally call ReplicationSlotsComputeRequiredLSN(). It causes an assertion trap when max_replication_slots equals 0. This commit makes CheckPointReplicationSlots() call ReplicationSlotsComputeRequiredLSN() only when at least one slot gets its last_saved_restart_lsn updated. That avoids an assert trap and also saves some cycles when no one slot has last_saved_restart_lsn updated. Based on ideas from Dilip Kumar and Hayato Kuroda . 
Reported-by: Zhijie Hou Discussion: https://postgr.es/m/OS0PR01MB5716BB506AF934376FF3A8BB947BA%40OS0PR01MB5716.jpnprd01.prod.outlook.com --- src/backend/replication/slot.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index c11e588d63221..f9fec50ae883f 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -2079,6 +2079,7 @@ void CheckPointReplicationSlots(bool is_shutdown) { int i; + bool last_saved_restart_lsn_updated = false; elog(DEBUG1, "performing replication slot checkpoint"); @@ -2123,15 +2124,23 @@ CheckPointReplicationSlots(bool is_shutdown) SpinLockRelease(&s->mutex); } + /* + * Track if we're going to update slot's last_saved_restart_lsn. We + * need this to know if we need to recompute the required LSN. + */ + if (s->last_saved_restart_lsn != s->data.restart_lsn) + last_saved_restart_lsn_updated = true; + SaveSlotToPath(s, path, LOG); } LWLockRelease(ReplicationSlotAllocationLock); /* - * Recompute the required LSN as SaveSlotToPath() updated - * last_saved_restart_lsn for slots. + * Recompute the required LSN if SaveSlotToPath() updated + * last_saved_restart_lsn for any slot. */ - ReplicationSlotsComputeRequiredLSN(); + if (last_saved_restart_lsn_updated) + ReplicationSlotsComputeRequiredLSN(); } /* From bbccf7ecb363e50ae9d9aa71d0e7c6d49ee0bb06 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 27 Jun 2025 13:37:26 -0500 Subject: [PATCH 166/602] Use correct DatumGet*() function in test_shm_mq_main(). This is purely cosmetic, as dsm_attach() interprets its argument as a dsm_handle (i.e., an unsigned integer), but we might as well fix it. Oversight in commit 4db3744f1f. Author: Jianghua Yang Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAAZLFmRxkUD5jRs0W3K%3DUe4_ZS%2BRcAb0PCE1S0vVJBn3sWH2UQ%40mail.gmail.com Backpatch-through: 13 --- src/test/modules/test_shm_mq/worker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/modules/test_shm_mq/worker.c b/src/test/modules/test_shm_mq/worker.c index 96cd304dbbc83..c1d321b69a427 100644 --- a/src/test/modules/test_shm_mq/worker.c +++ b/src/test/modules/test_shm_mq/worker.c @@ -77,7 +77,7 @@ test_shm_mq_main(Datum main_arg) * exit, which is fine. If there were a ResourceOwner, it would acquire * ownership of the mapping, but we have no need for that. 
*/ - seg = dsm_attach(DatumGetInt32(main_arg)); + seg = dsm_attach(DatumGetUInt32(main_arg)); if (seg == NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), From 50fd428b2b9cb036c9c5982b56443d7e28119707 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 28 Jun 2025 19:18:06 +0200 Subject: [PATCH 167/602] Message style improvements --- src/backend/access/heap/vacuumlazy.c | 2 +- src/backend/catalog/heap.c | 2 +- src/backend/commands/matview.c | 3 ++- src/backend/commands/publicationcmds.c | 4 ++-- src/backend/commands/subscriptioncmds.c | 6 +++--- src/backend/commands/tablecmds.c | 11 ++++++----- src/backend/libpq/be-secure-openssl.c | 4 ++-- src/backend/replication/logical/launcher.c | 2 +- src/backend/replication/logical/slotsync.c | 4 ++-- src/backend/replication/pgoutput/pgoutput.c | 2 +- src/backend/tcop/backend_startup.c | 6 +++--- src/backend/utils/adt/formatting.c | 5 +++-- src/backend/utils/misc/guc_tables.c | 4 ++-- src/test/regress/expected/generated_virtual.out | 4 ++-- src/test/regress/expected/inherit.out | 4 ++-- src/test/regress/expected/matview.out | 2 +- src/test/regress/expected/publication.out | 5 +++-- src/test/regress/expected/without_overlaps.out | 4 ++-- src/test/subscription/t/024_add_drop_pub.pl | 2 +- 19 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 8a42e17aec210..4111a8996b5a1 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1428,7 +1428,7 @@ lazy_scan_heap(LVRelState *vacrel) */ if (vacrel->eager_scan_max_fails_per_region > 0) ereport(vacrel->verbose ? INFO : DEBUG2, - (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"", + (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"", orig_eager_scan_success_limit, vacrel->dbname, vacrel->relnamespace, vacrel->relname))); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 649d3966e8e21..fd6537567ea27 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -3006,7 +3006,7 @@ AddRelationNotNullConstraints(Relation rel, List *constraints, if (constr->is_no_inherit) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("cannot define not-null constraint on column \"%s\" with NO INHERIT", + errmsg("cannot define not-null constraint with NO INHERIT on column \"%s\"", strVal(linitial(constr->keys))), errdetail("The column has an inherited not-null constraint."))); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 27c2cb26ef5f3..188e26f0e6e29 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -835,7 +835,8 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, if (!foundUniqueIndex) ereport(ERROR, errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("could not find suitable unique index on materialized view")); + errmsg("could not find suitable unique index on materialized view \"%s\"", + RelationGetRelationName(matviewRel))); appendStringInfoString(&querybuf, " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) " diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index 0b23d94c38e20..1bf7eaae5b362 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -2130,8 +2130,8 @@ defGetGeneratedColsOption(DefElem *def) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s 
requires a \"none\" or \"stored\" value", - def->defname)); + errmsg("invalid value for publication parameter \"%s\": \"%s\"", def->defname, sval), + errdetail("Valid values are \"%s\" and \"%s\".", "none", "stored")); return PUBLISH_GENCOLS_NONE; /* keep compiler quiet */ } diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 4aec73bcc6bbc..4ff246cd94321 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -1267,7 +1267,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, IsSet(opts.specified_opts, SUBOPT_SLOT_NAME)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("slot_name and two_phase cannot be altered at the same time"))); + errmsg("\"slot_name\" and \"two_phase\" cannot be altered at the same time"))); /* * Note that workers may still survive even if the @@ -1283,7 +1283,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, if (logicalrep_workers_find(subid, true, true)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot alter two_phase when logical replication worker is still running"), + errmsg("cannot alter \"two_phase\" when logical replication worker is still running"), errhint("Try again after some time."))); /* @@ -1297,7 +1297,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, LookupGXactBySubid(subid)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot disable two_phase when prepared transactions are present"), + errmsg("cannot disable \"two_phase\" when prepared transactions exist"), errhint("Resolve these transactions and try again."))); /* Change system catalog accordingly */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index e2b94c8c6098b..991bc946ffc44 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8609,7 +8609,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName, rel->rd_att->constr && rel->rd_att->constr->num_check > 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints"), + errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints"), errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.", colName, RelationGetRelationName(rel)))); @@ -8627,7 +8627,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName, GetRelationPublications(RelationGetRelid(rel)) != NIL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication"), + errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication"), errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.", colName, RelationGetRelationName(rel)))); @@ -10189,7 +10189,7 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, if (pk_has_without_overlaps && !with_period) ereport(ERROR, errcode(ERRCODE_INVALID_FOREIGN_KEY), - errmsg("foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS")); + errmsg("foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS")); /* * Now we can check permissions. 
@@ -12913,8 +12913,9 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, con->contype != CONSTRAINT_NOTNULL) ereport(ERROR, errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key, check, or not-null constraint", - constrName, RelationGetRelationName(rel))); + errmsg("cannot validate constraint \"%s\" of relation \"%s\"", + constrName, RelationGetRelationName(rel)), + errdetail("This operation is not supported for this type of constraint.")); if (!con->conenforced) ereport(ERROR, diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c index 64ff3ce3d6a7a..c8b63ef824900 100644 --- a/src/backend/libpq/be-secure-openssl.c +++ b/src/backend/libpq/be-secure-openssl.c @@ -1436,10 +1436,10 @@ initialize_ecdh(SSL_CTX *context, bool isServerStart) */ ereport(isServerStart ? FATAL : LOG, errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("failed to set group names specified in ssl_groups: %s", + errmsg("could not set group names specified in ssl_groups: %s", SSLerrmessageExt(ERR_get_error(), _("No valid groups found"))), - errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL")); + errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL.")); return false; } #endif diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 14d8efbd25bf5..4aed0dfcebb24 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -341,7 +341,7 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype, if (max_active_replication_origins == 0) ereport(ERROR, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), - errmsg("cannot start logical replication workers when \"max_active_replication_origins\"=0"))); + errmsg("cannot start logical replication workers when \"max_active_replication_origins\" is 0"))); /* * We need to do the modification of the shared memory under lock so that diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index f1dcbebfa1ae7..3ec3abfa3da60 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -213,7 +213,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, ereport(slot->data.persistency == RS_TEMPORARY ? 
LOG : DEBUG1, errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("Synchronization could lead to data loss as the remote slot needs WAL at LSN %X/%X and catalog xmin %u, but the standby has LSN %X/%X and catalog xmin %u.", + errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%X and catalog xmin %u, but the standby has LSN %X/%X and catalog xmin %u.", LSN_FORMAT_ARGS(remote_slot->restart_lsn), remote_slot->catalog_xmin, LSN_FORMAT_ARGS(slot->data.restart_lsn), @@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid) { ereport(LOG, errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("Synchronization could lead to data loss as standby could not build a consistent snapshot to decode WALs at LSN %X/%X.", + errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%X.", LSN_FORMAT_ARGS(slot->data.restart_lsn))); return false; diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index 693a766e6d75f..082b4d9d32798 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -1789,7 +1789,7 @@ LoadPublications(List *pubnames) else ereport(WARNING, errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("skipped loading publication: %s", pubname), + errmsg("skipped loading publication \"%s\"", pubname), errdetail("The publication does not exist at this point in the WAL."), errhint("Create the publication if it does not exist.")); } diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c index a7d1fec981f88..ad0af5edc1f21 100644 --- a/src/backend/tcop/backend_startup.c +++ b/src/backend/tcop/backend_startup.c @@ -881,7 +881,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen) { ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("invalid length of query cancel packet"))); + errmsg("invalid length of cancel request packet"))); return; } len = pktlen - offsetof(CancelRequestPacket, cancelAuthCode); @@ -889,7 +889,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen) { ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("invalid length of query cancel key"))); + errmsg("invalid length of cancel key in cancel request packet"))); return; } @@ -1077,7 +1077,7 @@ check_log_connections(char **newval, void **extra, GucSource source) if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - GUC_check_errdetail("Invalid list syntax in parameter \"log_connections\"."); + GUC_check_errdetail("Invalid list syntax in parameter \"%s\".", "log_connections"); pfree(rawstring); list_free(elemlist); return false; diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 5bd1e01f7e463..1d05481181db7 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -3590,14 +3590,15 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, if (matched < 2) ereturn(escontext,, (errcode(ERRCODE_INVALID_DATETIME_FORMAT), - errmsg("invalid input string for \"Y,YYY\""))); + errmsg("invalid value \"%s\" for \"%s\"", + s, "Y,YYY"))); /* years += (millennia * 1000); */ if (pg_mul_s32_overflow(millennia, 1000, &millennia) || pg_add_s32_overflow(years, millennia, &years)) ereturn(escontext,, (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), - 
errmsg("value for \"Y,YYY\" in source string is out of range"))); + errmsg("value for \"%s\" in source string is out of range", "Y,YYY"))); if (!from_char_set_int(&out->year, years, n, escontext)) return; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index f04bfedb2fd10..511dc32d51921 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1028,7 +1028,7 @@ struct config_bool ConfigureNamesBool[] = }, { {"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enables reordering of DISTINCT pathkeys."), + gettext_noop("Enables reordering of DISTINCT keys."), NULL, GUC_EXPLAIN }, @@ -4837,7 +4837,7 @@ struct config_string ConfigureNamesString[] = { {"ssl_groups", PGC_SIGHUP, CONN_AUTH_SSL, gettext_noop("Sets the group(s) to use for Diffie-Hellman key exchange."), - gettext_noop("Multiple groups can be specified using colon-separated list."), + gettext_noop("Multiple groups can be specified using a colon-separated list."), GUC_SUPERUSER_ONLY }, &SSLECDHCurve, diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index 46713f06797e5..df704b5166fa3 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -634,10 +634,10 @@ INSERT INTO gtest20 (a) VALUES (30); -- violates constraint ERROR: new row for relation "gtest20" violates check constraint "gtest20_b_check" DETAIL: Failing row contains (30, virtual). ALTER TABLE gtest20 ALTER COLUMN b SET EXPRESSION AS (a * 100); -- violates constraint (currently not supported) -ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints +ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints DETAIL: Column "b" of relation "gtest20" is a virtual generated column. ALTER TABLE gtest20 ALTER COLUMN b SET EXPRESSION AS (a * 3); -- ok (currently not supported) -ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints +ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints DETAIL: Column "b" of relation "gtest20" is a virtual generated column. CREATE TABLE gtest20a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL); INSERT INTO gtest20a (a) VALUES (10); diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index f9b0c415cfdcc..78dead65325e9 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -2281,7 +2281,7 @@ Inherits: pp1, create table cc3 (a2 int not null no inherit) inherits (cc1); NOTICE: moving and merging column "a2" with inherited definition DETAIL: User-specified column moved to the position of the inherited column. -ERROR: cannot define not-null constraint on column "a2" with NO INHERIT +ERROR: cannot define not-null constraint with NO INHERIT on column "a2" DETAIL: The column has an inherited not-null constraint. 
-- change NO INHERIT status of inherited constraint: no dice, it's inherited alter table cc2 add not null a2 no inherit; @@ -2530,7 +2530,7 @@ ERROR: conflicting NO INHERIT declaration for not-null constraint on column "a" CREATE TABLE inh_nn1 (a int not null); CREATE TABLE inh_nn2 (a int not null no inherit) INHERITS (inh_nn1); NOTICE: merging column "a" with inherited definition -ERROR: cannot define not-null constraint on column "a" with NO INHERIT +ERROR: cannot define not-null constraint with NO INHERIT on column "a" DETAIL: The column has an inherited not-null constraint. CREATE TABLE inh_nn3 (a int not null, b int, not null a no inherit); ERROR: conflicting NO INHERIT declaration for not-null constraint on column "a" diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out index 54939ecc6b08a..c56c9fa3a2544 100644 --- a/src/test/regress/expected/matview.out +++ b/src/test/regress/expected/matview.out @@ -587,7 +587,7 @@ CREATE MATERIALIZED VIEW drop_idx_matview AS NOTICE: index "mvtest_drop_idx" does not exist, skipping CREATE UNIQUE INDEX mvtest_drop_idx ON drop_idx_matview (i); REFRESH MATERIALIZED VIEW CONCURRENTLY drop_idx_matview; -ERROR: could not find suitable unique index on materialized view +ERROR: could not find suitable unique index on materialized view "drop_idx_matview" DROP MATERIALIZED VIEW drop_idx_matview; -- clean up RESET search_path; -- make sure that create WITH NO DATA works via SPI diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index f1025fc0f198d..3a2eacd793f70 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -34,7 +34,8 @@ ERROR: conflicting or redundant options LINE 1: ...pub_xxx WITH (publish_generated_columns = stored, publish_ge... ^ CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns = foo); -ERROR: publish_generated_columns requires a "none" or "stored" value +ERROR: invalid value for publication parameter "publish_generated_columns": "foo" +DETAIL: Valid values are "none" and "stored". \dRp List of publications Name | Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root @@ -539,7 +540,7 @@ SET client_min_messages = 'ERROR'; CREATE TABLE testpub_rf_tbl7 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * 111) VIRTUAL); CREATE PUBLICATION testpub8 FOR TABLE testpub_rf_tbl7 WHERE (y > 100); ALTER TABLE testpub_rf_tbl7 ALTER COLUMN y SET EXPRESSION AS (x * testpub_rf_func2()); -ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication +ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication DETAIL: Column "y" of relation "testpub_rf_tbl7" is a virtual generated column. 
RESET client_min_messages; DROP TABLE testpub_rf_tbl1; diff --git a/src/test/regress/expected/without_overlaps.out b/src/test/regress/expected/without_overlaps.out index ea607bed0a412..f3144bdc39c21 100644 --- a/src/test/regress/expected/without_overlaps.out +++ b/src/test/regress/expected/without_overlaps.out @@ -1426,7 +1426,7 @@ CREATE TABLE temporal_fk_rng2rng ( CONSTRAINT temporal_fk_rng2rng_fk FOREIGN KEY (parent_id, valid_at) REFERENCES temporal_rng (id, valid_at) ); -ERROR: foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS +ERROR: foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS -- (parent_id, valid_at) REFERENCES (id, PERIOD valid_at) -- FOREIGN KEY part should specify PERIOD CREATE TABLE temporal_fk_rng2rng ( @@ -1900,7 +1900,7 @@ CREATE TABLE temporal_fk_mltrng2mltrng ( CONSTRAINT temporal_fk_mltrng2mltrng_fk FOREIGN KEY (parent_id, valid_at) REFERENCES temporal_mltrng (id, valid_at) ); -ERROR: foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS +ERROR: foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS -- (parent_id, valid_at) REFERENCES (id, PERIOD valid_at) -- FOREIGN KEY part should specify PERIOD CREATE TABLE temporal_fk_mltrng2mltrng ( diff --git a/src/test/subscription/t/024_add_drop_pub.pl b/src/test/subscription/t/024_add_drop_pub.pl index e995d8b383901..5298d43197900 100644 --- a/src/test/subscription/t/024_add_drop_pub.pl +++ b/src/test/subscription/t/024_add_drop_pub.pl @@ -112,7 +112,7 @@ # Verify that a warning is logged. $node_publisher->wait_for_log( - qr/WARNING: ( [A-Z0-9]+:)? skipped loading publication: tap_pub_3/, $offset); + qr/WARNING: ( [A-Z0-9]+:)? skipped loading publication "tap_pub_3"/, $offset); $node_publisher->safe_psql('postgres', "CREATE PUBLICATION tap_pub_3 FOR TABLE tab_3"); From 6d12d5a433c9c8cbf92fc19afd2a3465f275564c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sun, 29 Jun 2025 17:02:35 +0200 Subject: [PATCH 168/602] pg_recvlogical: Rename --two-phase and --failover options. This commit renames the pg_recvlogical options --two-phase and --failover to --enable-two-phase and --enable-failover, respectively. The new names distinguish these enabling options from action options like --start and --create-slot, while clearly indicating their purpose to enable specific logical slot features. The option --failover is new in PostgreSQL 18 (commit cf2655a9029), so no compatibility break there. The option --two-phase has existed since PostgreSQL 15 (commit cda03cfed6b), so for compatibility we keep the old option name --two-phase around as deprecated. Also note that pg_createsubscriber has acquired an --enable-two-phase option, so this increases consistency across tools. 
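For illustration, slot creation is now spelled with the new option
names (the old spelling --two-phase remains accepted as a deprecated
alias for --enable-two-phase); both options are valid only together
with --create-slot:

$ pg_recvlogical -d postgres --slot=test --create-slot --enable-two-phase
$ pg_recvlogical -d postgres --slot=test --create-slot --enable-failover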
Co-authored-by: Masahiko Sawada Discussion: https://postgr.es/m/a28f66df-1354-4709-8d63-932ded4cac35@eisentraut.org --- doc/src/sgml/logicaldecoding.sgml | 2 +- doc/src/sgml/ref/pg_recvlogical.sgml | 9 +++++---- src/bin/pg_basebackup/pg_recvlogical.c | 20 ++++++++++--------- src/bin/pg_basebackup/t/030_pg_recvlogical.pl | 4 ++-- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 5c5957e0d37a1..fc288d691b9f6 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -169,7 +169,7 @@ COMMIT 693 $ pg_recvlogical -d postgres --slot=test --drop-slot Example 2: -$ pg_recvlogical -d postgres --slot=test --create-slot --two-phase +$ pg_recvlogical -d postgres --slot=test --create-slot --enable-two-phase $ pg_recvlogical -d postgres --slot=test --start -f - ControlZ $ psql -d postgres -c "BEGIN;INSERT INTO data(data) VALUES('5');PREPARE TRANSACTION 'test';" diff --git a/doc/src/sgml/ref/pg_recvlogical.sgml b/doc/src/sgml/ref/pg_recvlogical.sgml index 63a45c7018a45..f68182266a9fa 100644 --- a/doc/src/sgml/ref/pg_recvlogical.sgml +++ b/doc/src/sgml/ref/pg_recvlogical.sgml @@ -79,8 +79,8 @@ PostgreSQL documentation - The and options - can be specified with . + The and + options can be specified with . @@ -166,7 +166,7 @@ PostgreSQL documentation - + Enables the slot to be synchronized to the standbys. This option may @@ -300,7 +300,8 @@ PostgreSQL documentation - + + (deprecated) Enables decoding of prepared transactions. This option may only be specified with diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c index 4b4b545917d7d..fb7a6a1d05d8d 100644 --- a/src/bin/pg_basebackup/pg_recvlogical.c +++ b/src/bin/pg_basebackup/pg_recvlogical.c @@ -41,8 +41,8 @@ typedef enum /* Global Options */ static char *outfile = NULL; static int verbose = 0; -static bool two_phase = false; -static bool failover = false; +static bool two_phase = false; /* enable-two-phase option */ +static bool failover = false; /* enable-failover option */ static int noloop = 0; static int standby_message_timeout = 10 * 1000; /* 10 sec = default */ static int fsync_interval = 10 * 1000; /* 10 sec = default */ @@ -89,9 +89,9 @@ usage(void) printf(_(" --drop-slot drop the replication slot (for the slot's name see --slot)\n")); printf(_(" --start start streaming in a replication slot (for the slot's name see --slot)\n")); printf(_("\nOptions:\n")); - printf(_(" -E, --endpos=LSN exit after receiving the specified LSN\n")); - printf(_(" --failover enable replication slot synchronization to standby servers when\n" + printf(_(" --enable-failover enable replication slot synchronization to standby servers when\n" " creating a replication slot\n")); + printf(_(" -E, --endpos=LSN exit after receiving the specified LSN\n")); printf(_(" -f, --file=FILE receive log into this file, - for stdout\n")); printf(_(" -F --fsync-interval=SECS\n" " time between fsyncs to the output file (default: %d)\n"), (fsync_interval / 1000)); @@ -105,7 +105,8 @@ usage(void) printf(_(" -s, --status-interval=SECS\n" " time between status packets sent to server (default: %d)\n"), (standby_message_timeout / 1000)); printf(_(" -S, --slot=SLOTNAME name of the logical replication slot\n")); - printf(_(" -t, --two-phase enable decoding of prepared transactions when creating a slot\n")); + printf(_(" -t, --enable-two-phase enable decoding of prepared transactions when creating a slot\n")); + printf(_(" --two-phase 
(same as --enable-two-phase, deprecated)\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -698,9 +699,10 @@ main(int argc, char **argv) {"file", required_argument, NULL, 'f'}, {"fsync-interval", required_argument, NULL, 'F'}, {"no-loop", no_argument, NULL, 'n'}, - {"failover", no_argument, NULL, 5}, + {"enable-failover", no_argument, NULL, 5}, + {"enable-two-phase", no_argument, NULL, 't'}, + {"two-phase", no_argument, NULL, 't'}, /* deprecated */ {"verbose", no_argument, NULL, 'v'}, - {"two-phase", no_argument, NULL, 't'}, {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, '?'}, /* connection options */ @@ -928,14 +930,14 @@ main(int argc, char **argv) { if (two_phase) { - pg_log_error("--two-phase may only be specified with --create-slot"); + pg_log_error("%s may only be specified with --create-slot", "--enable-two-phase"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } if (failover) { - pg_log_error("--failover may only be specified with --create-slot"); + pg_log_error("%s may only be specified with --create-slot", "--enable-failover"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } diff --git a/src/bin/pg_basebackup/t/030_pg_recvlogical.pl b/src/bin/pg_basebackup/t/030_pg_recvlogical.pl index c82e78847b382..5f46357e72ac7 100644 --- a/src/bin/pg_basebackup/t/030_pg_recvlogical.pl +++ b/src/bin/pg_basebackup/t/030_pg_recvlogical.pl @@ -110,7 +110,7 @@ '--dbname' => $node->connstr('postgres'), '--start', '--endpos' => $nextlsn, - '--two-phase', '--no-loop', + '--enable-two-phase', '--no-loop', '--file' => '-', ], 'incorrect usage'); @@ -142,7 +142,7 @@ '--slot' => 'test', '--dbname' => $node->connstr('postgres'), '--create-slot', - '--failover', + '--enable-failover', ], 'slot with failover created'); From 8319e5cb5493046e65d60da3cc17ab78c91749b1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 29 Jun 2025 13:56:03 -0400 Subject: [PATCH 169/602] Obtain required table lock during cross-table constraint updates. Sometimes a table's constraint may depend on a column of another table, so that we have to update the constraint when changing the referenced column's type. We need to have lock on the constraint's table to do that. ATPostAlterTypeCleanup believed that this case was only possible for FOREIGN KEY constraints, but it's wrong at least for CHECK and EXCLUDE constraints; and in general, we'd probably need exclusive lock to alter any sort of constraint. So just remove the contype check and acquire lock for any other table. This prevents a "you don't have lock" assertion failure, though no ill effect is observed in production builds. We'll error out later anyway because we don't presently support physically altering column types within stored composite columns. But the catalog-munging is basically all there, so we may as well make that part work. 
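A minimal reproducer, per the regression test added below: atref's
CHECK constraint depends on attbl's row type, so changing the type of
attbl's column must take a lock on atref before reparsing/dropping the
constraint:

create table attbl(a int);
create table atref(b attbl check ((b).a is not null));
alter table attbl alter column a type numeric;  -- formerly an assertion failure, now a clean error
drop table attbl, atref;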
Bug: #18970 Reported-by: Alexander Lakhin Diagnosed-by: jian he Author: Tom Lane Discussion: https://postgr.es/m/18970-a7d1cfe1f8d5d8d9@postgresql.org Backpatch-through: 13 --- src/backend/commands/tablecmds.c | 21 +++++++++++---------- src/test/regress/expected/alter_table.out | 7 +++++++ src/test/regress/sql/alter_table.sql | 9 +++++++++ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 991bc946ffc44..b8837f26cb4fd 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -15415,9 +15415,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) /* * Re-parse the index and constraint definitions, and attach them to the * appropriate work queue entries. We do this before dropping because in - * the case of a FOREIGN KEY constraint, we might not yet have exclusive - * lock on the table the constraint is attached to, and we need to get - * that before reparsing/dropping. + * the case of a constraint on another table, we might not yet have + * exclusive lock on the table the constraint is attached to, and we need + * to get that before reparsing/dropping. (That's possible at least for + * FOREIGN KEY, CHECK, and EXCLUSION constraints; in non-FK cases it + * requires a dependency on the target table's composite type in the other + * table's constraint expressions.) * * We can't rely on the output of deparsing to tell us which relation to * operate on, because concurrent activity might have made the name @@ -15433,7 +15436,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) Form_pg_constraint con; Oid relid; Oid confrelid; - char contype; bool conislocal; tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId)); @@ -15450,7 +15452,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) elog(ERROR, "could not identify relation associated with constraint %u", oldId); } confrelid = con->confrelid; - contype = con->contype; conislocal = con->conislocal; ReleaseSysCache(tup); @@ -15468,12 +15469,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) continue; /* - * When rebuilding an FK constraint that references the table we're - * modifying, we might not yet have any lock on the FK's table, so get - * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT - * step, so there's no value in asking for anything weaker. + * When rebuilding another table's constraint that references the + * table we're modifying, we might not yet have any lock on the other + * table, so get one now. We'll need AccessExclusiveLock for the DROP + * CONSTRAINT step, so there's no value in asking for anything weaker. 
*/ - if (relid != tab->relid && contype == CONSTRAINT_FOREIGN) + if (relid != tab->relid) LockRelationOid(relid, AccessExclusiveLock); ATPostAlterTypeParse(oldId, relid, confrelid, diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 476266e3f4b03..750efc042d8ee 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -4745,6 +4745,13 @@ alter table attbl alter column p1 set data type bigint; alter table atref alter column c1 set data type bigint; drop table attbl, atref; /* End test case for bug #17409 */ +/* Test case for bug #18970 */ +create table attbl(a int); +create table atref(b attbl check ((b).a is not null)); +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table "attbl" because column "atref.b" uses its row type +drop table attbl, atref; +/* End test case for bug #18970 */ -- Test that ALTER TABLE rewrite preserves a clustered index -- for normal indexes and indexes on constraints. create table alttype_cluster (a int); diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 5ce9d1e429f81..41cff198e183c 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -3069,6 +3069,15 @@ drop table attbl, atref; /* End test case for bug #17409 */ +/* Test case for bug #18970 */ + +create table attbl(a int); +create table atref(b attbl check ((b).a is not null)); +alter table attbl alter column a type numeric; -- someday this should work +drop table attbl, atref; + +/* End test case for bug #18970 */ + -- Test that ALTER TABLE rewrite preserves a clustered index -- for normal indexes and indexes on constraints. create table alttype_cluster (a int); From 66e9df9f6ef50719b25ca63b60aad934e14f4a1c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 29 Jun 2025 15:04:32 -0400 Subject: [PATCH 170/602] Fix some new issues with planning of PlaceHolderVars. In the wake of commit a16ef313f, we need to deal with more cases involving PlaceHolderVars in NestLoopParams than we did before. For one thing, a16ef313f was incorrect to suppose that we could rely on the required-outer relids of the lefthand path to decide placement of nestloop-parameter PHVs. As Richard Guo argued at the time, we must look at the required-outer relids of the join path itself. For another, we have to apply replace_nestloop_params() to such a PHV's expression, in case it contains references to values that will be supplied from NestLoopParams of higher-level nestloops. For another, we need to be more careful about the phnullingrels of the PHV than we were being. identify_current_nestloop_params only bothered to ensure that the phnullingrels didn't contain "too many" relids, but now it has to be exact, because setrefs.c will apply both NRM_SUBSET and NRM_SUPERSET checks in different places. We can compute the correct relids by determining the set of outer joins that should be able to null the PHV and then subtracting whatever's been applied at or below this join. Do the same for plain Vars, too. (This should make it possible to use NRM_EQUAL to process nestloop params in setrefs.c, but I won't risk making such a change in v18 now.) 
Lastly, if a nestloop parameter PHV was pulled up out of a subquery and it contains a subquery that was originally pushed down from this query level, then that will still be represented as a SubLink, because SS_process_sublinks won't recurse into outer PHVs, so it didn't get transformed during expression preprocessing in the subquery. We can substitute the version of the PHV's expression appearing in its PlaceHolderInfo to ensure that that preprocessing has happened. (Seems like this processing sequence could stand to be redesigned, but again, late in v18 development is not the time for that.) It's not very clear to me why the old have_dangerous_phv join-order restriction prevented us from seeing the last three of these problems. But given the lack of field complaints, it must have done so. Reported-by: Alexander Lakhin Author: Tom Lane Discussion: https://postgr.es/m/18953-1c9883a9d4afeb30@postgresql.org --- src/backend/optimizer/plan/createplan.c | 34 ++++- src/backend/optimizer/util/paramassign.c | 84 +++++++----- src/backend/optimizer/util/placeholder.c | 40 ++++++ src/include/optimizer/paramassign.h | 3 +- src/include/optimizer/placeholder.h | 2 + src/test/regress/expected/join.out | 158 +++++++++++++++++++++++ src/test/regress/sql/join.sql | 46 +++++++ 7 files changed, 330 insertions(+), 37 deletions(-) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8baf36ba4b791..0b61aef962c6d 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4344,6 +4344,7 @@ create_nestloop_plan(PlannerInfo *root, NestLoop *join_plan; Plan *outer_plan; Plan *inner_plan; + Relids outerrelids; List *tlist = build_path_tlist(root, &best_path->jpath.path); List *joinrestrictclauses = best_path->jpath.joinrestrictinfo; List *joinclauses; @@ -4374,8 +4375,8 @@ create_nestloop_plan(PlannerInfo *root, outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0); /* For a nestloop, include outer relids in curOuterRels for inner side */ - root->curOuterRels = bms_union(root->curOuterRels, - best_path->jpath.outerjoinpath->parent->relids); + outerrelids = best_path->jpath.outerjoinpath->parent->relids; + root->curOuterRels = bms_union(root->curOuterRels, outerrelids); inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0); @@ -4415,7 +4416,8 @@ create_nestloop_plan(PlannerInfo *root, * node, and remove them from root->curOuterParams. */ nestParams = identify_current_nestloop_params(root, - best_path->jpath.outerjoinpath); + outerrelids, + PATH_REQ_OUTER((Path *) best_path)); /* * While nestloop parameters that are Vars had better be available from @@ -4423,32 +4425,50 @@ create_nestloop_plan(PlannerInfo *root, * that are PHVs won't be. In such cases we must add them to the * outer_plan's tlist, since the executor's NestLoopParam machinery * requires the params to be simple outer-Var references to that tlist. + * (This is cheating a little bit, because the outer path's required-outer + * relids might not be enough to allow evaluating such a PHV. But in + * practice, if we could have evaluated the PHV at the nestloop node, we + * can do so in the outer plan too.) 
*/ outer_tlist = outer_plan->targetlist; outer_parallel_safe = outer_plan->parallel_safe; foreach(lc, nestParams) { NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); + PlaceHolderVar *phv; TargetEntry *tle; if (IsA(nlp->paramval, Var)) continue; /* nothing to do for simple Vars */ - if (tlist_member((Expr *) nlp->paramval, outer_tlist)) + /* Otherwise it must be a PHV */ + phv = castNode(PlaceHolderVar, nlp->paramval); + + if (tlist_member((Expr *) phv, outer_tlist)) continue; /* already available */ + /* + * It's possible that nestloop parameter PHVs selected to evaluate + * here contain references to surviving root->curOuterParams items + * (that is, they reference values that will be supplied by some + * higher-level nestloop). Those need to be converted to Params now. + * Note: it's safe to do this after the tlist_member() check, because + * equal() won't pay attention to phv->phexpr. + */ + phv->phexpr = (Expr *) replace_nestloop_params(root, + (Node *) phv->phexpr); + /* Make a shallow copy of outer_tlist, if we didn't already */ if (outer_tlist == outer_plan->targetlist) outer_tlist = list_copy(outer_tlist); /* ... and add the needed expression */ - tle = makeTargetEntry((Expr *) copyObject(nlp->paramval), + tle = makeTargetEntry((Expr *) copyObject(phv), list_length(outer_tlist) + 1, NULL, true); outer_tlist = lappend(outer_tlist, tle); /* ... and track whether tlist is (still) parallel-safe */ if (outer_parallel_safe) - outer_parallel_safe = is_parallel_safe(root, - (Node *) nlp->paramval); + outer_parallel_safe = is_parallel_safe(root, (Node *) phv); } if (outer_tlist != outer_plan->targetlist) outer_plan = change_plan_targetlist(outer_plan, outer_tlist, diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c index 9836abf947995..4c13c5931b4c9 100644 --- a/src/backend/optimizer/util/paramassign.c +++ b/src/backend/optimizer/util/paramassign.c @@ -599,38 +599,31 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params) } /* - * Identify any NestLoopParams that should be supplied by a NestLoop plan - * node with the specified lefthand input path. Remove them from the active - * root->curOuterParams list and return them as the result list. + * Identify any NestLoopParams that should be supplied by a NestLoop + * plan node with the specified lefthand rels and required-outer rels. + * Remove them from the active root->curOuterParams list and return + * them as the result list. * - * XXX Here we also hack up the returned Vars and PHVs so that they do not - * contain nullingrel sets exceeding what is available from the outer side. - * This is needed if we have applied outer join identity 3, - * (A leftjoin B on (Pab)) leftjoin C on (Pb*c) - * = A leftjoin (B leftjoin C on (Pbc)) on (Pab) - * and C contains lateral references to B. It's still safe to apply the - * identity, but the parser will have created those references in the form - * "b*" (i.e., with varnullingrels listing the A/B join), while what we will - * have available from the nestloop's outer side is just "b". We deal with - * that here by stripping the nullingrels down to what is available from the - * outer side according to leftrelids. - * - * That fixes matters for the case of forward application of identity 3. - * If the identity was applied in the reverse direction, we will have - * parameter Vars containing too few nullingrel bits rather than too many. 
- * Currently, that causes no problems because setrefs.c applies only a - * subset check to nullingrels in NestLoopParams, but we'd have to work - * harder if we ever want to tighten that check. This is all pretty annoying - * because it greatly weakens setrefs.c's cross-check, but the alternative + * Vars and PHVs appearing in the result list must have nullingrel sets + * that could validly appear in the lefthand rel's output. Ordinarily that + * would be true already, but if we have applied outer join identity 3, + * there could be more or fewer nullingrel bits in the nodes appearing in + * curOuterParams than are in the nominal leftrelids. We deal with that by + * forcing their nullingrel sets to include exactly the outer-join relids + * that appear in leftrelids and can null the respective Var or PHV. + * This fix is a bit ad-hoc and intellectually unsatisfactory, because it's + * essentially jumping to the conclusion that we've placed evaluation of + * the nestloop parameters correctly, and thus it defeats the intent of the + * subsequent nullingrel cross-checks in setrefs.c. But the alternative * seems to be to generate multiple versions of each laterally-parameterized * subquery, which'd be unduly expensive. */ List * -identify_current_nestloop_params(PlannerInfo *root, Path *leftpath) +identify_current_nestloop_params(PlannerInfo *root, + Relids leftrelids, + Relids outerrelids) { List *result; - Relids leftrelids = leftpath->parent->relids; - Relids outerrelids = PATH_REQ_OUTER(leftpath); Relids allleftrelids; ListCell *cell; @@ -661,25 +654,58 @@ identify_current_nestloop_params(PlannerInfo *root, Path *leftpath) bms_is_member(nlp->paramval->varno, leftrelids)) { Var *var = (Var *) nlp->paramval; + RelOptInfo *rel = root->simple_rel_array[var->varno]; root->curOuterParams = foreach_delete_current(root->curOuterParams, cell); - var->varnullingrels = bms_intersect(var->varnullingrels, + var->varnullingrels = bms_intersect(rel->nulling_relids, leftrelids); result = lappend(result, nlp); } else if (IsA(nlp->paramval, PlaceHolderVar)) { PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval; - Relids eval_at = find_placeholder_info(root, phv)->ph_eval_at; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv); + Relids eval_at = phinfo->ph_eval_at; if (bms_is_subset(eval_at, allleftrelids) && bms_overlap(eval_at, leftrelids)) { root->curOuterParams = foreach_delete_current(root->curOuterParams, cell); - phv->phnullingrels = bms_intersect(phv->phnullingrels, - leftrelids); + + /* + * Deal with an edge case: if the PHV was pulled up out of a + * subquery and it contains a subquery that was originally + * pushed down from this query level, then that will still be + * represented as a SubLink, because SS_process_sublinks won't + * recurse into outer PHVs, so it didn't get transformed + * during expression preprocessing in the subquery. We need a + * version of the PHV that has a SubPlan, which we can get + * from the current query level's placeholder_list. This is + * quite grotty of course, but dealing with it earlier in the + * handling of subplan params would be just as grotty, and it + * might end up being a waste of cycles if we don't decide to + * treat the PHV as a NestLoopParam. (Perhaps that whole + * mechanism should be redesigned someday, but today is not + * that day.) 
+ */ + if (root->parse->hasSubLinks) + { + phv = copyObject(phinfo->ph_var); + + /* + * The ph_var will have empty nullingrels, but that + * doesn't matter since we're about to overwrite + * phv->phnullingrels. Other fields should be OK already. + */ + nlp->paramval = (Var *) phv; + } + + phv->phnullingrels = + bms_intersect(get_placeholder_nulling_relids(root, phinfo), + leftrelids); + result = lappend(result, nlp); } } diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index 41a4c81e94a75..e1cd00a72fbf7 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -545,3 +545,43 @@ contain_placeholder_references_walker(Node *node, return expression_tree_walker(node, contain_placeholder_references_walker, context); } + +/* + * Compute the set of outer-join relids that can null a placeholder. + * + * This is analogous to RelOptInfo.nulling_relids for Vars, but we compute it + * on-the-fly rather than saving it somewhere. Currently the value is needed + * at most once per query, so there's little value in doing otherwise. If it + * ever gains more widespread use, perhaps we should cache the result in + * PlaceHolderInfo. + */ +Relids +get_placeholder_nulling_relids(PlannerInfo *root, PlaceHolderInfo *phinfo) +{ + Relids result = NULL; + int relid = -1; + + /* + * Form the union of all potential nulling OJs for each baserel included + * in ph_eval_at. + */ + while ((relid = bms_next_member(phinfo->ph_eval_at, relid)) > 0) + { + RelOptInfo *rel = root->simple_rel_array[relid]; + + /* ignore the RTE_GROUP RTE */ + if (relid == root->group_rtindex) + continue; + + if (rel == NULL) /* must be an outer join */ + { + Assert(bms_is_member(relid, root->outer_join_rels)); + continue; + } + result = bms_add_members(result, rel->nulling_relids); + } + + /* Now remove any OJs already included in ph_eval_at, and we're done. */ + result = bms_del_members(result, phinfo->ph_eval_at); + return result; +} diff --git a/src/include/optimizer/paramassign.h b/src/include/optimizer/paramassign.h index d30d20de29922..bbf7214289bd5 100644 --- a/src/include/optimizer/paramassign.h +++ b/src/include/optimizer/paramassign.h @@ -30,7 +30,8 @@ extern Param *replace_nestloop_param_placeholdervar(PlannerInfo *root, extern void process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params); extern List *identify_current_nestloop_params(PlannerInfo *root, - Path *leftpath); + Relids leftrelids, + Relids outerrelids); extern Param *generate_new_exec_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod, Oid paramcollation); extern int assign_special_exec_param(PlannerInfo *root); diff --git a/src/include/optimizer/placeholder.h b/src/include/optimizer/placeholder.h index d351045e2e056..db92d8861babe 100644 --- a/src/include/optimizer/placeholder.h +++ b/src/include/optimizer/placeholder.h @@ -30,5 +30,7 @@ extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo); extern bool contain_placeholder_references_to(PlannerInfo *root, Node *clause, int relid); +extern Relids get_placeholder_nulling_relids(PlannerInfo *root, + PlaceHolderInfo *phinfo); #endif /* PLACEHOLDER_H */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index c292f04fdbaab..390aabfb34b9a 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4119,6 +4119,164 @@ select * from int8_tbl t1 (16 rows) rollback; +-- ... 
not that the initial replacement didn't have some bugs too +begin; +create temp table t(i int primary key); +explain (verbose, costs off) +select * from t t1 + left join (select 1 as x, * from t t2(i2)) t2ss on t1.i = t2ss.i2 + left join t t3(i3) on false + left join t t4(i4) on t4.i4 > t2ss.x; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Left Join + Output: t1.i, (1), t2.i2, i3, t4.i4 + -> Nested Loop Left Join + Output: t1.i, t2.i2, (1), i3 + Join Filter: false + -> Hash Left Join + Output: t1.i, t2.i2, (1) + Inner Unique: true + Hash Cond: (t1.i = t2.i2) + -> Seq Scan on pg_temp.t t1 + Output: t1.i + -> Hash + Output: t2.i2, (1) + -> Seq Scan on pg_temp.t t2 + Output: t2.i2, 1 + -> Result + Output: i3 + One-Time Filter: false + -> Memoize + Output: t4.i4 + Cache Key: (1) + Cache Mode: binary + -> Index Only Scan using t_pkey on pg_temp.t t4 + Output: t4.i4 + Index Cond: (t4.i4 > (1)) +(25 rows) + +explain (verbose, costs off) +select * from + (select k from + (select i, coalesce(i, j) as k from + (select i from t union all select 0) + join (select 1 as j limit 1) on i = j) + right join (select 2 as x) on true + join (select 3 as y) on i is not null + ), + lateral (select k as kl limit 1); + QUERY PLAN +------------------------------------------------------------------- + Nested Loop + Output: COALESCE(t.i, (1)), ((COALESCE(t.i, (1)))) + -> Limit + Output: 1 + -> Result + Output: 1 + -> Nested Loop + Output: t.i, ((COALESCE(t.i, (1)))) + -> Result + Output: t.i, COALESCE(t.i, (1)) + -> Append + -> Index Only Scan using t_pkey on pg_temp.t + Output: t.i + Index Cond: (t.i = (1)) + -> Result + Output: 0 + One-Time Filter: ((1) = 0) + -> Limit + Output: ((COALESCE(t.i, (1)))) + -> Result + Output: (COALESCE(t.i, (1))) +(21 rows) + +rollback; +-- PHVs containing SubLinks are quite tricky to get right +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select i4.f1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + QUERY PLAN +---------------------------------------------------------------- + Nested Loop + Output: i8.q1, i8.q2, (InitPlan 1).col1, false, (i8.q2) + InitPlan 1 + -> Result + Output: true + InitPlan 2 + -> Result + Output: true + -> Seq Scan on public.int4_tbl i4 + Output: i4.f1 + Filter: (i4.f1 = 0) + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Subquery Scan on ss1 + Output: ss1.y, (InitPlan 1).col1 + -> Limit + Output: NULL::integer + -> Result + Output: NULL::integer + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q2 = 123) + -> Limit + Output: (i8.q2) + -> Result + Output: i8.q2 + One-Time Filter: ((InitPlan 1).col1) +(29 rows) + +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select 1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + QUERY PLAN +---------------------------------------------------------------- + Nested Loop + Output: i8.q1, i8.q2, (InitPlan 1).col1, false, (i8.q2) + InitPlan 1 + -> Result + Output: true + InitPlan 2 + -> Result + Output: true + -> Limit + Output: NULL::integer + -> Result + Output: NULL::integer + -> Nested Loop + Output: i8.q1, 
i8.q2, (i8.q2) + -> Seq Scan on public.int4_tbl i4 + Output: i4.f1, (InitPlan 1).col1 + Filter: (i4.f1 = 0) + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q2 = 123) + -> Limit + Output: (i8.q2) + -> Result + Output: i8.q2 + One-Time Filter: ((InitPlan 1).col1) +(27 rows) + -- Test proper handling of appendrel PHVs during useless-RTE removal explain (costs off) select * from diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 88d2204e4471d..f6e7070db656b 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1361,6 +1361,52 @@ select * from int8_tbl t1 on true; rollback; +-- ... not that the initial replacement didn't have some bugs too +begin; +create temp table t(i int primary key); + +explain (verbose, costs off) +select * from t t1 + left join (select 1 as x, * from t t2(i2)) t2ss on t1.i = t2ss.i2 + left join t t3(i3) on false + left join t t4(i4) on t4.i4 > t2ss.x; + +explain (verbose, costs off) +select * from + (select k from + (select i, coalesce(i, j) as k from + (select i from t union all select 0) + join (select 1 as j limit 1) on i = j) + right join (select 2 as x) on true + join (select 3 as y) on i is not null + ), + lateral (select k as kl limit 1); + +rollback; + +-- PHVs containing SubLinks are quite tricky to get right +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select i4.f1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select 1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + -- Test proper handling of appendrel PHVs during useless-RTE removal explain (costs off) select * from From 0ebd24255581837f9a5b189ef15147b769df116b Mon Sep 17 00:00:00 2001 From: Joe Conway Date: Sun, 29 Jun 2025 21:14:21 -0400 Subject: [PATCH 171/602] Run pgperltidy This is required before the creation of a new branch. pgindent is clean, as well as is reformat-dat-files. perltidy version is v20230309, as documented in pgindent's README. 
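As a sketch of the tidy-up workflow described above (hypothetical
invocations from the top of a source-tree checkout; the script paths
under src/tools/pgindent and the location of the reformat-dat-files
make target are assumptions here):

$ src/tools/pgindent/pgperltidy                    # assumes perltidy v20230309 on PATH
$ src/tools/pgindent/pgindent
$ make -C src/include/catalog reformat-dat-files   # assumed target location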
--- contrib/amcheck/t/006_verify_gin.pl | 97 +++++++------------ src/bin/initdb/t/001_initdb.pl | 3 +- src/bin/pg_basebackup/t/030_pg_recvlogical.pl | 3 +- src/bin/pg_combinebackup/t/010_hardlink.pl | 92 +++++++++--------- src/bin/pg_dump/t/001_basic.pl | 9 +- src/bin/pg_dump/t/002_pg_dump.pl | 5 +- src/bin/pg_dump/t/006_pg_dumpall.pl | 53 +++++----- src/bin/pg_rewind/t/RewindTest.pm | 2 +- src/bin/pg_upgrade/t/004_subscription.pl | 6 +- src/bin/pg_upgrade/t/006_transfer_modes.pl | 28 ++++-- src/bin/scripts/t/100_vacuumdb.pl | 79 +++++++++++---- src/include/catalog/pg_collation.dat | 3 +- src/include/catalog/pg_proc.dat | 93 ++++++++---------- src/test/authentication/t/001_password.pl | 23 ++--- .../libpq_pipeline/t/001_libpq_pipeline.pl | 6 +- src/test/modules/test_aio/t/001_aio.pl | 46 +++++---- .../postmaster/t/002_connection_limits.pl | 3 +- .../t/040_standby_failover_slots_sync.pl | 3 +- .../recovery/t/048_vacuum_horizon_floor.pl | 54 ++++++----- src/test/ssl/t/SSL/Server.pm | 3 +- src/test/subscription/t/007_ddl.pl | 35 ++++--- src/test/subscription/t/013_partition.pl | 3 +- src/test/subscription/t/024_add_drop_pub.pl | 14 +-- src/test/subscription/t/035_conflicts.pl | 3 +- 24 files changed, 369 insertions(+), 297 deletions(-) diff --git a/contrib/amcheck/t/006_verify_gin.pl b/contrib/amcheck/t/006_verify_gin.pl index e540cd6606adf..5be0bee32183f 100644 --- a/contrib/amcheck/t/006_verify_gin.pl +++ b/contrib/amcheck/t/006_verify_gin.pl @@ -54,20 +54,17 @@ sub invalid_entry_order_leaf_page_test $node->stop; - my $blkno = 1; # root + my $blkno = 1; # root # produce wrong order by replacing aaaaa with ccccc - string_replace_block( - $relpath, - 'aaaaa', - 'ccccc', - $blkno - ); + string_replace_block($relpath, 'aaaaa', 'ccccc', $blkno); $node->start; - my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); - my $expected = "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295"; + my ($result, $stdout, $stderr) = + $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + my $expected = + "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295"; like($stderr, qr/$expected/); } @@ -96,20 +93,17 @@ sub invalid_entry_order_inner_page_test $node->stop; - my $blkno = 1; # root + my $blkno = 1; # root # we have rrrrrrrrr... and tttttttttt... as keys in the root, so produce wrong order by replacing rrrrrrrrrr.... - string_replace_block( - $relpath, - 'rrrrrrrrrr', - 'zzzzzzzzzz', - $blkno - ); + string_replace_block($relpath, 'rrrrrrrrrr', 'zzzzzzzzzz', $blkno); $node->start; - my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); - my $expected = "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295"; + my ($result, $stdout, $stderr) = + $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + my $expected = + "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295"; like($stderr, qr/$expected/); } @@ -129,7 +123,7 @@ sub invalid_entry_columns_order_test $node->stop; - my $blkno = 1; # root + my $blkno = 1; # root # mess column numbers # root items order before: (1,aaa), (2,bbb) @@ -139,26 +133,18 @@ sub invalid_entry_columns_order_test my $find = qr/($attrno_1)(.)(aaa)/s; my $replace = $attrno_2 . 
'$2$3'; - string_replace_block( - $relpath, - $find, - $replace, - $blkno - ); + string_replace_block($relpath, $find, $replace, $blkno); $find = qr/($attrno_2)(.)(bbb)/s; $replace = $attrno_1 . '$2$3'; - string_replace_block( - $relpath, - $find, - $replace, - $blkno - ); + string_replace_block($relpath, $find, $replace, $blkno); $node->start; - my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); - my $expected = "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295"; + my ($result, $stdout, $stderr) = + $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + my $expected = + "index \"$indexname\" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295"; like($stderr, qr/$expected/); } @@ -183,20 +169,17 @@ sub inconsistent_with_parent_key__parent_key_corrupted_test $node->stop; - my $blkno = 1; # root + my $blkno = 1; # root # we have nnnnnnnnnn... as parent key in the root, so replace it with something smaller then child's keys - string_replace_block( - $relpath, - 'nnnnnnnnnn', - 'aaaaaaaaaa', - $blkno - ); + string_replace_block($relpath, 'nnnnnnnnnn', 'aaaaaaaaaa', $blkno); $node->start; - my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); - my $expected = "index \"$indexname\" has inconsistent records on page 3 offset 3"; + my ($result, $stdout, $stderr) = + $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + my $expected = + "index \"$indexname\" has inconsistent records on page 3 offset 3"; like($stderr, qr/$expected/); } @@ -221,20 +204,17 @@ sub inconsistent_with_parent_key__child_key_corrupted_test $node->stop; - my $blkno = 3; # leaf + my $blkno = 3; # leaf # we have nnnnnnnnnn... as parent key in the root, so replace child key with something bigger - string_replace_block( - $relpath, - 'nnnnnnnnnn', - 'pppppppppp', - $blkno - ); + string_replace_block($relpath, 'nnnnnnnnnn', 'pppppppppp', $blkno); $node->start; - my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); - my $expected = "index \"$indexname\" has inconsistent records on page 3 offset 3"; + my ($result, $stdout, $stderr) = + $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + my $expected = + "index \"$indexname\" has inconsistent records on page 3 offset 3"; like($stderr, qr/$expected/); } @@ -254,24 +234,21 @@ sub inconsistent_with_parent_key__parent_key_corrupted_posting_tree_test $node->stop; - my $blkno = 2; # posting tree root + my $blkno = 2; # posting tree root # we have a posting tree for 'aaaaa' key with the root at 2nd block # and two leaf pages 3 and 4. replace 4th page's high key with (1,1) # so that there are tid's in leaf page that are larger then the new high key. my $find = pack('S*', 0, 4, 0) . 
'....'; my $replace = pack('S*', 0, 4, 0, 1, 1); - string_replace_block( - $relpath, - $find, - $replace, - $blkno - ); + string_replace_block($relpath, $find, $replace, $blkno); $node->start; - my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); - my $expected = "index \"$indexname\": tid exceeds parent's high key in postingTree leaf on block 4"; + my ($result, $stdout, $stderr) = + $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + my $expected = + "index \"$indexname\": tid exceeds parent's high key in postingTree leaf on block 4"; like($stderr, qr/$expected/); } diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 15dd10ce40a31..b7ef7ed8d06b7 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -76,7 +76,8 @@ 'checksums are enabled in control file'); command_ok([ 'initdb', '--sync-only', $datadir ], 'sync only'); -command_ok([ 'initdb', '--sync-only', '--no-sync-data-files', $datadir ], '--no-sync-data-files'); +command_ok([ 'initdb', '--sync-only', '--no-sync-data-files', $datadir ], + '--no-sync-data-files'); command_fails([ 'initdb', $datadir ], 'existing data directory'); if ($supports_syncfs) diff --git a/src/bin/pg_basebackup/t/030_pg_recvlogical.pl b/src/bin/pg_basebackup/t/030_pg_recvlogical.pl index 5f46357e72ac7..1b7a6f6f43fdd 100644 --- a/src/bin/pg_basebackup/t/030_pg_recvlogical.pl +++ b/src/bin/pg_basebackup/t/030_pg_recvlogical.pl @@ -147,7 +147,8 @@ 'slot with failover created'); my $result = $node->safe_psql('postgres', - "SELECT failover FROM pg_catalog.pg_replication_slots WHERE slot_name = 'test'"); + "SELECT failover FROM pg_catalog.pg_replication_slots WHERE slot_name = 'test'" +); is($result, 't', "failover is enabled for the new slot"); done_testing(); diff --git a/src/bin/pg_combinebackup/t/010_hardlink.pl b/src/bin/pg_combinebackup/t/010_hardlink.pl index a0ee419090cf6..4f92d6676bdef 100644 --- a/src/bin/pg_combinebackup/t/010_hardlink.pl +++ b/src/bin/pg_combinebackup/t/010_hardlink.pl @@ -56,7 +56,7 @@ '--pgdata' => $backup1path, '--no-sync', '--checkpoint' => 'fast', - '--wal-method' => 'none' + '--wal-method' => 'none' ], "full backup"); @@ -74,7 +74,7 @@ '--pgdata' => $backup2path, '--no-sync', '--checkpoint' => 'fast', - '--wal-method' => 'none', + '--wal-method' => 'none', '--incremental' => $backup1path . '/backup_manifest' ], "incremental backup"); @@ -112,45 +112,45 @@ # of the given data file. sub check_data_file { - my ($data_file, $last_segment_nlinks) = @_; - - my @data_file_segments = ($data_file); - - # Start checking for additional segments - my $segment_number = 1; - - while (1) - { - my $next_segment = $data_file . '.' . $segment_number; - - # If the file exists and is a regular file, add it to the list - if (-f $next_segment) - { - push @data_file_segments, $next_segment; - $segment_number++; - } - # Stop the loop if the file doesn't exist - else - { - last; - } - } - - # All segments of the given data file should contain 2 hard links, except - # for the last one, which should match the given number of links. 
- my $last_segment = pop @data_file_segments; - - for my $segment (@data_file_segments) - { - # Get the file's stat information of each segment - my $nlink_count = get_hard_link_count($segment); - ok($nlink_count == 2, "File '$segment' has 2 hard links"); - } - - # Get the file's stat information of the last segment - my $nlink_count = get_hard_link_count($last_segment); - ok($nlink_count == $last_segment_nlinks, - "File '$last_segment' has $last_segment_nlinks hard link(s)"); + my ($data_file, $last_segment_nlinks) = @_; + + my @data_file_segments = ($data_file); + + # Start checking for additional segments + my $segment_number = 1; + + while (1) + { + my $next_segment = $data_file . '.' . $segment_number; + + # If the file exists and is a regular file, add it to the list + if (-f $next_segment) + { + push @data_file_segments, $next_segment; + $segment_number++; + } + # Stop the loop if the file doesn't exist + else + { + last; + } + } + + # All segments of the given data file should contain 2 hard links, except + # for the last one, which should match the given number of links. + my $last_segment = pop @data_file_segments; + + for my $segment (@data_file_segments) + { + # Get the file's stat information of each segment + my $nlink_count = get_hard_link_count($segment); + ok($nlink_count == 2, "File '$segment' has 2 hard links"); + } + + # Get the file's stat information of the last segment + my $nlink_count = get_hard_link_count($last_segment); + ok($nlink_count == $last_segment_nlinks, + "File '$last_segment' has $last_segment_nlinks hard link(s)"); } @@ -159,11 +159,11 @@ sub check_data_file # that file. sub get_hard_link_count { - my ($file) = @_; + my ($file) = @_; - # Get file stats - my @stats = stat($file); - my $nlink = $stats[3]; # Number of hard links + # Get file stats + my @stats = stat($file); + my $nlink = $stats[3]; # Number of hard links - return $nlink; + return $nlink; } diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl index 0be9f6dd538fd..c3c5fae11eaaf 100644 --- a/src/bin/pg_dump/t/001_basic.pl +++ b/src/bin/pg_dump/t/001_basic.pl @@ -240,17 +240,20 @@ command_fails_like( [ 'pg_restore', '--exclude-database=foo', '--globals-only', '-d', 'xxx' ], qr/\Qpg_restore: error: option --exclude-database cannot be used together with -g\/--globals-only\E/, - 'pg_restore: option --exclude-database cannot be used together with -g/--globals-only'); + 'pg_restore: option --exclude-database cannot be used together with -g/--globals-only' +); command_fails_like( [ 'pg_restore', '--exclude-database=foo', '-d', 'xxx', 'dumpdir' ], qr/\Qpg_restore: error: option --exclude-database can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --exclude-database is used in pg_restore with dump of pg_dump'); + 'When option --exclude-database is used in pg_restore with dump of pg_dump' +); command_fails_like( [ 'pg_restore', '--globals-only', '-d', 'xxx', 'dumpdir' ], qr/\Qpg_restore: error: option -g\/--globals-only can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --globals-only is not used in pg_restore with dump of pg_dump'); + 'When option --globals-only is not used in pg_restore with dump of pg_dump' +); # also fails for -r and -t, but it seems pointless to add more tests for those. 
command_fails_like( diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index e1cfa99874ec4..2485d8f360e5a 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -368,7 +368,7 @@ '--data-only', '--superuser' => 'test_superuser', '--disable-triggers', - '--verbose', # no-op, just make sure it works + '--verbose', # no-op, just make sure it works 'postgres', ], }, @@ -810,8 +810,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', "--file=$tempdir/no_schema.sql", '--no-schema', - '--with-statistics', - 'postgres', + '--with-statistics', 'postgres', ], },); diff --git a/src/bin/pg_dump/t/006_pg_dumpall.pl b/src/bin/pg_dump/t/006_pg_dumpall.pl index 0ea02a3a4a940..c274b777586ad 100644 --- a/src/bin/pg_dump/t/006_pg_dumpall.pl +++ b/src/bin/pg_dump/t/006_pg_dumpall.pl @@ -294,17 +294,17 @@ '--format' => 'directory', '--globals-only', '--file' => "$tempdir/dump_globals_only", - ], - restore_cmd => [ - 'pg_restore', '-C', '--globals-only', - '--format' => 'directory', - '--file' => "$tempdir/dump_globals_only.sql", - "$tempdir/dump_globals_only", - ], - like => qr/ + ], + restore_cmd => [ + 'pg_restore', '-C', '--globals-only', + '--format' => 'directory', + '--file' => "$tempdir/dump_globals_only.sql", + "$tempdir/dump_globals_only", + ], + like => qr/ ^\s*\QCREATE ROLE dumpall;\E\s*\n /xm - }, ); + },); # First execute the setup_sql foreach my $run (sort keys %pgdumpall_runs) @@ -339,7 +339,8 @@ # pg_restore --file output file. my $output_file = slurp_file("$tempdir/${run}.sql"); - if (!($pgdumpall_runs{$run}->{like}) && !($pgdumpall_runs{$run}->{unlike})) + if ( !($pgdumpall_runs{$run}->{like}) + && !($pgdumpall_runs{$run}->{unlike})) { die "missing \"like\" or \"unlike\" in test \"$run\""; } @@ -361,30 +362,38 @@ # Some negative test case with dump of pg_dumpall and restore using pg_restore # test case 1: when -C is not used in pg_restore with dump of pg_dumpall $node->command_fails_like( - [ 'pg_restore', - "$tempdir/format_custom", - '--format' => 'custom', - '--file' => "$tempdir/error_test.sql", ], - qr/\Qpg_restore: error: option -C\/--create must be specified when restoring an archive created by pg_dumpall\E/, - 'When -C is not used in pg_restore with dump of pg_dumpall'); + [ + 'pg_restore', + "$tempdir/format_custom", + '--format' => 'custom', + '--file' => "$tempdir/error_test.sql", + ], + qr/\Qpg_restore: error: option -C\/--create must be specified when restoring an archive created by pg_dumpall\E/, + 'When -C is not used in pg_restore with dump of pg_dumpall'); # test case 2: When --list option is used with dump of pg_dumpall $node->command_fails_like( - [ 'pg_restore', + [ + 'pg_restore', "$tempdir/format_custom", '-C', - '--format' => 'custom', '--list', - '--file' => "$tempdir/error_test.sql", ], + '--format' => 'custom', + '--list', + '--file' => "$tempdir/error_test.sql", + ], qr/\Qpg_restore: error: option -l\/--list cannot be used when restoring an archive created by pg_dumpall\E/, 'When --list is used in pg_restore with dump of pg_dumpall'); # test case 3: When non-exist database is given with -d option $node->command_fails_like( - [ 'pg_restore', + [ + 'pg_restore', "$tempdir/format_custom", '-C', '--format' => 'custom', - '-d' => 'dbpq', ], + '-d' => 'dbpq', + ], qr/\Qpg_restore: error: could not connect to database "dbpq"\E/, - 'When non-existent database is given with -d option in pg_restore with dump of pg_dumpall'); + 'When non-existent database is given with -d option in pg_restore with dump of pg_dumpall' +); 
$node->stop('fast'); diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm index 3efab8317978a..b0234ebfaf218 100644 --- a/src/bin/pg_rewind/t/RewindTest.pm +++ b/src/bin/pg_rewind/t/RewindTest.pm @@ -285,7 +285,7 @@ sub run_pg_rewind # Check that pg_rewind with dbname and --write-recovery-conf # wrote the dbname in the generated primary_conninfo value. like(slurp_file("$primary_pgdata/postgresql.auto.conf"), - qr/dbname=postgres/m, 'recovery conf file sets dbname'); + qr/dbname=postgres/m, 'recovery conf file sets dbname'); # Check that standby.signal is here as recovery configuration # was requested. diff --git a/src/bin/pg_upgrade/t/004_subscription.pl b/src/bin/pg_upgrade/t/004_subscription.pl index c545abf65816e..e46f02c6cc612 100644 --- a/src/bin/pg_upgrade/t/004_subscription.pl +++ b/src/bin/pg_upgrade/t/004_subscription.pl @@ -53,7 +53,8 @@ $old_sub->stop; -$new_sub->append_conf('postgresql.conf', "max_active_replication_origins = 0"); +$new_sub->append_conf('postgresql.conf', + "max_active_replication_origins = 0"); # pg_upgrade will fail because the new cluster has insufficient # max_active_replication_origins. @@ -80,7 +81,8 @@ ); # Reset max_active_replication_origins -$new_sub->append_conf('postgresql.conf', "max_active_replication_origins = 10"); +$new_sub->append_conf('postgresql.conf', + "max_active_replication_origins = 10"); # Cleanup $publisher->safe_psql('postgres', "DROP PUBLICATION regress_pub1"); diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl index 550a63fdf7d47..58fe8a8c7dcea 100644 --- a/src/bin/pg_upgrade/t/006_transfer_modes.pl +++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl @@ -13,7 +13,8 @@ sub test_mode { my ($mode) = @_; - my $old = PostgreSQL::Test::Cluster->new('old', install_path => $ENV{oldinstall}); + my $old = + PostgreSQL::Test::Cluster->new('old', install_path => $ENV{oldinstall}); my $new = PostgreSQL::Test::Cluster->new('new'); # --swap can't be used to upgrade from versions older than 10, so just skip @@ -40,9 +41,11 @@ sub test_mode # Create a small variety of simple test objects on the old cluster. We'll # check that these reach the new version after upgrading. 
$old->start; - $old->safe_psql('postgres', "CREATE TABLE test1 AS SELECT generate_series(1, 100)"); + $old->safe_psql('postgres', + "CREATE TABLE test1 AS SELECT generate_series(1, 100)"); $old->safe_psql('postgres', "CREATE DATABASE testdb1"); - $old->safe_psql('testdb1', "CREATE TABLE test2 AS SELECT generate_series(200, 300)"); + $old->safe_psql('testdb1', + "CREATE TABLE test2 AS SELECT generate_series(200, 300)"); $old->safe_psql('testdb1', "VACUUM FULL test2"); $old->safe_psql('testdb1', "CREATE SEQUENCE testseq START 5432"); @@ -51,10 +54,15 @@ sub test_mode if (defined($ENV{oldinstall})) { my $tblspc = PostgreSQL::Test::Utils::tempdir_short(); - $old->safe_psql('postgres', "CREATE TABLESPACE test_tblspc LOCATION '$tblspc'"); - $old->safe_psql('postgres', "CREATE DATABASE testdb2 TABLESPACE test_tblspc"); - $old->safe_psql('postgres', "CREATE TABLE test3 TABLESPACE test_tblspc AS SELECT generate_series(300, 401)"); - $old->safe_psql('testdb2', "CREATE TABLE test4 AS SELECT generate_series(400, 502)"); + $old->safe_psql('postgres', + "CREATE TABLESPACE test_tblspc LOCATION '$tblspc'"); + $old->safe_psql('postgres', + "CREATE DATABASE testdb2 TABLESPACE test_tblspc"); + $old->safe_psql('postgres', + "CREATE TABLE test3 TABLESPACE test_tblspc AS SELECT generate_series(300, 401)" + ); + $old->safe_psql('testdb2', + "CREATE TABLE test4 AS SELECT generate_series(400, 502)"); } $old->stop; @@ -90,9 +98,11 @@ sub test_mode # tablespace. if (defined($ENV{oldinstall})) { - $result = $new->safe_psql('postgres', "SELECT COUNT(*) FROM test3"); + $result = + $new->safe_psql('postgres', "SELECT COUNT(*) FROM test3"); is($result, '102', "test3 data after pg_upgrade $mode"); - $result = $new->safe_psql('testdb2', "SELECT COUNT(*) FROM test4"); + $result = + $new->safe_psql('testdb2', "SELECT COUNT(*) FROM test4"); is($result, '103', "test4 data after pg_upgrade $mode"); } $new->stop; diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl index 75ac24a7a5539..ff56a13b46bbb 100644 --- a/src/bin/scripts/t/100_vacuumdb.pl +++ b/src/bin/scripts/t/100_vacuumdb.pl @@ -238,62 +238,105 @@ 'cannot use option --all and a dbname as argument at the same time'); $node->safe_psql('postgres', - 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'); + 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;' +); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing stats'); $node->safe_psql('postgres', - 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));'); + 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));' +); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], 
qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing index expression stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing index expression stats'); $node->safe_psql('postgres', - 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;'); + 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;' +); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing extended stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing extended stats'); $node->safe_psql('postgres', "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n" - . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" - . "ANALYZE regression_vacuumdb_child;\n"); + . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" + . "ANALYZE regression_vacuumdb_child;\n"); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing inherited stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing inherited stats'); $node->safe_psql('postgres', "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n" - . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" - . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" - . "ANALYZE regression_vacuumdb_part1;\n"); + . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" + . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" + . 
"ANALYZE regression_vacuumdb_part1;\n"); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_parted', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing partition stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_parted', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing partition stats'); diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat index fb76c421931ea..d8b5d5d2d856b 100644 --- a/src/include/catalog/pg_collation.dat +++ b/src/include/catalog/pg_collation.dat @@ -33,7 +33,8 @@ descr => 'sorts by Unicode code point; Unicode and POSIX character semantics', collname => 'pg_c_utf8', collprovider => 'b', collencoding => '6', colllocale => 'C.UTF-8', collversion => '1' }, -{ oid => '9535', descr => 'sorts by Unicode code point; Unicode character semantics', +{ oid => '9535', + descr => 'sorts by Unicode code point; Unicode character semantics', collname => 'pg_unicode_fast', collprovider => 'b', collencoding => '6', colllocale => 'PG_UNICODE_FAST', collversion => '1' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d3d28a263fa99..4efc1bc499ab5 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1190,14 +1190,14 @@ proname => 'bytea', proleakproof => 't', prorettype => 'bytea', proargtypes => 'int8', prosrc => 'int8_bytea' }, { oid => '8580', descr => 'convert bytea to int2', - proname => 'int2', prorettype => 'int2', - proargtypes => 'bytea', prosrc => 'bytea_int2' }, + proname => 'int2', prorettype => 'int2', proargtypes => 'bytea', + prosrc => 'bytea_int2' }, { oid => '8581', descr => 'convert bytea to int4', - proname => 'int4', prorettype => 'int4', - proargtypes => 'bytea', prosrc => 'bytea_int4' }, + proname => 'int4', prorettype => 'int4', proargtypes => 'bytea', + prosrc => 'bytea_int4' }, { oid => '8582', descr => 'convert bytea to int8', - proname => 'int8', prorettype => 'int8', - proargtypes => 'bytea', prosrc => 'bytea_int8' }, + proname => 'int8', prorettype => 'int8', proargtypes => 'bytea', + prosrc => 'bytea_int8' }, { oid => '449', descr => 'hash', proname => 'hashint2', prorettype => 'int4', proargtypes => 'int2', @@ -3597,7 +3597,8 @@ { oid => '8702', descr => 'gamma function', proname => 'gamma', prorettype => 'float8', proargtypes => 'float8', prosrc => 'dgamma' }, -{ oid => '8703', descr => 'natural logarithm of absolute value of gamma function', +{ oid => '8703', + descr => 'natural logarithm of absolute value of gamma function', proname => 'lgamma', prorettype => 'float8', proargtypes => 'float8', prosrc => 'dlgamma' }, @@ -9360,8 +9361,8 @@ proname => 'to_json', provolatile => 's', prorettype => 'json', proargtypes => 'anyelement', prosrc => 'to_json' }, { oid => '3261', descr => 'remove object fields with null values from json', - proname => 'json_strip_nulls', prorettype => 'json', proargtypes => 'json bool', - prosrc => 'json_strip_nulls' }, + proname => 'json_strip_nulls', prorettype => 'json', + proargtypes => 'json bool', prosrc => 'json_strip_nulls' }, { oid => '3947', proname => 'json_object_field', prorettype => 'json', @@ -9483,17 +9484,19 
@@ proname => 'uuid_hash_extended', prorettype => 'int8', proargtypes => 'uuid int8', prosrc => 'uuid_hash_extended' }, { oid => '3432', descr => 'generate random UUID', - proname => 'gen_random_uuid', provolatile => 'v', - prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, + proname => 'gen_random_uuid', provolatile => 'v', prorettype => 'uuid', + proargtypes => '', prosrc => 'gen_random_uuid' }, { oid => '9895', descr => 'generate UUID version 4', - proname => 'uuidv4', provolatile => 'v', - prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, + proname => 'uuidv4', provolatile => 'v', prorettype => 'uuid', + proargtypes => '', prosrc => 'gen_random_uuid' }, { oid => '9896', descr => 'generate UUID version 7', - proname => 'uuidv7', provolatile => 'v', - prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' }, -{ oid => '9897', descr => 'generate UUID version 7 with a timestamp shifted by specified interval', - proname => 'uuidv7', provolatile => 'v', proargnames => '{shift}', - prorettype => 'uuid', proargtypes => 'interval', prosrc => 'uuidv7_interval' }, + proname => 'uuidv7', provolatile => 'v', prorettype => 'uuid', + proargtypes => '', prosrc => 'uuidv7' }, +{ oid => '9897', + descr => 'generate UUID version 7 with a timestamp shifted by specified interval', + proname => 'uuidv7', provolatile => 'v', prorettype => 'uuid', + proargtypes => 'interval', proargnames => '{shift}', + prosrc => 'uuidv7_interval' }, { oid => '6342', descr => 'extract timestamp from UUID', proname => 'uuid_extract_timestamp', proleakproof => 't', prorettype => 'timestamptz', proargtypes => 'uuid', @@ -10299,8 +10302,8 @@ prorettype => 'jsonb', proargtypes => '', prosrc => 'jsonb_build_object_noargs' }, { oid => '3262', descr => 'remove object fields with null values from jsonb', - proname => 'jsonb_strip_nulls', prorettype => 'jsonb', proargtypes => 'jsonb bool', - prosrc => 'jsonb_strip_nulls' }, + proname => 'jsonb_strip_nulls', prorettype => 'jsonb', + proargtypes => 'jsonb bool', prosrc => 'jsonb_strip_nulls' }, { oid => '3478', proname => 'jsonb_object_field', prorettype => 'jsonb', @@ -12508,34 +12511,22 @@ proargnames => '{summarized_tli,summarized_lsn,pending_lsn,summarizer_pid}', prosrc => 'pg_get_wal_summarizer_state' }, # Statistics Import -{ oid => '8459', - descr => 'restore statistics on relation', - proname => 'pg_restore_relation_stats', provolatile => 'v', proisstrict => 'f', - provariadic => 'any', - proparallel => 'u', prorettype => 'bool', - proargtypes => 'any', - proargnames => '{kwargs}', - proargmodes => '{v}', - prosrc => 'pg_restore_relation_stats' }, -{ oid => '9160', - descr => 'clear statistics on relation', - proname => 'pg_clear_relation_stats', provolatile => 'v', proisstrict => 'f', - proparallel => 'u', prorettype => 'void', - proargtypes => 'text text', - proargnames => '{schemaname,relname}', - prosrc => 'pg_clear_relation_stats' }, -{ oid => '8461', - descr => 'restore statistics on attribute', - proname => 'pg_restore_attribute_stats', provolatile => 'v', proisstrict => 'f', - provariadic => 'any', - proparallel => 'u', prorettype => 'bool', - proargtypes => 'any', - proargnames => '{kwargs}', - proargmodes => '{v}', - prosrc => 'pg_restore_attribute_stats' }, -{ oid => '9162', - descr => 'clear statistics on attribute', - proname => 'pg_clear_attribute_stats', provolatile => 'v', proisstrict => 'f', +{ oid => '8459', descr => 'restore statistics on relation', + proname => 'pg_restore_relation_stats', provariadic => 'any', + 
proisstrict => 'f', provolatile => 'v', proparallel => 'u', + prorettype => 'bool', proargtypes => 'any', proargmodes => '{v}', + proargnames => '{kwargs}', prosrc => 'pg_restore_relation_stats' }, +{ oid => '9160', descr => 'clear statistics on relation', + proname => 'pg_clear_relation_stats', proisstrict => 'f', provolatile => 'v', + proparallel => 'u', prorettype => 'void', proargtypes => 'text text', + proargnames => '{schemaname,relname}', prosrc => 'pg_clear_relation_stats' }, +{ oid => '8461', descr => 'restore statistics on attribute', + proname => 'pg_restore_attribute_stats', provariadic => 'any', + proisstrict => 'f', provolatile => 'v', proparallel => 'u', + prorettype => 'bool', proargtypes => 'any', proargmodes => '{v}', + proargnames => '{kwargs}', prosrc => 'pg_restore_attribute_stats' }, +{ oid => '9162', descr => 'clear statistics on attribute', + proname => 'pg_clear_attribute_stats', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => 'text text text bool', proargnames => '{schemaname,relname,attname,inherited}', @@ -12544,13 +12535,13 @@ # GiST stratnum implementations { oid => '8047', descr => 'GiST support', proname => 'gist_translate_cmptype_common', prorettype => 'int2', - proargtypes => 'int4', - prosrc => 'gist_translate_cmptype_common' }, + proargtypes => 'int4', prosrc => 'gist_translate_cmptype_common' }, # AIO related functions { oid => '9200', descr => 'information about in-progress asynchronous IOs', proname => 'pg_get_aios', prorows => '100', proretset => 't', - provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '', + provolatile => 'v', proparallel => 'r', prorettype => 'record', + proargtypes => '', proallargtypes => '{int4,int4,int8,text,text,int8,int8,text,int2,int4,text,text,bool,bool,bool}', proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}', diff --git a/src/test/authentication/t/001_password.pl b/src/test/authentication/t/001_password.pl index 37d96d95a1aeb..a16e9a563f3fd 100644 --- a/src/test/authentication/t/001_password.pl +++ b/src/test/authentication/t/001_password.pl @@ -79,39 +79,40 @@ sub test_conn # other tests are added to this file in the future $node->safe_psql('postgres', "CREATE DATABASE test_log_connections"); -my $log_connections = $node->safe_psql('test_log_connections', q(SHOW log_connections;)); +my $log_connections = + $node->safe_psql('test_log_connections', q(SHOW log_connections;)); is($log_connections, 'on', qq(check log connections has expected value 'on')); -$node->connect_ok('test_log_connections', +$node->connect_ok( + 'test_log_connections', qq(log_connections 'on' works as expected for backwards compatibility), log_like => [ qr/connection received/, qr/connection authenticated/, qr/connection authorized: user=\S+ database=test_log_connections/, ], - log_unlike => [ - qr/connection ready/, - ],); + log_unlike => [ qr/connection ready/, ],); -$node->safe_psql('test_log_connections', +$node->safe_psql( + 'test_log_connections', q[ALTER SYSTEM SET log_connections = receipt,authorization,setup_durations; SELECT pg_reload_conf();]); -$node->connect_ok('test_log_connections', +$node->connect_ok( + 'test_log_connections', q(log_connections with subset of specified options logs only those aspects), log_like => [ qr/connection received/, qr/connection authorized: user=\S+ database=test_log_connections/, 
qr/connection ready/, ], - log_unlike => [ - qr/connection authenticated/, - ],); + log_unlike => [ qr/connection authenticated/, ],); $node->safe_psql('test_log_connections', qq(ALTER SYSTEM SET log_connections = 'all'; SELECT pg_reload_conf();)); -$node->connect_ok('test_log_connections', +$node->connect_ok( + 'test_log_connections', qq(log_connections 'all' logs all available connection aspects), log_like => [ qr/connection received/, diff --git a/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl b/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl index 61524bdbd8f28..f967885307045 100644 --- a/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl +++ b/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl @@ -53,7 +53,8 @@ BEGIN $node->command_ok( [ 'libpq_pipeline', @extraargs, - $testname, $node->connstr('postgres') . " max_protocol_version=latest" + $testname, + $node->connstr('postgres') . " max_protocol_version=latest" ], "libpq_pipeline $testname"); @@ -76,7 +77,8 @@ BEGIN # test separately that it still works the old protocol version too. $node->command_ok( [ - 'libpq_pipeline', 'cancel', $node->connstr('postgres') . " max_protocol_version=3.0" + 'libpq_pipeline', 'cancel', + $node->connstr('postgres') . " max_protocol_version=3.0" ], "libpq_pipeline cancel with protocol 3.0"); diff --git a/src/test/modules/test_aio/t/001_aio.pl b/src/test/modules/test_aio/t/001_aio.pl index 4527c70785d34..82ffffc058f75 100644 --- a/src/test/modules/test_aio/t/001_aio.pl +++ b/src/test/modules/test_aio/t/001_aio.pl @@ -1123,7 +1123,8 @@ sub test_zero { # Create a corruption and then read the block without waiting for # completion. - $psql_a->query(qq( + $psql_a->query( + qq( SELECT modify_rel_block('tbl_zero', 1, corrupt_header=>true); SELECT read_rel_block_ll('tbl_zero', 1, wait_complete=>false, zero_on_error=>true) )); @@ -1133,7 +1134,8 @@ sub test_zero $psql_b, "$persistency: test completing read by other session doesn't generate warning", qq(SELECT count(*) > 0 FROM tbl_zero;), - qr/^t$/, qr/^$/); + qr/^t$/, + qr/^$/); } # Clean up @@ -1355,18 +1357,24 @@ sub test_ignore_checksum )); $psql->query_safe($invalidate_sql); - psql_like($io_method, $psql, + psql_like( + $io_method, + $psql, "reading block w/ wrong checksum with ignore_checksum_failure=off fails", - $count_sql, qr/^$/, qr/ERROR: invalid page in block/); + $count_sql, + qr/^$/, + qr/ERROR: invalid page in block/); $psql->query_safe("SET ignore_checksum_failure=on"); $psql->query_safe($invalidate_sql); - psql_like($io_method, $psql, - "reading block w/ wrong checksum with ignore_checksum_failure=off succeeds", - $count_sql, - qr/^$expect$/, - qr/WARNING: ignoring (checksum failure|\d checksum failures)/); + psql_like( + $io_method, + $psql, + "reading block w/ wrong checksum with ignore_checksum_failure=off succeeds", + $count_sql, + qr/^$expect$/, + qr/WARNING: ignoring (checksum failure|\d checksum failures)/); # Verify that ignore_checksum_failure=off works in multi-block reads @@ -1432,19 +1440,22 @@ sub test_ignore_checksum # file. 
$node->wait_for_log(qr/LOG: ignoring checksum failure in block 2/, - $log_location); + $log_location); ok(1, "$io_method: found information about checksum failure in block 2"); - $node->wait_for_log(qr/LOG: invalid page in block 3 of relation base.*; zeroing out page/, - $log_location); + $node->wait_for_log( + qr/LOG: invalid page in block 3 of relation base.*; zeroing out page/, + $log_location); ok(1, "$io_method: found information about invalid page in block 3"); - $node->wait_for_log(qr/LOG: invalid page in block 4 of relation base.*; zeroing out page/, - $log_location); + $node->wait_for_log( + qr/LOG: invalid page in block 4 of relation base.*; zeroing out page/, + $log_location); ok(1, "$io_method: found information about checksum failure in block 4"); - $node->wait_for_log(qr/LOG: invalid page in block 5 of relation base.*; zeroing out page/, - $log_location); + $node->wait_for_log( + qr/LOG: invalid page in block 5 of relation base.*; zeroing out page/, + $log_location); ok(1, "$io_method: found information about checksum failure in block 5"); @@ -1462,8 +1473,7 @@ sub test_ignore_checksum qq( SELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, zero_on_error=>false);), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 3 of relation/ - ); + qr/^psql::\d+: ERROR: invalid page in block 3 of relation/); psql_like( $io_method, diff --git a/src/test/postmaster/t/002_connection_limits.pl b/src/test/postmaster/t/002_connection_limits.pl index 6442500fc379a..4a7fb16261f86 100644 --- a/src/test/postmaster/t/002_connection_limits.pl +++ b/src/test/postmaster/t/002_connection_limits.pl @@ -68,7 +68,8 @@ sub connect_fails_wait my $log_location = -s $node->logfile; $node->connect_fails($connstr, $test_name, %params); - $node->wait_for_log(qr/DEBUG: (00000: )?client backend.*exited with exit code 1/, + $node->wait_for_log( + qr/DEBUG: (00000: )?client backend.*exited with exit code 1/, $log_location); ok(1, "$test_name: client backend process exited"); } diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl index 9c8b49e942d88..2c61c51e914df 100644 --- a/src/test/recovery/t/040_standby_failover_slots_sync.pl +++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl @@ -941,8 +941,7 @@ 'synced slot retained on the new primary'); # Commit the prepared transaction -$standby1->safe_psql('postgres', - "COMMIT PREPARED 'test_twophase_slotsync';"); +$standby1->safe_psql('postgres', "COMMIT PREPARED 'test_twophase_slotsync';"); $standby1->wait_for_catchup('regress_mysub1'); # Confirm that the prepared transaction is replicated to the subscriber diff --git a/src/test/recovery/t/048_vacuum_horizon_floor.pl b/src/test/recovery/t/048_vacuum_horizon_floor.pl index d48a6ef7e0f24..e56fce59d58ea 100644 --- a/src/test/recovery/t/048_vacuum_horizon_floor.pl +++ b/src/test/recovery/t/048_vacuum_horizon_floor.pl @@ -47,7 +47,7 @@ $node_primary->background_psql($test_db, on_error_stop => 1); # Long-running Primary Session B -my $psql_primaryB = +my $psql_primaryB = $node_primary->background_psql($test_db, on_error_stop => 1); # Our test relies on two rounds of index vacuuming for reasons elaborated @@ -81,7 +81,8 @@ # insert and delete enough rows that we force at least one round of index # vacuuming before getting to a dead tuple which was killed after the standby # is disconnected. 
-$node_primary->safe_psql($test_db, qq[ +$node_primary->safe_psql( + $test_db, qq[ CREATE TABLE ${table1}(col1 int) WITH (autovacuum_enabled=false, fillfactor=10); INSERT INTO $table1 VALUES(7); @@ -98,21 +99,24 @@ $node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn); # Test that the WAL receiver is up and running. -$node_replica->poll_query_until($test_db, qq[ - SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't'); +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 't'); # Set primary_conninfo to something invalid on the replica and reload the # config. Once the config is reloaded, the startup process will force the WAL # receiver to restart and it will be unable to reconnect because of the # invalid connection information. -$node_replica->safe_psql($test_db, qq[ +$node_replica->safe_psql( + $test_db, qq[ ALTER SYSTEM SET primary_conninfo = ''; SELECT pg_reload_conf(); ]); # Wait until the WAL receiver has shut down and been unable to start up again. -$node_replica->poll_query_until($test_db, qq[ - SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f'); +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 'f'); # Now insert and update a tuple which will be visible to the vacuum on the # primary but which will have xmax newer than the oldest xmin on the standby @@ -123,7 +127,7 @@ UPDATE $table1 SET col1 = 100 WHERE col1 = 99; SELECT 'after_update'; ] - ); +); # Make sure the UPDATE finished like($res, qr/^after_update$/m, "UPDATE occurred on primary session A"); @@ -148,7 +152,7 @@ DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7; FETCH $primary_cursor1; ] - ); +); is($res, 7, qq[Cursor query returned $res. Expected value 7.]); @@ -183,7 +187,8 @@ # just waiting on the lock to start vacuuming. We don't want the standby to # re-establish a connection to the primary and push the horizon back until # we've saved initial values in GlobalVisState and calculated OldestXmin. -$node_primary->poll_query_until($test_db, +$node_primary->poll_query_until( + $test_db, qq[ SELECT count(*) >= 1 FROM pg_stat_activity WHERE pid = $vacuum_pid @@ -192,8 +197,9 @@ 't'); # Ensure the WAL receiver is still not active on the replica. -$node_replica->poll_query_until($test_db, qq[ - SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 'f'); +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 'f'); # Allow the WAL receiver connection to re-establish. $node_replica->safe_psql( @@ -203,15 +209,17 @@ ]); # Ensure the new WAL receiver has connected. -$node_replica->poll_query_until($test_db, qq[ - SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);] , 't'); +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 't'); # Once the WAL sender is shown on the primary, the replica should have # connected with the primary and pushed the horizon backward. Primary Session # A won't see that until the VACUUM FREEZE proceeds and does its first round # of index vacuuming. -$node_primary->poll_query_until($test_db, qq[ - SELECT EXISTS (SELECT * FROM pg_stat_replication);] , 't'); +$node_primary->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_replication);], 't'); # Move the cursor forward to the next 7. 
We inserted the 7 much later, so # advancing the cursor should allow vacuum to proceed vacuuming most pages of @@ -225,20 +233,21 @@ # Prevent the test from incorrectly passing by confirming that we did indeed # do a pass of index vacuuming. -$node_primary->poll_query_until($test_db, qq[ +$node_primary->poll_query_until( + $test_db, qq[ SELECT index_vacuum_count > 0 FROM pg_stat_progress_vacuum WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass; - ] , 't'); + ], 't'); # Commit the transaction with the open cursor so that the VACUUM can finish. $psql_primaryB->query_until( - qr/^commit$/m, - qq[ + qr/^commit$/m, + qq[ COMMIT; \\echo commit ] - ); +); # VACUUM proceeds with pruning and does a visibility check on each tuple. In # older versions of Postgres, pruning found our final dead tuple @@ -252,7 +261,8 @@ # With the fix, VACUUM should finish successfully, incrementing the table # vacuum_count. -$node_primary->poll_query_until($test_db, +$node_primary->poll_query_until( + $test_db, qq[ SELECT vacuum_count > 0 FROM pg_stat_all_tables WHERE relname = '${table1}'; diff --git a/src/test/ssl/t/SSL/Server.pm b/src/test/ssl/t/SSL/Server.pm index 96f0f201e9c0b..efbd0dafaf60d 100644 --- a/src/test/ssl/t/SSL/Server.pm +++ b/src/test/ssl/t/SSL/Server.pm @@ -318,7 +318,8 @@ sub switch_server_cert $node->append_conf('sslconfig.conf', "ssl=on"); $node->append_conf('sslconfig.conf', $backend->set_server_cert(\%params)); # use lists of ECDH curves and cipher suites for syntax testing - $node->append_conf('sslconfig.conf', 'ssl_groups=X25519:prime256v1:secp521r1'); + $node->append_conf('sslconfig.conf', + 'ssl_groups=X25519:prime256v1:secp521r1'); $node->append_conf('sslconfig.conf', 'ssl_tls13_ciphers=TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256'); diff --git a/src/test/subscription/t/007_ddl.pl b/src/test/subscription/t/007_ddl.pl index 7d12bcbddb687..2a45fb13739b7 100644 --- a/src/test/subscription/t/007_ddl.pl +++ b/src/test/subscription/t/007_ddl.pl @@ -70,7 +70,8 @@ ); # Cleanup -$node_publisher->safe_psql('postgres', qq[ +$node_publisher->safe_psql( + 'postgres', qq[ DROP PUBLICATION mypub; SELECT pg_drop_replication_slot('mysub'); ]); @@ -86,32 +87,38 @@ sub test_swap my ($table_name, $pubname, $appname) = @_; # Confirms tuples can be replicated - $node_publisher->safe_psql('postgres', "INSERT INTO $table_name VALUES (1);"); + $node_publisher->safe_psql('postgres', + "INSERT INTO $table_name VALUES (1);"); $node_publisher->wait_for_catchup($appname); my $result = - $node_subscriber->safe_psql('postgres', "SELECT a FROM $table_name"); - is($result, qq(1), 'check replication worked well before renaming a publication'); + $node_subscriber->safe_psql('postgres', "SELECT a FROM $table_name"); + is($result, qq(1), + 'check replication worked well before renaming a publication'); # Swap the name of publications; $pubname <-> pub_empty - $node_publisher->safe_psql('postgres', qq[ + $node_publisher->safe_psql( + 'postgres', qq[ ALTER PUBLICATION $pubname RENAME TO tap_pub_tmp; ALTER PUBLICATION pub_empty RENAME TO $pubname; ALTER PUBLICATION tap_pub_tmp RENAME TO pub_empty; ]); # Insert the data again - $node_publisher->safe_psql('postgres', "INSERT INTO $table_name VALUES (2);"); + $node_publisher->safe_psql('postgres', + "INSERT INTO $table_name VALUES (2);"); $node_publisher->wait_for_catchup($appname); # Confirms the second tuple won't be replicated because $pubname does not # contains relations anymore. 
$result = - $node_subscriber->safe_psql('postgres', "SELECT a FROM $table_name ORDER BY a"); + $node_subscriber->safe_psql('postgres', + "SELECT a FROM $table_name ORDER BY a"); is($result, qq(1), 'check the tuple inserted after the RENAME was not replicated'); # Restore the name of publications because it can be called several times - $node_publisher->safe_psql('postgres', qq[ + $node_publisher->safe_psql( + 'postgres', qq[ ALTER PUBLICATION $pubname RENAME TO tap_pub_tmp; ALTER PUBLICATION pub_empty RENAME TO $pubname; ALTER PUBLICATION tap_pub_tmp RENAME TO pub_empty; @@ -124,7 +131,8 @@ sub test_swap $node_subscriber->safe_psql('postgres', $ddl); # Create publications and a subscription -$node_publisher->safe_psql('postgres', qq[ +$node_publisher->safe_psql( + 'postgres', qq[ CREATE PUBLICATION pub_empty; CREATE PUBLICATION pub_for_tab FOR TABLE test1; CREATE PUBLICATION pub_for_all_tables FOR ALL TABLES; @@ -139,19 +147,20 @@ sub test_swap # Switches a publication which includes all tables $node_subscriber->safe_psql('postgres', - "ALTER SUBSCRIPTION tap_sub SET PUBLICATION pub_for_all_tables;" -); + "ALTER SUBSCRIPTION tap_sub SET PUBLICATION pub_for_all_tables;"); $node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); # Confirms RENAME command works well for ALL TABLES publication test_swap('test2', 'pub_for_all_tables', 'tap_sub'); # Cleanup -$node_publisher->safe_psql('postgres', qq[ +$node_publisher->safe_psql( + 'postgres', qq[ DROP PUBLICATION pub_empty, pub_for_tab, pub_for_all_tables; DROP TABLE test1, test2; ]); -$node_subscriber->safe_psql('postgres', qq[ +$node_subscriber->safe_psql( + 'postgres', qq[ DROP SUBSCRIPTION tap_sub; DROP TABLE test1, test2; ]); diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index 61b0cb4aa1ac1..4f78dd48815f0 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -51,8 +51,7 @@ ); # make a BRIN index to test aminsertcleanup logic in subscriber $node_subscriber1->safe_psql('postgres', - "CREATE INDEX tab1_c_brin_idx ON tab1 USING brin (c)" -); + "CREATE INDEX tab1_c_brin_idx ON tab1 USING brin (c)"); $node_subscriber1->safe_psql('postgres', "CREATE TABLE tab1_1 (b text, c text DEFAULT 'sub1_tab1', a int NOT NULL)" ); diff --git a/src/test/subscription/t/024_add_drop_pub.pl b/src/test/subscription/t/024_add_drop_pub.pl index 5298d43197900..b396abe559947 100644 --- a/src/test/subscription/t/024_add_drop_pub.pl +++ b/src/test/subscription/t/024_add_drop_pub.pl @@ -108,11 +108,12 @@ my $offset = -s $node_publisher->logfile; -$node_publisher->safe_psql('postgres',"INSERT INTO tab_3 values(1)"); +$node_publisher->safe_psql('postgres', "INSERT INTO tab_3 values(1)"); # Verify that a warning is logged. $node_publisher->wait_for_log( - qr/WARNING: ( [A-Z0-9]+:)? skipped loading publication "tap_pub_3"/, $offset); + qr/WARNING: ( [A-Z0-9]+:)? skipped loading publication "tap_pub_3"/, + $offset); $node_publisher->safe_psql('postgres', "CREATE PUBLICATION tap_pub_3 FOR TABLE tab_3"); @@ -128,10 +129,11 @@ # Verify that the insert operation gets replicated to subscriber after # publication is created. 
-$result = $node_subscriber->safe_psql('postgres', - "SELECT * FROM tab_3"); -is($result, qq(1 -2), 'check that the incremental data is replicated after the publication is created'); +$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_3"); +is( $result, qq(1 +2), + 'check that the incremental data is replicated after the publication is created' +); # shutdown $node_subscriber->stop('fast'); diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 2a7a8239a2966..d78a6bac16aeb 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -26,7 +26,8 @@ "CREATE TABLE conf_tab (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);"); $node_publisher->safe_psql('postgres', - "CREATE TABLE conf_tab_2 (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);"); + "CREATE TABLE conf_tab_2 (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);" +); # Create same table on subscriber $node_subscriber->safe_psql('postgres', From 9c5b9a280cb6089c011a01797868da83f97d0230 Mon Sep 17 00:00:00 2001 From: Joe Conway Date: Sun, 29 Jun 2025 21:43:39 -0400 Subject: [PATCH 172/602] Do pre-release housekeeping on catalog data. Run renumber_oids.pl to move high-numbered OIDs down, as per pre-beta tasks specified by RELEASE_CHANGES. For reference, the command was ./renumber_oids.pl --first-mapped-oid 8000 --target-oid 6300 This should have been done prior to beta1, but it was forgotten. This will ensure we get the correct numbering for beta2 onward. --- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_authid.dat | 2 +- src/include/catalog/pg_collation.dat | 2 +- src/include/catalog/pg_index.h | 2 +- src/include/catalog/pg_proc.dat | 162 +++++++++++++-------------- 5 files changed, 85 insertions(+), 85 deletions(-) diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index d63db42ed7b37..479629825f5b7 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202506251 +#define CATALOG_VERSION_NO 202506291 #endif diff --git a/src/include/catalog/pg_authid.dat b/src/include/catalog/pg_authid.dat index eb4dab5c6aa77..c881c13adf1bc 100644 --- a/src/include/catalog/pg_authid.dat +++ b/src/include/catalog/pg_authid.dat @@ -99,7 +99,7 @@ rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', rolpassword => '_null_', rolvaliduntil => '_null_' }, -{ oid => '8916', oid_symbol => 'ROLE_PG_SIGNAL_AUTOVACUUM_WORKER', +{ oid => '6392', oid_symbol => 'ROLE_PG_SIGNAL_AUTOVACUUM_WORKER', rolname => 'pg_signal_autovacuum_worker', rolsuper => 'f', rolinherit => 't', rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat index d8b5d5d2d856b..8cfd09f03140e 100644 --- a/src/include/catalog/pg_collation.dat +++ b/src/include/catalog/pg_collation.dat @@ -33,7 +33,7 @@ descr => 'sorts by Unicode code point; Unicode and POSIX character semantics', collname => 'pg_c_utf8', collprovider => 'b', collencoding => '6', colllocale => 'C.UTF-8', collversion => '1' }, -{ oid => '9535', +{ oid => '6411', descr => 'sorts by Unicode code point; Unicode character semantics', collname => 'pg_unicode_fast', collprovider => 'b', collencoding => '6', colllocale => 'PG_UNICODE_FAST', collversion => '1' }, diff 
--git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index 4392b9d221d5a..731d3938169e6 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -69,7 +69,7 @@ CATALOG(pg_index,2610,IndexRelationId) BKI_SCHEMA_MACRO */ typedef FormData_pg_index *Form_pg_index; -DECLARE_TOAST_WITH_MACRO(pg_index, 8149, 8150, PgIndexToastTable, PgIndexToastIndex); +DECLARE_TOAST_WITH_MACRO(pg_index, 6351, 6352, PgIndexToastTable, PgIndexToastIndex); DECLARE_INDEX(pg_index_indrelid_index, 2678, IndexIndrelidIndexId, pg_index, btree(indrelid oid_ops)); DECLARE_UNIQUE_INDEX_PKEY(pg_index_indexrelid_index, 2679, IndexRelidIndexId, pg_index, btree(indexrelid oid_ops)); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 4efc1bc499ab5..fb4f7f50350ad 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1004,7 +1004,7 @@ { oid => '3129', descr => 'sort support', proname => 'btint2sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint2sortsupport' }, -{ oid => '9290', descr => 'skip support', +{ oid => '6402', descr => 'skip support', proname => 'btint2skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint2skipsupport' }, { oid => '351', descr => 'less-equal-greater', @@ -1013,7 +1013,7 @@ { oid => '3130', descr => 'sort support', proname => 'btint4sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint4sortsupport' }, -{ oid => '9291', descr => 'skip support', +{ oid => '6403', descr => 'skip support', proname => 'btint4skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint4skipsupport' }, { oid => '842', descr => 'less-equal-greater', @@ -1022,7 +1022,7 @@ { oid => '3131', descr => 'sort support', proname => 'btint8sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint8sortsupport' }, -{ oid => '9292', descr => 'skip support', +{ oid => '6404', descr => 'skip support', proname => 'btint8skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint8skipsupport' }, { oid => '354', descr => 'less-equal-greater', @@ -1043,7 +1043,7 @@ { oid => '3134', descr => 'sort support', proname => 'btoidsortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btoidsortsupport' }, -{ oid => '9293', descr => 'skip support', +{ oid => '6405', descr => 'skip support', proname => 'btoidskipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btoidskipsupport' }, { oid => '404', descr => 'less-equal-greater', @@ -1052,7 +1052,7 @@ { oid => '358', descr => 'less-equal-greater', proname => 'btcharcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'char char', prosrc => 'btcharcmp' }, -{ oid => '9294', descr => 'skip support', +{ oid => '6406', descr => 'skip support', proname => 'btcharskipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btcharskipsupport' }, { oid => '359', descr => 'less-equal-greater', @@ -1180,22 +1180,22 @@ proname => 'name', proleakproof => 't', prorettype => 'name', proargtypes => 'bpchar', prosrc => 'bpchar_name' }, -{ oid => '8577', descr => 'convert int2 to bytea', +{ oid => '6367', descr => 'convert int2 to bytea', proname => 'bytea', proleakproof => 't', prorettype => 'bytea', proargtypes => 'int2', prosrc => 'int2_bytea' }, -{ oid => '8578', descr => 'convert int4 to bytea', +{ oid => '6368', descr => 'convert int4 to bytea', proname => 'bytea', proleakproof => 't', prorettype => 'bytea', 
proargtypes => 'int4', prosrc => 'int4_bytea' }, -{ oid => '8579', descr => 'convert int8 to bytea', +{ oid => '6369', descr => 'convert int8 to bytea', proname => 'bytea', proleakproof => 't', prorettype => 'bytea', proargtypes => 'int8', prosrc => 'int8_bytea' }, -{ oid => '8580', descr => 'convert bytea to int2', +{ oid => '6370', descr => 'convert bytea to int2', proname => 'int2', prorettype => 'int2', proargtypes => 'bytea', prosrc => 'bytea_int2' }, -{ oid => '8581', descr => 'convert bytea to int4', +{ oid => '6371', descr => 'convert bytea to int4', proname => 'int4', prorettype => 'int4', proargtypes => 'bytea', prosrc => 'bytea_int4' }, -{ oid => '8582', descr => 'convert bytea to int8', +{ oid => '6372', descr => 'convert bytea to int8', proname => 'int8', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'bytea_int8' }, @@ -1259,10 +1259,10 @@ { oid => '772', descr => 'hash', proname => 'hashvarlenaextended', prorettype => 'int8', proargtypes => 'internal int8', prosrc => 'hashvarlenaextended' }, -{ oid => '9708', descr => 'hash', +{ oid => '6413', descr => 'hash', proname => 'hashbytea', prorettype => 'int4', proargtypes => 'bytea', prosrc => 'hashbytea' }, -{ oid => '9709', descr => 'hash', +{ oid => '6414', descr => 'hash', proname => 'hashbyteaextended', prorettype => 'int8', proargtypes => 'bytea int8', prosrc => 'hashbyteaextended' }, { oid => '457', descr => 'hash', @@ -1301,34 +1301,34 @@ { oid => '781', descr => 'hash', proname => 'hashmacaddr8extended', prorettype => 'int8', proargtypes => 'macaddr8 int8', prosrc => 'hashmacaddr8extended' }, -{ oid => '9710', descr => 'hash', +{ oid => '6415', descr => 'hash', proname => 'hashdate', prorettype => 'int4', proargtypes => 'date', prosrc => 'hashdate' }, -{ oid => '9711', descr => 'hash', +{ oid => '6416', descr => 'hash', proname => 'hashdateextended', prorettype => 'int8', proargtypes => 'date int8', prosrc => 'hashdateextended' }, -{ oid => '9712', descr => 'hash', +{ oid => '6417', descr => 'hash', proname => 'hashbool', prorettype => 'int4', proargtypes => 'bool', prosrc => 'hashbool' }, -{ oid => '9713', descr => 'hash', +{ oid => '6418', descr => 'hash', proname => 'hashboolextended', prorettype => 'int8', proargtypes => 'bool int8', prosrc => 'hashboolextended' }, -{ oid => '9714', descr => 'hash', +{ oid => '6419', descr => 'hash', proname => 'hashxid', prorettype => 'int4', proargtypes => 'xid', prosrc => 'hashxid' }, -{ oid => '9715', descr => 'hash', +{ oid => '6420', descr => 'hash', proname => 'hashxidextended', prorettype => 'int8', proargtypes => 'xid int8', prosrc => 'hashxidextended' }, -{ oid => '9716', descr => 'hash', +{ oid => '6421', descr => 'hash', proname => 'hashxid8', prorettype => 'int4', proargtypes => 'xid8', prosrc => 'hashxid8' }, -{ oid => '9717', descr => 'hash', +{ oid => '6422', descr => 'hash', proname => 'hashxid8extended', prorettype => 'int8', proargtypes => 'xid8 int8', prosrc => 'hashxid8extended' }, -{ oid => '9718', descr => 'hash', +{ oid => '6423', descr => 'hash', proname => 'hashcid', prorettype => 'int4', proargtypes => 'cid', prosrc => 'hashcid' }, -{ oid => '9719', descr => 'hash', +{ oid => '6424', descr => 'hash', proname => 'hashcidextended', prorettype => 'int8', proargtypes => 'cid int8', prosrc => 'hashcidextended' }, @@ -1348,10 +1348,10 @@ proname => 'text_smaller', proleakproof => 't', prorettype => 'text', proargtypes => 'text text', prosrc => 'text_smaller' }, -{ oid => '8920', descr => 'larger of two', +{ oid => '6393', descr => 'larger of two', 
proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea', proargtypes => 'bytea bytea', prosrc => 'bytea_larger' }, -{ oid => '8921', descr => 'smaller of two', +{ oid => '6394', descr => 'smaller of two', proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea', proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' }, @@ -1533,7 +1533,7 @@ { oid => '6163', descr => 'number of set bits', proname => 'bit_count', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'bytea_bit_count' }, -{ oid => '8694', descr => 'reverse bytea', +{ oid => '6382', descr => 'reverse bytea', proname => 'reverse', prorettype => 'bytea', proargtypes => 'bytea', prosrc => 'bytea_reverse' }, @@ -1638,7 +1638,7 @@ proname => 'array_append', prosupport => 'array_append_support', proisstrict => 'f', prorettype => 'anycompatiblearray', proargtypes => 'anycompatiblearray anycompatible', prosrc => 'array_append' }, -{ oid => '8680', descr => 'planner support for array_append', +{ oid => '6378', descr => 'planner support for array_append', proname => 'array_append_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_append_support' }, { oid => '379', descr => 'prepend element onto front of array', @@ -1646,7 +1646,7 @@ proisstrict => 'f', prorettype => 'anycompatiblearray', proargtypes => 'anycompatible anycompatiblearray', prosrc => 'array_prepend' }, -{ oid => '8681', descr => 'planner support for array_prepend', +{ oid => '6379', descr => 'planner support for array_prepend', proname => 'array_prepend_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_prepend_support' }, { oid => '383', @@ -1784,17 +1784,17 @@ { oid => '6216', descr => 'take samples from array', proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray', proargtypes => 'anyarray int4', prosrc => 'array_sample' }, -{ oid => '8686', descr => 'reverse array', +{ oid => '6381', descr => 'reverse array', proname => 'array_reverse', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'array_reverse' }, -{ oid => '8810', descr => 'sort array', +{ oid => '6388', descr => 'sort array', proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'array_sort' }, -{ oid => '8811', descr => 'sort array', +{ oid => '6389', descr => 'sort array', proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray bool', proargnames => '{array,descending}', prosrc => 'array_sort_order' }, -{ oid => '8812', descr => 'sort array', +{ oid => '6390', descr => 'sort array', proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray bool bool', proargnames => '{array,descending,nulls_first}', @@ -2315,7 +2315,7 @@ { oid => '3136', descr => 'sort support', proname => 'date_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'date_sortsupport' }, -{ oid => '9295', descr => 'skip support', +{ oid => '6407', descr => 'skip support', proname => 'date_skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'date_skipsupport' }, { oid => '4133', descr => 'window RANGE support', @@ -3433,7 +3433,7 @@ proname => 'pg_sequence_last_value', provolatile => 'v', proparallel => 'u', prorettype => 'int8', proargtypes => 'regclass', prosrc => 'pg_sequence_last_value' }, -{ oid => '9876', descr => 'return sequence tuple, for use by pg_dump', +{ oid => '6427', descr => 'return sequence tuple, for use by pg_dump', proname => 'pg_get_sequence_data', provolatile => 'v', proparallel => 'u', 
prorettype => 'record', proargtypes => 'regclass', proallargtypes => '{regclass,int8,bool}', proargmodes => '{i,o,o}', @@ -3594,10 +3594,10 @@ proname => 'erfc', prorettype => 'float8', proargtypes => 'float8', prosrc => 'derfc' }, -{ oid => '8702', descr => 'gamma function', +{ oid => '6383', descr => 'gamma function', proname => 'gamma', prorettype => 'float8', proargtypes => 'float8', prosrc => 'dgamma' }, -{ oid => '8703', +{ oid => '6384', descr => 'natural logarithm of absolute value of gamma function', proname => 'lgamma', prorettype => 'float8', proargtypes => 'float8', prosrc => 'dlgamma' }, @@ -3689,7 +3689,7 @@ { oid => '872', descr => 'capitalize each word', proname => 'initcap', prorettype => 'text', proargtypes => 'text', prosrc => 'initcap' }, -{ oid => '9569', descr => 'fold case', +{ oid => '6412', descr => 'fold case', proname => 'casefold', prorettype => 'text', proargtypes => 'text', prosrc => 'casefold' }, { oid => '873', descr => 'left-pad string to length', @@ -4516,7 +4516,7 @@ { oid => '1693', descr => 'less-equal-greater', proname => 'btboolcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'bool bool', prosrc => 'btboolcmp' }, -{ oid => '9296', descr => 'skip support', +{ oid => '6408', descr => 'skip support', proname => 'btboolskipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btboolskipsupport' }, @@ -5451,17 +5451,17 @@ prorettype => 'bool', proargtypes => 'oid text', prosrc => 'has_any_column_privilege_id' }, -{ oid => '8048', +{ oid => '6348', descr => 'user privilege on large object by username, large object oid', proname => 'has_largeobject_privilege', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'name oid text', prosrc => 'has_largeobject_privilege_name_id' }, -{ oid => '8049', +{ oid => '6349', descr => 'current user privilege on large object by large object oid', proname => 'has_largeobject_privilege', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'oid text', prosrc => 'has_largeobject_privilege_id' }, -{ oid => '8050', +{ oid => '6350', descr => 'user privilege on large object by user oid, large object oid', proname => 'has_largeobject_privilege', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'oid oid text', @@ -5612,19 +5612,19 @@ proname => 'pg_stat_get_autoanalyze_count', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_autoanalyze_count' }, -{ oid => '8406', descr => 'total vacuum time, in milliseconds', +{ oid => '6358', descr => 'total vacuum time, in milliseconds', proname => 'pg_stat_get_total_vacuum_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_vacuum_time' }, -{ oid => '8407', descr => 'total autovacuum time, in milliseconds', +{ oid => '6359', descr => 'total autovacuum time, in milliseconds', proname => 'pg_stat_get_total_autovacuum_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_autovacuum_time' }, -{ oid => '8408', descr => 'total analyze time, in milliseconds', +{ oid => '6360', descr => 'total analyze time, in milliseconds', proname => 'pg_stat_get_total_analyze_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_analyze_time' }, -{ oid => '8409', descr => 'total autoanalyze time, in milliseconds', +{ oid => '6361', descr => 'total autoanalyze 
time, in milliseconds', proname => 'pg_stat_get_total_autoanalyze_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_autoanalyze_time' }, @@ -5901,12 +5901,12 @@ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_sessions_killed' }, -{ oid => '8403', +{ oid => '6355', descr => 'statistics: number of parallel workers planned to be launched by queries', proname => 'pg_stat_get_db_parallel_workers_to_launch', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_parallel_workers_to_launch' }, -{ oid => '8404', +{ oid => '6356', descr => 'statistics: number of parallel workers effectively launched by queries', proname => 'pg_stat_get_db_parallel_workers_launched', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', @@ -5928,7 +5928,7 @@ proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_checkpointer_num_requested' }, -{ oid => '8599', +{ oid => '6377', descr => 'statistics: number of checkpoints performed by the checkpointer', proname => 'pg_stat_get_checkpointer_num_performed', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', @@ -5955,7 +5955,7 @@ proname => 'pg_stat_get_checkpointer_buffers_written', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_checkpointer_buffers_written' }, -{ oid => '8573', +{ oid => '6366', descr => 'statistics: number of SLRU buffers written during checkpoints and restartpoints', proname => 'pg_stat_get_checkpointer_slru_written', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', @@ -6001,7 +6001,7 @@ proargnames => '{backend_type,object,context,reads,read_bytes,read_time,writes,write_bytes,write_time,writebacks,writeback_time,extends,extend_bytes,extend_time,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}', prosrc => 'pg_stat_get_io' }, -{ oid => '8806', descr => 'statistics: backend IO statistics', +{ oid => '6386', descr => 'statistics: backend IO statistics', proname => 'pg_stat_get_backend_io', prorows => '5', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => 'int4', @@ -6017,7 +6017,7 @@ proargmodes => '{o,o,o,o,o}', proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,stats_reset}', prosrc => 'pg_stat_get_wal' }, -{ oid => '8037', descr => 'statistics: backend WAL activity', +{ oid => '6313', descr => 'statistics: backend WAL activity', proname => 'pg_stat_get_backend_wal', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => 'int4', proallargtypes => '{int4,int8,int8,numeric,int8,timestamptz}', @@ -6156,7 +6156,7 @@ proname => 'pg_stat_reset_single_function_counters', provolatile => 'v', prorettype => 'void', proargtypes => 'oid', prosrc => 'pg_stat_reset_single_function_counters' }, -{ oid => '8807', descr => 'statistics: reset statistics for a single backend', +{ oid => '6387', descr => 'statistics: reset statistics for a single backend', proname => 'pg_stat_reset_backend_stats', provolatile => 'v', prorettype => 'void', proargtypes => 'int4', prosrc => 'pg_stat_reset_backend_stats' }, @@ -6370,10 +6370,10 @@ { oid => '3411', descr => 'hash', proname => 'timestamp_hash_extended', prorettype 
=> 'int8', proargtypes => 'timestamp int8', prosrc => 'timestamp_hash_extended' }, -{ oid => '9720', descr => 'hash', +{ oid => '6425', descr => 'hash', proname => 'timestamptz_hash', prorettype => 'int4', proargtypes => 'timestamptz', prosrc => 'timestamptz_hash' }, -{ oid => '9721', descr => 'hash', +{ oid => '6426', descr => 'hash', proname => 'timestamptz_hash_extended', prorettype => 'int8', proargtypes => 'timestamptz int8', prosrc => 'timestamptz_hash_extended' }, { oid => '2041', descr => 'intervals overlap?', @@ -6398,7 +6398,7 @@ { oid => '3137', descr => 'sort support', proname => 'timestamp_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'timestamp_sortsupport' }, -{ oid => '9297', descr => 'skip support', +{ oid => '6409', descr => 'skip support', proname => 'timestamp_skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'timestamp_skipsupport' }, @@ -6594,7 +6594,7 @@ proname => 'pg_describe_object', provolatile => 's', prorettype => 'text', proargtypes => 'oid oid int4', prosrc => 'pg_describe_object' }, -{ oid => '8730', descr => 'get ACL for SQL object', +{ oid => '6385', descr => 'get ACL for SQL object', proname => 'pg_get_acl', provolatile => 's', prorettype => '_aclitem', proargtypes => 'oid oid int4', proargnames => '{classid,objid,objsubid}', prosrc => 'pg_get_acl' }, @@ -6793,7 +6793,7 @@ proargnames => '{rm_id, rm_name, rm_builtin}', prosrc => 'pg_get_wal_resource_managers' }, -{ oid => '8303', descr => 'get info about loaded modules', +{ oid => '6353', descr => 'get info about loaded modules', proname => 'pg_get_loaded_modules', prorows => '10', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '', proallargtypes => '{text,text,text}', @@ -6993,7 +6993,7 @@ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'aggregate_dummy' }, -{ oid => '8595', descr => 'maximum value of all record input values', +{ oid => '6373', descr => 'maximum value of all record input values', proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'record', proargtypes => 'record', prosrc => 'aggregate_dummy' }, { oid => '2244', descr => 'maximum value of all bpchar input values', @@ -7011,7 +7011,7 @@ { oid => '5099', descr => 'maximum value of all xid8 input values', proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8', proargtypes => 'xid8', prosrc => 'aggregate_dummy' }, -{ oid => '8922', descr => 'maximum value of all bytea input values', +{ oid => '6395', descr => 'maximum value of all bytea input values', proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea', proargtypes => 'bytea', prosrc => 'aggregate_dummy' }, @@ -7069,7 +7069,7 @@ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'aggregate_dummy' }, -{ oid => '8596', descr => 'minimum value of all record input values', +{ oid => '6374', descr => 'minimum value of all record input values', proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'record', proargtypes => 'record', prosrc => 'aggregate_dummy' }, { oid => '2245', descr => 'minimum value of all bpchar input values', @@ -7087,7 +7087,7 @@ { oid => '5100', descr => 'minimum value of all xid8 input values', proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8', proargtypes => 'xid8', prosrc => 'aggregate_dummy' }, -{ oid => '8923', descr => 'minimum value of all 
bytea input values', +{ oid => '6396', descr => 'minimum value of all bytea input values', proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea', proargtypes => 'bytea', prosrc => 'aggregate_dummy' }, @@ -7950,10 +7950,10 @@ proargtypes => 'internal', prosrc => 'tsm_system_handler' }, # CRC variants -{ oid => '8571', descr => 'CRC-32 value', +{ oid => '6364', descr => 'CRC-32 value', proname => 'crc32', proleakproof => 't', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'crc32_bytea' }, -{ oid => '8572', descr => 'CRC-32C value', +{ oid => '6365', descr => 'CRC-32C value', proname => 'crc32c', proleakproof => 't', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'crc32c_bytea' }, @@ -8497,7 +8497,7 @@ proargmodes => '{o,o,o,o,o,o}', proargnames => '{name,statement,is_holdable,is_binary,is_scrollable,creation_time}', prosrc => 'pg_cursor' }, -{ oid => '9221', descr => 'get abbreviations from current timezone', +{ oid => '6401', descr => 'get abbreviations from current timezone', proname => 'pg_timezone_abbrevs_zone', prorows => '10', proretset => 't', provolatile => 's', prorettype => 'record', proargtypes => '', proallargtypes => '{text,interval,bool}', proargmodes => '{o,o,o}', @@ -8609,7 +8609,7 @@ prosupport => 'generate_series_numeric_support', proretset => 't', prorettype => 'numeric', proargtypes => 'numeric numeric', prosrc => 'generate_series_numeric' }, -{ oid => '8405', descr => 'planner support for generate_series', +{ oid => '6357', descr => 'planner support for generate_series', proname => 'generate_series_numeric_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'generate_series_numeric_support' }, { oid => '938', descr => 'non-persistent series generator', @@ -8629,7 +8629,7 @@ prorettype => 'timestamptz', proargtypes => 'timestamptz timestamptz interval text', prosrc => 'generate_series_timestamptz_at_zone' }, -{ oid => '8402', descr => 'planner support for generate_series', +{ oid => '6354', descr => 'planner support for generate_series', proname => 'generate_series_timestamp_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' }, @@ -9468,7 +9468,7 @@ { oid => '3300', descr => 'sort support', proname => 'uuid_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'uuid_sortsupport' }, -{ oid => '9298', descr => 'skip support', +{ oid => '6410', descr => 'skip support', proname => 'uuid_skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'uuid_skipsupport' }, { oid => '2961', descr => 'I/O', @@ -9486,13 +9486,13 @@ { oid => '3432', descr => 'generate random UUID', proname => 'gen_random_uuid', provolatile => 'v', prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, -{ oid => '9895', descr => 'generate UUID version 4', +{ oid => '6428', descr => 'generate UUID version 4', proname => 'uuidv4', provolatile => 'v', prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, -{ oid => '9896', descr => 'generate UUID version 7', +{ oid => '6429', descr => 'generate UUID version 7', proname => 'uuidv7', provolatile => 'v', prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' }, -{ oid => '9897', +{ oid => '6430', descr => 'generate UUID version 7 with a timestamp shifted by specified interval', proname => 'uuidv7', provolatile => 'v', prorettype => 'uuid', proargtypes => 'interval', proargnames => '{shift}', @@ -10654,10 +10654,10 @@ { oid => '2987', descr => 'less-equal-greater', proname 
=> 'btrecordcmp', prorettype => 'int4', proargtypes => 'record record', prosrc => 'btrecordcmp' }, -{ oid => '8597', descr => 'larger of two', +{ oid => '6375', descr => 'larger of two', proname => 'record_larger', prorettype => 'record', proargtypes => 'record record', prosrc => 'record_larger' }, -{ oid => '8598', descr => 'smaller of two', +{ oid => '6376', descr => 'smaller of two', proname => 'record_smaller', prorettype => 'record', proargtypes => 'record record', prosrc => 'record_smaller' }, @@ -10897,7 +10897,7 @@ { oid => '3870', descr => 'less-equal-greater', proname => 'range_cmp', prorettype => 'int4', proargtypes => 'anyrange anyrange', prosrc => 'range_cmp' }, -{ oid => '8849', descr => 'sort support', +{ oid => '6391', descr => 'sort support', proname => 'range_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'range_sortsupport' }, { oid => '3871', @@ -12316,7 +12316,7 @@ proname => 'array_subscript_handler', prosupport => 'array_subscript_handler_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_subscript_handler' }, -{ oid => '8682', descr => 'planner support for array_subscript_handler', +{ oid => '6380', descr => 'planner support for array_subscript_handler', proname => 'array_subscript_handler_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_subscript_handler_support' }, { oid => '6180', descr => 'raw array subscripting support', @@ -12355,7 +12355,7 @@ provolatile => 'v', prorettype => 'record', proargtypes => '', proallargtypes => '{text,int8,timestamptz}', proargmodes => '{o,o,o}', proargnames => '{name,size,modification}', prosrc => 'pg_ls_waldir' }, -{ oid => '9220', descr => 'list of files in the pg_wal/summaries directory', +{ oid => '6400', descr => 'list of files in the pg_wal/summaries directory', proname => 'pg_ls_summariesdir', procost => '10', prorows => '20', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => '', proallargtypes => '{text,int8,timestamptz}', @@ -12511,21 +12511,21 @@ proargnames => '{summarized_tli,summarized_lsn,pending_lsn,summarizer_pid}', prosrc => 'pg_get_wal_summarizer_state' }, # Statistics Import -{ oid => '8459', descr => 'restore statistics on relation', +{ oid => '6362', descr => 'restore statistics on relation', proname => 'pg_restore_relation_stats', provariadic => 'any', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'bool', proargtypes => 'any', proargmodes => '{v}', proargnames => '{kwargs}', prosrc => 'pg_restore_relation_stats' }, -{ oid => '9160', descr => 'clear statistics on relation', +{ oid => '6397', descr => 'clear statistics on relation', proname => 'pg_clear_relation_stats', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => 'text text', proargnames => '{schemaname,relname}', prosrc => 'pg_clear_relation_stats' }, -{ oid => '8461', descr => 'restore statistics on attribute', +{ oid => '6363', descr => 'restore statistics on attribute', proname => 'pg_restore_attribute_stats', provariadic => 'any', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'bool', proargtypes => 'any', proargmodes => '{v}', proargnames => '{kwargs}', prosrc => 'pg_restore_attribute_stats' }, -{ oid => '9162', descr => 'clear statistics on attribute', +{ oid => '6398', descr => 'clear statistics on attribute', proname => 'pg_clear_attribute_stats', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'void', 
proargtypes => 'text text text bool', @@ -12533,12 +12533,12 @@ prosrc => 'pg_clear_attribute_stats' }, # GiST stratnum implementations -{ oid => '8047', descr => 'GiST support', +{ oid => '6347', descr => 'GiST support', proname => 'gist_translate_cmptype_common', prorettype => 'int2', proargtypes => 'int4', prosrc => 'gist_translate_cmptype_common' }, # AIO related functions -{ oid => '9200', descr => 'information about in-progress asynchronous IOs', +{ oid => '6399', descr => 'information about in-progress asynchronous IOs', proname => 'pg_get_aios', prorows => '100', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '', From 2652835d3efa003439ecc23d5fc3cf089c5952a6 Mon Sep 17 00:00:00 2001 From: Joe Conway Date: Sun, 29 Jun 2025 22:28:10 -0400 Subject: [PATCH 173/602] Stamp HEAD as 19devel. Let the hacking begin ... --- configure | 18 +- configure.ac | 2 +- doc/src/sgml/filelist.sgml | 2 +- doc/src/sgml/release-18.sgml | 4200 ---------------------------------- doc/src/sgml/release-19.sgml | 16 + doc/src/sgml/release.sgml | 2 +- meson.build | 2 +- src/tools/git_changelog | 1 + src/tools/version_stamp.pl | 2 +- 9 files changed, 31 insertions(+), 4214 deletions(-) delete mode 100644 doc/src/sgml/release-18.sgml create mode 100644 doc/src/sgml/release-19.sgml diff --git a/configure b/configure index 3d3d3db97a456..16ef5b58d1a87 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for PostgreSQL 18beta1. +# Generated by GNU Autoconf 2.69 for PostgreSQL 19devel. # # Report bugs to . # @@ -582,8 +582,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='PostgreSQL' PACKAGE_TARNAME='postgresql' -PACKAGE_VERSION='18beta1' -PACKAGE_STRING='PostgreSQL 18beta1' +PACKAGE_VERSION='19devel' +PACKAGE_STRING='PostgreSQL 19devel' PACKAGE_BUGREPORT='pgsql-bugs@lists.postgresql.org' PACKAGE_URL='https://www.postgresql.org/' @@ -1468,7 +1468,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PostgreSQL 18beta1 to adapt to many kinds of systems. +\`configure' configures PostgreSQL 19devel to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1533,7 +1533,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PostgreSQL 18beta1:";; + short | recursive ) echo "Configuration of PostgreSQL 19devel:";; esac cat <<\_ACEOF @@ -1724,7 +1724,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PostgreSQL configure 18beta1 +PostgreSQL configure 19devel generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2477,7 +2477,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PostgreSQL $as_me 18beta1, which was +It was created by PostgreSQL $as_me 19devel, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -20063,7 +20063,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by PostgreSQL $as_me 18beta1, which was +This file was extended by PostgreSQL $as_me 19devel, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20134,7 +20134,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -PostgreSQL config.status 18beta1 +PostgreSQL config.status 19devel configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 4b8335dc6138e..b3efc49c97a9d 100644 --- a/configure.ac +++ b/configure.ac @@ -17,7 +17,7 @@ dnl Read the Autoconf manual for details. dnl m4_pattern_forbid(^PGAC_)dnl to catch undefined macros -AC_INIT([PostgreSQL], [18beta1], [pgsql-bugs@lists.postgresql.org], [], [https://www.postgresql.org/]) +AC_INIT([PostgreSQL], [19devel], [pgsql-bugs@lists.postgresql.org], [], [https://www.postgresql.org/]) m4_if(m4_defn([m4_PACKAGE_VERSION]), [2.69], [], [m4_fatal([Autoconf version 2.69 is required. Untested combinations of 'autoconf' and PostgreSQL versions are not diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index fef9584f908ec..bcde3cfd0374a 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -180,7 +180,7 @@ - + diff --git a/doc/src/sgml/release-18.sgml b/doc/src/sgml/release-18.sgml deleted file mode 100644 index 66a6817a2be0f..0000000000000 --- a/doc/src/sgml/release-18.sgml +++ /dev/null @@ -1,4200 +0,0 @@ - - - - - Release 18 - - - Release date: - 2025-??-??, CURRENT AS OF 2025-06-20 - - - - Overview - - - PostgreSQL 18 contains many new features - and enhancements, including: - - - - - - - (to be completed) - - - - - - The above items and other new features of - PostgreSQL 18 are explained in more detail - in the sections below. - - - - - - - Migration to Version 18 - - - A dump/restore using or use of - or logical replication is required for - those wishing to migrate data from any previous release. See for general information on migrating to new - major releases. - - - - Version 18 contains a number of changes that may affect compatibility - with previous releases. Observe the following incompatibilities: - - - - - - - - - Change default to enable data checksums - (Greg Sabino Mullane) - § - - - - Checksums can be disabled with the - new initdb option - . - requires matching cluster checksum settings, so this new option can - be useful to upgrade non-checksum old clusters. - - - - - - - - Change time zone abbreviation handling (Tom Lane) - § - - - - The system will now favor the current session's time - zone abbreviations before checking the server variable - . Previously - timezone_abbreviations was checked first. - - - - - - - - Deprecate MD5 password - authentication (Nathan Bossart) - § - - - - Support for MD5 passwords will be removed in a future major - version release. and now emit deprecation warnings when - setting MD5 passwords. These warnings can be disabled by setting - the parameter to - off. - - - - - - - - Change and - to process the inheritance children of a parent (Michael Harris) - § - - - - The previous behavior can be performed by using the new - ONLY option. - - - - - - - - Prevent COPY FROM - from treating \. as an end-of-file marker when - reading CSV files (Daniel Vérité, Tom Lane) - § - § - - - - will still treat - \. as an end-of-file marker when reading - CSV files from STDIN. 
- Older psql clients connecting to - PostgreSQL 18 servers might experience \copy - problems. This release also enforces that \. - must appear alone on a line. - - - - - - - - Disallow unlogged partitioned tables (Michael Paquier) - § - - - - Previously ALTER TABLE SET - [UN]LOGGED did nothing, and the creation of an - unlogged partitioned table did not cause its children to be unlogged. - - - - - - - - Execute AFTER triggers as the role that was active when - trigger events were queued (Laurenz Albe) - § - - - - Previously such triggers were run as the role that was active at - trigger execution time (e.g., at ). - This is significant for cases where the role is changed between queue - time and transaction commit. - - - - - - - - Remove non-functional support for rule privileges in / (Fujii Masao) - § - - - - These have been non-functional since - PostgreSQL 8.2. - - - - - - - - Remove column pg_backend_memory_contexts.parent - (Melih Mutlu) - § - - - - This is no longer needed since - pg_backend_memory_contexts.path - was added. - - - - - - - - Change - pg_backend_memory_contexts.level - and pg_log_backend_memory_contexts() - to be one-based (Melih Mutlu, Atsushi Torikoshi, David Rowley, - Fujii Masao) - § - § - § - - - - These were previously zero-based. - - - - - - - - - Changes - - - Below you will find a detailed account of the changes between - PostgreSQL 18 and the previous major - release. - - - - Server - - - Optimizer - - - - - - - - Automatically remove some unnecessary table self-joins (Andrey - Lepikhov, Alexander Kuzmenkov, Alexander Korotkov, Alena Rybakina) - § - - - - This optimization can be disabled using server variable . - - - - - - - - Convert some IN (VALUES - ...) to x = ANY ... for better - optimizer statistics (Alena Rybakina, Andrei Lepikhov) - § - - - - - - - - Allow transforming OR-clauses - to arrays for faster index processing (Alexander Korotkov, Andrey - Lepikhov) - § - - - - - - - - Speed up the processing of INTERSECT, - EXCEPT, window aggregates, and view column aliases (Tom Lane, - David Rowley) - § - § - § - § - - - - - - - - Allow the keys of SELECT - DISTINCT to be internally reordered to avoid sorting - (Richard Guo) - § - - - - This optimization can be disabled using . - - - - - - - - Ignore GROUP BY - columns that are functionally dependent on other columns (Zhang - Mingli, Jian He, David Rowley) - § - - - - If a GROUP BY clause includes all columns of - a unique index, as well as other columns of the same table, those - other columns are redundant and can be dropped from the grouping. - This was already true for non-deferred primary keys. - - - - - - - - Allow some HAVING clauses - on GROUPING - SETS to be pushed to WHERE clauses - (Richard Guo) - § - § - § - § - - - - This allows earlier row filtering. This release also fixes some - GROUPING SETS queries that used to return - incorrect results. - - - - - - - - Improve row estimates for generate_series() - using numeric - and timestamp - values (David Rowley, Song Jinzhou) - § - § - - - - - - - - Allow the optimizer to use Right Semi Join plans - (Richard Guo) - § - - - - Semi-joins are used when needing to find if there is at least - one match. 
- - - - - - - - Allow merge joins to use incremental sorts - (Richard Guo) - § - - - - - - - - Improve the efficiency of planning queries accessing many partitions - (Ashutosh Bapat, Yuya Watari, David Rowley) - § - § - - - - - - - - Allow partitionwise - joins in more cases, and reduce its memory usage (Richard Guo, - Tom Lane, Ashutosh Bapat) - § - § - - - - - - - - Improve cost estimates of partition queries (Nikita Malakhov, - Andrei Lepikhov) - § - - - - - - - - Improve SQL-language - function plan caching (Alexander Pyhalov, Tom Lane) - § - § - - - - - - - - Improve handling of disabled optimizer features (Robert Haas) - § - - - - - - - - - Indexes - - - - - - - - Allow skip scans of btree indexes - (Peter Geoghegan) - § - § - - - - This allows multi-column btree indexes to be used by queries that - only equality-reference the second or later indexed columns. - - - - - - - - Allow non-btree unique indexes to be used as partition keys and in - materialized views (Mark Dilger) - § - § - - - - The index type must still support equality. - - - - - - - - Allow GIN indexes to - be created in parallel (Tomas Vondra, Matthias van de Meent) - § - - - - - - - - Allow values to be sorted to speed range-type GiST and btree - index builds (Bernd Helmle) - § - - - - - - - - - General Performance - - - - - - - - Add an asynchronous I/O subsystem (Andres Freund, Thomas Munro, - Nazir Bilal Yavuz, Melanie Plageman) - § - § - § - § - § - § - § - § - § - § - § - - - - This feature allows backends to queue multiple read requests, - which allows for more efficient sequential scans, bitmap - heap scans, vacuums, etc. This is enabled by server - variable , with server - variables and added to control it. - This also enables - and - values greater than zero for systems without - fadvise() support. The new system view pg_aios - shows the file handles being used for asynchronous I/O. - - - - - - - - Improve the locking performance of queries that access many relations - (Tomas Vondra) - § - - - - - - - - Improve the performance and reduce memory usage of hash joins and - GROUP BY - (David Rowley, Jeff Davis) - § - § - § - § - § - - - - This also improves hash set operations used by EXCEPT, and hash - lookups of subplan values. - - - - - - - - Allow normal vacuums to freeze some pages, even though they are - all-visible (Melanie Plageman) - § - § - - - - This reduces the overhead of later full-relation - freezing. The aggressiveness of this can be - controlled by server variable and per-table setting . Previously - vacuum never processed all-visible pages until freezing was required. - - - - - - - - Add server variable to control - file truncation during (Nathan Bossart, - Gurjeet Singh) - § - - - - A storage-level parameter with the same name and behavior already - existed. - - - - - - - - Increase server variables 's and 's default values to 16 - (Melanie Plageman) - § - § - - - - This more accurately reflects modern hardware. - - - - - - - - - Monitoring - - - - - - - - Increase the logging granularity of server variable (Melanie Plageman) - § - - - - This server variable was previously only boolean, which is still - supported. - - - - - - - - Add log_connections option to report the duration - of connection stages (Melanie Plageman) - § - - - - - - - - Add escape - %L to output the client IP - address (Greg Sabino Mullane) - § - - - - - - - - Add server variable to log - lock acquisition failures (Yuki Seino, Fujii Masao) - § - § - - - - Specifically it reports SELECT - ... NOWAIT lock failures. 
- - - - - - - - Modify pg_stat_all_tables - and its variants to report the time spent in , , and their - automatic variants (Sami Imseih) - § - - - - The new columns are total_vacuum_time, - total_autovacuum_time, - total_analyze_time, and - total_autoanalyze_time. - - - - - - - - Add delay time reporting to and (Bertrand Drouvot, Nathan Bossart) - § - § - - - - This information appears in the server log, the system views pg_stat_progress_vacuum - and pg_stat_progress_analyze, - and the output of and when in VERBOSE - mode; tracking must be enabled with the server variable . - - - - - - - - Add WAL, CPU, and average - read statistics output to ANALYZE VERBOSE - (Anthonin Bonnefoy) - § - § - - - - - - - - Add full WAL buffer count to - VACUUM/ANALYZE (VERBOSE) - and autovacuum log output (Bertrand Drouvot) - § - - - - - - - - Add per-backend I/O statistics reporting (Bertrand Drouvot) - § - § - - - - The statistics are accessed via pg_stat_get_backend_io(). - Per-backend I/O statistics can be cleared via pg_stat_reset_backend_stats(). - - - - - - - - Add pg_stat_io - columns to report I/O activity in bytes (Nazir Bilal Yavuz) - § - - - - The new columns are read_bytes, - write_bytes, and - extend_bytes. The - op_bytes column, which always equaled - BLCKSZ, - has been removed. - - - - - - - - Add WAL I/O activity rows to - pg_stat_io (Nazir Bilal Yavuz, Bertrand - Drouvot, Michael Paquier) - § - § - § - - - - This includes WAL receiver activity and a wait - event for such writes. - - - - - - - - - Change server variable - to control tracking WAL timing - in pg_stat_io instead of pg_stat_wal - (Bertrand Drouvot) - § - - - - - - - - Remove read/sync columns from pg_stat_wal - (Bertrand Drouvot) - § - § - - - - This removes columns wal_write, - wal_sync, - wal_write_time, and - wal_sync_time. - - - - - - - - Add function pg_stat_get_backend_wal() - to return per-backend WAL statistics (Bertrand - Drouvot) - § - - - - Per-backend WAL - statistics can be cleared via pg_stat_reset_backend_stats(). - - - - - - - - Add function pg_ls_summariesdir() - to specifically list the contents of PGDATA/pg_wal/summaries - (Yushi Ogiwara) - § - - - - - - - - Add column pg_stat_checkpointer.num_done - to report the number of completed checkpoints (Anton A. Melnikov) - § - - - - Columns num_timed and - num_requested count both completed and - skipped checkpoints. - - - - - - - - Add column - pg_stat_checkpointer.slru_written - to report SLRU buffers written (Nitin Jadhav) - § - - - - Also, modify the checkpoint server log message to report separate - shared buffer and SLRU buffer values. - - - - - - - - Add columns to pg_stat_database - to report parallel worker activity (Benoit Lobréau) - § - - - - The new columns are - parallel_workers_to_launch and - parallel_workers_launched. - - - - - - - - Have query id computation - of arrays consider only the first and last array elements (Dmitry - Dolgov, Sami Imseih) - § - § - - - - Jumbling is used by . - - - - - - - - Adjust query id computations to group together queries using the - same relation name (Michael Paquier, Sami Imseih) - § - - - - This is true even if the tables in different schemas have different - column names. 
- - - - - - - - Add column pg_backend_memory_contexts.type - to report the type of memory context (David Rowley) - § - - - - - - - - Add column - pg_backend_memory_contexts.path - to show memory context parents (Melih Mutlu) - § - - - - - - - - - Privileges - - - - - - - - Add function pg_get_acl() - to retrieve database access control details (Joel Jacobson) - § - § - - - - - - - - Add function has_largeobject_privilege() - to check large object privileges (Yugo Nagata) - § - - - - - - - - Allow to define - large object default privileges (Takatsuka Haruka, Yugo Nagata, - Laurenz Albe) - § - - - - - - - - Add predefined role pg_signal_autovacuum_worker - (Kirill Reshke) - § - - - - This allows sending signals to autovacuum workers. - - - - - - - - - Server Configuration - - - - - - - - Add support for the OAuth authentication - method (Jacob Champion, Daniel Gustafsson, Thomas Munro) - § - - - - This adds an oauth authentication method to pg_hba.conf, - libpq OAuth options, a server variable to load - token validation libraries, and a configure flag - to add the required compile-time libraries. - - - - - - - - Add server variable to allow - specification of multiple colon-separated TLSv1.3 cipher suites - (Erica Zhang, Daniel Gustafsson) - § - - - - - - - - Change server variable 's default - to include elliptic curve X25519 (Daniel Gustafsson, Jacob Champion) - § - - - - - - - - Rename server variable ssl_ecdh_curve to and allow multiple colon-separated - ECDH curves to be specified (Erica Zhang, - Daniel Gustafsson) - § - - - - The previous name still works. - - - - - - - - Make cancel request - keys 256 bits (Heikki Linnakangas, Jelte Fennema-Nio) - § - § - - - - This is only possible when the server and client support wire - protocol version 3.2, introduced in this release. - - - - - - - - Add server variable - to specify the maximum number of background workers (Nathan Bossart) - § - - - - With this variable set, - can be adjusted at runtime up to this maximum without a server - restart. - - - - - - - - Allow specification of the fixed number of dead tuples that will - trigger an autovacuum (Nathan - Bossart, Frédéric Yhuel) - § - - - - The server variable is . Percentages are - still used for triggering. - - - - - - - - Change server variable - to limit only files opened by a backend (Andres Freund) - § - - - - Previously files opened by the postmaster were also counted toward - this limit. - - - - - - - - Add server variable to - report the required number of semaphores (Nathan Bossart) - § - - - - This is useful for operating system configuration. - - - - - - - - Add server variable to - specify the location of extension control files (Peter Eisentraut, - Matheus Alcantara) - § - § - - - - - - - - - Streaming Replication and Recovery - - - - - - - - Allow inactive replication slots to be automatically invalided using - server variable - (Nisha Moond, Bharath Rupireddy) - § - - - - - - - - Add server variable to control the - maximum active replication origins (Euler Taveira) - § - - - - This was previously controlled by , but this new setting allows - a higher origin count in cases where fewer slots are required. - - - - - - - - - <link linkend="logical-replication">Logical Replication</link> - - - - - - - - Allow the values of generated - columns to be logically replicated (Shubham Khanna, Vignesh C, - Zhijie Hou, Shlok Kyal, Peter Smith) - § - § - § - § - - - - If the publication specifies a column list, all specified - columns, generated and non-generated, are published. 
- Without a specified column list, publication option - publish_generated_columns controls whether - generated columns are published. Previously generated columns - were not replicated and the subscriber had to compute - the values if possible; this is particularly useful for - non-PostgreSQL subscribers which lack - such a capability. - - - - - - - - Change the default streaming - option from off to parallel - (Vignesh C) - § - - - - - - - - Allow to change the - replication slot's two-phase commit behavior (Hayato Kuroda, Ajin - Cherian, Amit Kapila, Zhijie Hou) - § - § - - - - - - - - Log conflicts while - applying logical replication changes (Zhijie Hou, Nisha Moond) - § - § - § - § - § - - - - Also report in new columns of pg_stat_subscription_stats. - - - - - - - - - - - Utility Commands - - - - - - - - Allow generated - columns to be virtual, and make them the default (Peter - Eisentraut, Jian He, Richard Guo, Dean Rasheed) - § - § - § - - - - Virtual generated columns generate their values when the columns - are read, not written. The write behavior can still be specified - via the STORED option. - - - - - - - - Add OLD/NEW support to RETURNING in - DML queries (Dean Rasheed) - § - - - - Previously RETURNING only returned new values for - and , and old - values for ; - would return the appropriate value for the internal query executed. - This new syntax allows the RETURNING list of - INSERT/UPDATE/DELETE/MERGE - to explicitly return old and new values by using the special aliases - old and new. These aliases - can be renamed to avoid identifier conflicts. - - - - - - - - Allow foreign tables to be created like existing local tables - (Zhang Mingli) - § - - - - The syntax is CREATE - FOREIGN TABLE ... LIKE. - - - - - - - - Allow LIKE - with nondeterministic - collations (Peter Eisentraut) - § - - - - - - - - Allow text position search functions with nondeterministic collations - (Peter Eisentraut) - § - - - - These used to generate an error. - - - - - - - - Add builtin collation provider PG_UNICODE_FAST - (Jeff Davis) - § - - - - This locale supports case mapping, but sorts in code point order, - not natural language order. - - - - - - - - Allow and - to process partitioned tables without processing their children - (Michael Harris) - § - - - - This is enabled with the new ONLY option. This is - useful since autovacuum does not process partitioned tables, just - its children. - - - - - - - - Add functions to modify per-relation and per-column optimizer - statistics (Corey Huinker) - § - § - § - - - - The functions are pg_restore_relation_stats(), - pg_restore_attribute_stats(), - pg_clear_relation_stats(), and - pg_clear_attribute_stats(). - - - - - - - - - Add server variable to control - the file copying method (Nazir Bilal Yavuz) - § - - - - This controls whether CREATE DATABASE - ... STRATEGY=FILE_COPY and ALTER DATABASE ... SET - TABLESPACE uses file copy or clone. - - - - - - - <link linkend="ddl-constraints">Constraints</link> - - - - - - - - Allow the specification of non-overlapping PRIMARY - KEY and UNIQUE - constraints (Paul A. Jungwirth) - § - - - - This is specified by WITHOUT OVERLAPS on the - last specified column. - - - - - - - - Allow CHECK - and foreign - key constraints to be specified as NOT - ENFORCED (Amul Sul) - § - § - - - - This also adds column pg_constraint.conenforced. 
- - - - - - - - Require primary/foreign key - relationships to use either deterministic collations or the the - same nondeterministic collations (Peter Eisentraut) - § - - - - The restore of a , also used by , will fail if these requirements are not met; - schema changes must be made for these upgrade methods to succeed. - - - - - - - - Store column NOT - NULL specifications in pg_constraint - (Álvaro Herrera, Bernd Helmle) - § - - - - This allows names to be specified for NOT NULL - constraint. This also adds NOT NULL constraints - to foreign tables and NOT NULL inheritance - control to local tables. - - - - - - - - Allow to set the NOT - VALID attribute of NOT NULL constraints - (Rushabh Lathia, Jian He) - § - - - - - - - - Allow modification of the inheritability of NOT - NULL constraints (Suraj Kharage, Álvaro Herrera) - § - § - - - - The syntax is ALTER TABLE - ... ALTER CONSTRAINT ... [NO] INHERIT. - - - - - - - - Allow NOT VALID foreign key constraints on - partitioned tables (Amul Sul) - § - - - - - - - - Allow dropping - of constraints ONLY on partitioned tables - (Álvaro Herrera) - § - - - - This was previously erroneously prohibited. - - - - - - - - <xref linkend="sql-copy"/> - - - - - - - - Add REJECT_LIMIT to control the number of invalid - rows COPY FROM can ignore (Atsushi Torikoshi) - § - - - - This is available when ON_ERROR = 'ignore'. - - - - - - - - Allow COPY TO to copy rows from populated - materialized views (Jian He) - § - - - - - - - - Add COPY LOG_VERBOSITY level - silent to suppress log output of ignored rows - (Atsushi Torikoshi) - § - - - - This new level suppresses output for discarded input rows when - on_error = 'ignore'. - - - - - - - - Disallow COPY FREEZE on foreign tables (Nathan - Bossart) - § - - - - Previously, the COPY worked but the - FREEZE was ignored, so disallow this command. - - - - - - - - <xref linkend="sql-explain"/> - - - - - - - - Automatically include BUFFERS output in - EXPLAIN ANALYZE (Guillaume Lelarge, David Rowley) - § - - - - - - - - Add full WAL buffer count to EXPLAIN - (WAL) output (Bertrand Drouvot) - § - - - - - - - - In EXPLAIN ANALYZE, report the number of index - lookups used per index scan node (Peter Geoghegan) - § - - - - - - - - Modify EXPLAIN to output fractional row counts - (Ibrar Ahmed, Ilia Evdokimov, Robert Haas) - § - § - - - - - - - - Add memory and disk usage details to Material, - Window Aggregate, and common table expression - nodes to EXPLAIN output (David Rowley, Tatsuo - Ishii) - § - § - § - § - - - - - - - - - Add details about window function arguments to - EXPLAIN output (Tom Lane) - § - - - - - - - - Add Parallel Bitmap Heap Scan worker cache - statistics to EXPLAIN ANALYZE (David Geier, - Heikki Linnakangas, Donghang Lin, Alena Rybakina, David Rowley) - § - - - - - - - - Indicate disabled nodes in EXPLAIN ANALYZE output - (Robert Haas, David Rowley, Laurenz Albe) - § - § - § - - - - - - - - - - - Data Types - - - - - - - - Improve Unicode - full case mapping and conversion (Jeff Davis) - § - § - - - - This adds the ability to do conditional and title case mapping, - and case map single characters to multiple characters. - - - - - - - - Allow jsonb - null values to be cast to scalar types as - NULL (Tom Lane) - § - - - - Previously such casts generated an error. 
- - - - - - - - Add optional parameter to json{b}_strip_nulls - to allow removal of null array elements (Florents Tselai) - § - - - - - - - - Add function array_sort() - which sorts an array's first dimension (Junwang Zhao, Jian He) - § - - - - - - - - Add function array_reverse() - which reverses an array's first dimension (Aleksander Alekseev) - § - - - - - - - - Add function reverse() - to reverse bytea bytes (Aleksander Alekseev) - § - - - - - - - - Allow casting between integer types and bytea (Aleksander - Alekseev) - § - - - - The integer values are stored as bytea two's complement - values. - - - - - - - - Update Unicode data to Unicode 16.0.0 (Peter - Eisentraut) - § - - - - - - - - Add full text search stemming for Estonian - (Tom Lane) - § - - - - - - - - Improve the XML - error codes to more closely match the SQL standard - (Tom Lane) - § - - - - These errors are reported via SQLSTATE. - - - - - - - - - Functions - - - - - - - - Add function casefold() - to allow for more sophisticated case-insensitive matching (Jeff Davis) - § - - - - This allows more accurate comparisons, i.e., a character can have - multiple upper or lower case equivalents, or upper or lower case - conversion changes the number of characters. - - - - - - - - Allow MIN()/MAX() - aggregates on arrays and composite types (Aleksander Alekseev, - Marat Buharov) - § - § - - - - - - - - Add a WEEK option to EXTRACT() - (Tom Lane) - § - - - - - - - - Improve the output EXTRACT(QUARTER ...) for - negative values (Tom Lane) - § - - - - - - - - Add roman numeral support to to_number() - (Hunaid Sohail) - § - - - - This is accessed via the RN pattern. - - - - - - - - Add UUID - version 7 generation function uuidv7() - (Andrey Borodin) - § - - - - This UUID value is - temporally sortable. Function alias uuidv4() - has been added to explicitly generate version 4 UUIDs. - - - - - - - - Add functions crc32() - and crc32c() - to compute CRC values (Aleksander Alekseev) - § - - - - - - - - Add math functions gamma() - and lgamma() - (Dean Rasheed) - § - - - - - - - - Allow => syntax for named cursor arguments in - PL/pgSQL (Pavel Stehule) - § - - - - We previously only accepted :=. - - - - - - - - Allow regexp_match[es]()/regexp_like()/regexp_replace()/regexp_count()/regexp_instr()/regexp_substr()/regexp_split_to_table()/regexp_split_to_array() - to use named arguments (Jian He) - § - - - - - - - - - <xref linkend="libpq"/> - - - - - - - - Add function PQfullProtocolVersion() - to report the full, including minor, protocol version number (Jacob - Champion, Jelte Fennema-Nio) - § - - - - - - - - Add libpq connection parameters - and environment variables to - specify the minimum and maximum acceptable protocol version for - connections (Jelte Fennema-Nio) - § - § - - - - - - - - Add libpq function PQservice() - to return the connection service name (Michael Banck) - § - - - - - - - - Report changes to the client - (Alexander Kukushkin, Jelte Fennema-Nio, Tomas Vondra) - § - § - - - - - - - - Add PQtrace() output - for all message types, including authentication (Jelte Fennema-Nio) - § - § - § - § - § - - - - - - - - Add libpq connection parameter sslkeylogfile - which dumps out SSL key material (Abhishek Chanda, - Daniel Gustafsson) - § - - - - This is useful for debugging. - - - - - - - - Modify some libpq function signatures to use - int64_t (Thomas Munro) - § - - - - These previously used pg_int64, which is now - deprecated. 
- - - - - - - - - <xref linkend="app-psql"/> - - - - - - - - Allow psql to parse, bind, and close - named prepared statements (Anthonin Bonnefoy, Michael Paquier) - § - - - - This is accomplished with new commands \parse, - \bind_named, - and \close_prepared. - - - - - - - - Add psql backslash commands to allowing - issuance of pipeline queries (Anthonin Bonnefoy) - § - § - § - - - - The new commands are \startpipeline, - \syncpipeline, \sendpipeline, - \endpipeline, \flushrequest, - \flush, and \getresults. - - - - - - - - Allow adding pipeline status to the psql - prompt and add related state variables (Anthonin Bonnefoy) - § - - - - The new prompt character is %P and - the new psql variables are PIPELINE_SYNC_COUNT, - PIPELINE_COMMAND_COUNT, - and PIPELINE_RESULT_COUNT. - - - - - - - - Allow adding the connection service name to the - psql prompt or access it via - psql variable (Michael Banck) - § - - - - - - - - Add psql option to use expanded mode on - all list commands (Dean Rasheed) - § - - - - Adding backslash suffix x enables this. - - - - - - - - Change psql's to use tabular format - and include more information (Álvaro Herrera, Maiquel Grassi, - Hunaid Sohail) - § - - - - - - - - Add function's leakproof indicator - to psql's \df+, - \do+, \dAo+, and - \dC+ outputs (Yugo Nagata) - § - - - - - - - - Add access method details for partitioned relations in \dP+ - (Justin Pryzby) - § - - - - - - - - Add default_version - to the psql \dx - extension output (Magnus Hagander) - § - - - - - - - - Add psql variable to set the default \watch - wait time (Daniel Gustafsson) - § - - - - - - - - - Server Applications - - - - - - - - Change to default to enabling checksums - (Greg Sabino Mullane) - § - § - - - - The new initdb option - disables checksums. - - - - - - - - Add initdb option - to avoid syncing heap/index - files (Nathan Bossart) - § - - - - initdb option - is still available to avoid syncing any files. - - - - - - - - Add option - to compute only missing - optimizer statistics (Corey Huinker, Nathan Bossart) - § - § - - - - This option can only be used by - and . - - - - - - - - Add option - / to enable hard linking - (Israel Barth Rubio, Robert Haas) - § - - - - Only some files can be hard linked. This should not be used if the - backups will be used independently. - - - - - - - - Allow to verify tar-format - backups (Amul Sul) - § - - - - - - - - If 's - specifies a database name, use it in - output (Masahiko Sawada) - § - - - - - - - - Add option - to change the default - char signedness (Masahiko Sawada) - § - - - - - - - - <link - linkend="app-pgdump"><application>pg_dump</application></link>/<link - linkend="app-pg-dumpall"><application>pg_dumpall</application></link>/<link - linkend="app-pgrestore"><application>pg_restore</application></link> - - - - - - - - Allow to dump in the same output - formats as pg_dump supports (Mahendra - Singh Thalor, Andrew Dunstan) - § - - - - Also modify to handle such dumps. - Previously pg_dumpall only supported - text format. - - - - - - - - Add options - , , - and (Jeff Davis) - § - - - - - - - - Add pg_dump and option to - dump sequence data that would normally be excluded (Nathan Bossart) - § - § - - - - - - - - Add , , - and options - , , - , and - (Corey Huinker, Jeff Davis) - § - - - - - - - - Add option to disable row level - security policy processing in , - , - (Nikolay Samokhvalov) - § - - - - This is useful for migrating to systems with different policies. 
- - - - - - - - - <xref linkend="pgupgrade"/> - - - - - - - - Allow pg_upgrade to preserve optimizer - statistics (Corey Huinker, Jeff Davis, Nathan Bossart) - § - § - § - § - - - - Extended statistics are not preserved. Also add - pg_upgrade option - to disable statistics preservation. - - - - - - - - Allow pg_upgrade to process database - checks in parallel (Nathan Bossart) - § - § - § - § - § - § - § - § - § - § - § - - - - This is controlled by the existing option. - - - - - - - - Add pg_upgrade option - to swap directories rather than copy, clone, - or link files (Nathan Bossart) - § - - - - This mode is potentially the fastest. - - - - - - - - Add pg_upgrade option - to set the default - char signedness of new cluster (Masahiko Sawada) - § - § - - - - This is to handle cases where a - pre-PostgreSQL 18 cluster's default - CPU signedness does not match the new cluster. - - - - - - - - - Logical Replication Applications - - - - - - - - Add option - to create logical replicas for all databases - (Shubham Khanna) - § - - - - - - - - Add pg_createsubscriber option - to remove publications (Shubham Khanna) - § - - - - - - - - Add pg_createsubscriber option - to enable prepared transactions - (Shubham Khanna) - § - - - - - - - - Add option - to specify failover slots (Hayato Kuroda) - § - - - - - - - - Allow pg_recvlogical - to work without - (Hayato Kuroda) - § - - - - - - - - - - - Source Code - - - - - - - - Separate the loading and running of injection points - (Michael Paquier, Heikki Linnakangas) - § - § - - - - Injection points can now be created, but not run, via INJECTION_POINT_LOAD(), - and such injection points can be run via INJECTION_POINT_CACHED(). - - - - - - - - Support runtime arguments in injection points (Michael Paquier) - § - - - - - - - - Allow inline injection point test code with IS_INJECTION_POINT_ATTACHED() - (Heikki Linnakangas) - § - - - - - - - - Improve the performance of processing long JSON strings using - SIMD (Single Instruction Multiple Data) (David - Rowley) - § - - - - - - - - Speed up CRC32C calculations using x86 AVX-512 - instructions (Raghuveer Devulapalli, Paul Amonson) - § - - - - - - - - Add ARM Neon and SVE CPU - intrinsics for popcount (integer bit counting) (Chiranmoy - Bhattacharya, Devanga Susmitha, Rama Malladi) - § - § - - - - - - - - Improve the speed of numeric multiplication and division (Joel - Jacobson, Dean Rasheed) - § - § - § - § - - - - - - - - Add configure option - to enable NUMA awareness (Jakub Wartak, Bertrand - Drouvot) - § - § - § - - - - The function pg_numa_available() - reports on NUMA awareness, and system views pg_shmem_allocations_numa - and pg_buffercache_numa - which report on shared memory distribution across - NUMA nodes. - - - - - - - - Add TOAST table to pg_index - to allow for very large expression indexes (Nathan Bossart) - § - - - - - - - - Remove column pg_attribute.attcacheoff - (David Rowley) - § - - - - - - - - Add column pg_class.relallfrozen - (Melanie Plageman) - § - - - - - - - - Add amgettreeheight, - amconsistentequality, and - amconsistentordering to the index access method - API (Mark Dilger) - § - § - - - - - - - - Add GiST support function stratnum() - (Paul A. Jungwirth) - § - - - - - - - - Record the default CPU signedness of - char in - (Masahiko Sawada) - § - - - - - - - - Add support for Python "Limited API" in (Peter Eisentraut) - § - § - - - - This helps prevent problems caused by - Python 3.x version mismatches. 
- - - - - - - - Change the minimum supported Python - version to 3.6.8 (Jacob Champion) - § - - - - - - - - Remove support for OpenSSL versions older - than 1.1.1 (Daniel Gustafsson) - § - § - - - - - - - - If LLVM is enabled, require version 14 - or later (Thomas Munro) - § - - - - - - - - Add macro PG_MODULE_MAGIC_EXT - to allow extensions to report their name and version (Andrei Lepikhov) - § - - - - This information can be access via the new function pg_get_loaded_modules(). - - - - - - - - Document that SPI_connect()/SPI_connect_ext() - always returns success (SPI_OK_CONNECT) (Stepan - Neretin) - § - - - - Errors are always reported via ereport(). - - - - - - - - Add documentation - section about API and ABI - compatibility (David Wheeler, Peter Eisentraut) - § - - - - - - - - Remove the experimental designation of - Meson builds on Windows (Aleksander Alekseev) - § - - - - - - - - Remove configure options and - (Thomas Munro) - § - § - - - - Thirty-two-bit atomic operations are now required. - - - - - - - - Remove support for the - HPPA/PA-RISC architecture - (Tom Lane) - § - - - - - - - - - Additional Modules - - - - - - - - Add extension to inspect logical - snapshots (Bertrand Drouvot) - § - - - - - - - - Add extension which adds debug details - to EXPLAIN - output (Robert Haas) - § - - - - - - - - Add output columns to postgres_fdw_get_connections() - (Hayato Kuroda, Sagar Dilip Shedge) - § - § - § - § - - - - New output column used_in_xact indicates - if the foreign data wrapper is being used by a current transaction, - closed indicates if it is closed, - user_name indicates the user name, and - remote_backend_pid indicates the remote - backend process identifier. - - - - - - - - Allow SCRAM - authentication from the client to be passed to servers (Matheus Alcantara, Peter Eisentraut) - § - - - - This avoids storing postgres_fdw - authentication information in the database, and is - enabled with the postgres_fdw use_scram_passthrough - connection option. libpq uses new connection parameters - and . - - - - - - - - Allow SCRAM authentication from the client to be - passed to servers (Matheus Alcantara) - § - - - - - - - - Add on_error and log_verbosity - options to (Atsushi Torikoshi) - § - - - - These control how file_fdw handles and - reports invalid file rows. - - - - - - - - Add reject_limit to control the number of - invalid rows file_fdw can ignore (Atsushi - Torikoshi) - § - - - - This is active when ON_ERROR = 'ignore'. - - - - - - - - Add configurable variable min_password_length to - (Emanuele Musella, Maurizio Boriani) - § - - - - This controls the minimum password length. - - - - - - - - Have report the number of failed, retried, - or skipped transactions in per-script reports (Yugo Nagata) - § - - - - - - - - Add server variable weak - to control invalid check digit acceptance (Viktor Holmberg) - § - - - - This was previously only controlled by function isn_weak(). - - - - - - - - Allow values to be sorted to speed - index builds (Bernd Helmle, Andrey Borodin) - § - - - - - - - - Add check function gin_index_check() - to verify GIN indexes (Grigory Kryachko, Heikki - Linnakangas, Andrey Borodin) - § - - - - - - - - Add functions pg_buffercache_evict_relation() - and pg_buffercache_evict_all() - to evict unpinned shared buffers (Nazir Bilal Yavuz) - § - - - - The existing function pg_buffercache_evict() - now returns the buffer flush status. 
- - - - - - - - Allow extensions to install custom - options (Robert Haas, Sami Imseih) - § - § - § - - - - - - - - Allow extensions to use the server's cumulative statistics - API (Michael Paquier) - § - § - - - - - - - <xref linkend="pgstatstatements"/> - - - - - - - - Allow the queries of - and to be tracked by - pg_stat_statements (Anthonin Bonnefoy) - § - - - - They are also now assigned query ids. - - - - - - - - Allow the parameterization of values in - pg_stat_statements (Greg Sabino Mullane, - Michael Paquier) - § - - - - This reduces the bloat caused by SET statements - with differing constants. - - - - - - - - Add pg_stat_statements - columns to report parallel activity (Guillaume Lelarge) - § - - - - The new columns are - parallel_workers_to_launch and - parallel_workers_launched. - - - - - - - - Add - pg_stat_statements.wal_buffers_full - to report full WAL buffers (Bertrand Drouvot) - § - - - - - - - - - <xref linkend="pgcrypto"/> - - - - - - - - Add pgcrypto algorithms sha256crypt - and sha512crypt - (Bernd Helmle) - § - - - - - - - - Add CFB mode - to pgcrypto encryption and decryption - (Umar Hayat) - § - - - - - - - - Add function fips_mode() - to report the server's FIPS mode (Daniel - Gustafsson) - § - - - - - - - - Add pgcrypto server variable builtin_crypto_enabled - to allow disabling builtin non-FIPS mode - cryptographic functions (Daniel Gustafsson, Joe Conway) - § - - - - This is useful for guaranteeing FIPS mode behavior. - - - - - - - - - - - - Acknowledgments - - - The following individuals (in alphabetical order) have contributed - to this release as patch authors, committers, reviewers, testers, - or reporters of issues. - - - - (to be completed) - - - - diff --git a/doc/src/sgml/release-19.sgml b/doc/src/sgml/release-19.sgml new file mode 100644 index 0000000000000..8d242b5b28141 --- /dev/null +++ b/doc/src/sgml/release-19.sgml @@ -0,0 +1,16 @@ + + + + + Release 19 + + + Release date: + 2026-??-?? + + + + This is just a placeholder for now. + + + diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml index cee577ff8d353..a659d382db95c 100644 --- a/doc/src/sgml/release.sgml +++ b/doc/src/sgml/release.sgml @@ -70,7 +70,7 @@ For new features, add links to the documentation sections. All the active branches have to be edited concurrently when doing that. --> -&release-18; +&release-19; Prior Releases diff --git a/meson.build b/meson.build index 6ffe7b4727556..36e168a1a2ace 100644 --- a/meson.build +++ b/meson.build @@ -8,7 +8,7 @@ project('postgresql', ['c'], - version: '18beta1', + version: '19devel', license: 'PostgreSQL', # We want < 0.56 for python 3.5 compatibility on old platforms. EPEL for diff --git a/src/tools/git_changelog b/src/tools/git_changelog index b8bd874f20858..dccf938685a3a 100755 --- a/src/tools/git_changelog +++ b/src/tools/git_changelog @@ -59,6 +59,7 @@ require IPC::Open2; # (We could get this from "git branches", but not worth the trouble.) # NB: master must be first! my @BRANCHES = qw(master + REL_18_STABLE REL_17_STABLE REL_16_STABLE REL_15_STABLE REL_14_STABLE REL_13_STABLE REL_12_STABLE REL_11_STABLE REL_10_STABLE REL9_6_STABLE REL9_5_STABLE REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE REL9_0_STABLE diff --git a/src/tools/version_stamp.pl b/src/tools/version_stamp.pl index c3509474d83b2..a9d2d0910f3af 100755 --- a/src/tools/version_stamp.pl +++ b/src/tools/version_stamp.pl @@ -25,7 +25,7 @@ # Major version is hard-wired into the script. We update it when we branch # a new development version. 
-my $majorversion = 18; +my $majorversion = 19; # Validate argument and compute derived variables my $minor = shift; From 5ba00e175a4eaefa4dc38ea14c667bbeb13af305 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 30 Jun 2025 13:56:31 +0900 Subject: [PATCH 174/602] Align log_line_prefix in CI and TAP tests with pg_regress.c log_line_prefix is changed to include "%b", the backend type in the TAP test configuration. %v and %x are removed from the CI configuration, with the format around %b changed. The lack of backend type in postgresql.conf set by Cluster.pm for the TAP test configuration was something that has been bugging me, beginning the discussion that has led to this change. The change in the CI has come up during the discussion, to become consistent with pg_regress.c, %v and %x not being that useful to have. Reviewed-by: Andres Freund Reviewed-by: Fujii Masao Reviewed-by: Daniel Gustafsson Reviewed-by: Tom Lane Discussion: https://postgr.es/m/aC0VaIWAXLgXcHVP@paquier.xyz --- src/test/perl/PostgreSQL/Test/Cluster.pm | 2 +- src/tools/ci/pg_ci_base.conf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 1c11750ac1d07..49b2c86b29cbf 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -684,7 +684,7 @@ sub init print $conf "\n# Added by PostgreSQL::Test::Cluster.pm\n"; print $conf "fsync = off\n"; print $conf "restart_after_crash = off\n"; - print $conf "log_line_prefix = '%m [%p] %q%a '\n"; + print $conf "log_line_prefix = '%m %b[%p] %q%a '\n"; print $conf "log_statement = all\n"; print $conf "log_replication_commands = on\n"; print $conf "wal_retrieve_retry_interval = '500ms'\n"; diff --git a/src/tools/ci/pg_ci_base.conf b/src/tools/ci/pg_ci_base.conf index 9cec5c2910d80..695e0a0d6ec9e 100644 --- a/src/tools/ci/pg_ci_base.conf +++ b/src/tools/ci/pg_ci_base.conf @@ -10,5 +10,5 @@ log_autovacuum_min_duration = 0 log_checkpoints = true log_connections = all log_disconnections = true -log_line_prefix = '%m [%p][%b] %q[%a][%v:%x] ' +log_line_prefix = '%m %b[%p] %q%a ' log_lock_waits = true From 2252fcd4276cfeabae8786ab7c5a421dd674743e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 30 Jun 2025 15:42:50 +0900 Subject: [PATCH 175/602] Rationalize handling of VacuumParams This commit refactors the vacuum routines that rely on VacuumParams, adding const markers where necessary to force a new policy in the code. This structure should not use a pointer as it may be used across multiple relations, and its contents should never be updated. vacuum_rel() stands as an exception as it touches the "index_cleanup" and "truncate" options. VacuumParams has been introduced in 0d831389749a, and 661643dedad9 has fixed a bug impacting VACUUM operating on multiple relations. The changes done in tableam.h break ABI compatibility, so this commit can only happen on HEAD. 
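
As a rough illustration of the calling convention this commit message describes — the parameter struct passed by const value so each callee works on a private, read-only copy, with the one routine that must adjust per-relation defaults taking a plain (non-const) copy instead — here is a minimal, self-contained C sketch. The DemoParams type and the function names are hypothetical stand-ins, not the actual PostgreSQL definitions:

#include <stdio.h>

/* Hypothetical stand-in for a VacuumParams-like options struct. */
typedef struct DemoParams
{
    int         options;    /* bitmask of requested actions */
    int         truncate;   /* 0 = unspecified, 1 = enabled, 2 = disabled */
} DemoParams;

/*
 * Receives the struct by const value: the callee gets its own copy and
 * the compiler rejects any attempt to write to it, so the caller's
 * settings cannot be scribbled on.
 */
static void
process_rel(const char *relname, const DemoParams params)
{
    printf("%s: options=0x%x\n", relname, params.options);
    /* params.truncate = 1;  -- would fail: assignment of read-only parameter */
}

/*
 * The one exception: a routine that resolves per-relation defaults takes
 * a non-const copy and edits only that copy, analogous to how vacuum_rel()
 * settles "index_cleanup" and "truncate" per relation.
 */
static void
process_rel_with_defaults(const char *relname, DemoParams params)
{
    if (params.truncate == 0)   /* unspecified -> resolve for this relation */
        params.truncate = 1;
    printf("%s: truncate resolved to %d\n", relname, params.truncate);
}

int
main(void)
{
    DemoParams  params = {.options = 0x1, .truncate = 0};
    const char *rels[] = {"t1", "t2"};

    /* One params value is safely reused across multiple relations. */
    for (int i = 0; i < 2; i++)
    {
        process_rel(rels[i], params);
        process_rel_with_defaults(rels[i], params);
    }
    return 0;
}

Because every call already receives its own copy, the caller no longer needs to memcpy() the parameter block before each per-relation invocation, which is exactly the simplification visible in the vacuum() hunk further below.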
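
For the log_line_prefix commit above: assuming the usual meanings of the escapes — %m a millisecond timestamp, %b the backend type, %p the process ID, %q the point at which non-session processes stop expanding the prefix, and %a the application name — the new prefix '%m %b[%p] %q%a ' would yield lines roughly like these (timestamps, PIDs, and names are illustrative):

2025-06-30 15:00:00.000 UTC client backend[12345] psql LOG:  statement: SELECT 1;
2025-06-30 15:00:00.000 UTC checkpointer[12340] LOG:  checkpoint starting: time

The second line shows why %q is placed where it is: background processes such as the checkpointer have no application_name, so their prefix ends after the PID.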
Author: Shihao Zhong Co-authored-by: Michael Paquier Reviewed-by: Nathan Bossart Reviewed-by: Junwang Zhao Discussion: https://postgr.es/m/CAGRkXqTo+aK=GTy5pSc-9cy8H2F2TJvcrZ-zXEiNJj93np1UUw@mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 44 +++++----- src/backend/commands/analyze.c | 26 +++--- src/backend/commands/cluster.c | 2 +- src/backend/commands/vacuum.c | 118 ++++++++++++--------------- src/backend/postmaster/autovacuum.c | 2 +- src/include/access/heapam.h | 4 +- src/include/access/tableam.h | 6 +- src/include/commands/vacuum.h | 6 +- 8 files changed, 98 insertions(+), 110 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 4111a8996b5a1..75979530897cd 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -423,7 +423,7 @@ typedef struct LVSavedErrInfo /* non-export function prototypes */ static void lazy_scan_heap(LVRelState *vacrel); static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, - VacuumParams *params); + const VacuumParams params); static BlockNumber heap_vac_scan_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data); @@ -485,7 +485,7 @@ static void restore_vacuum_error_info(LVRelState *vacrel, * vacuum options or for relfrozenxid/relminmxid advancement. */ static void -heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) +heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params) { uint32 randseed; BlockNumber allvisible; @@ -504,7 +504,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) vacrel->eager_scan_remaining_successes = 0; /* If eager scanning is explicitly disabled, just return. */ - if (params->max_eager_freeze_failure_rate == 0) + if (params.max_eager_freeze_failure_rate == 0) return; /* @@ -581,11 +581,11 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE; - Assert(params->max_eager_freeze_failure_rate > 0 && - params->max_eager_freeze_failure_rate <= 1); + Assert(params.max_eager_freeze_failure_rate > 0 && + params.max_eager_freeze_failure_rate <= 1); vacrel->eager_scan_max_fails_per_region = - params->max_eager_freeze_failure_rate * + params.max_eager_freeze_failure_rate * EAGER_SCAN_REGION_SIZE; /* @@ -612,7 +612,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) * and locked the relation. */ void -heap_vacuum_rel(Relation rel, VacuumParams *params, +heap_vacuum_rel(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy) { LVRelState *vacrel; @@ -634,9 +634,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, ErrorContextCallback errcallback; char **indnames = NULL; - verbose = (params->options & VACOPT_VERBOSE) != 0; + verbose = (params.options & VACOPT_VERBOSE) != 0; instrument = (verbose || (AmAutoVacuumWorkerProcess() && - params->log_min_duration >= 0)); + params.log_min_duration >= 0)); if (instrument) { pg_rusage_init(&ru0); @@ -699,9 +699,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * The truncate param allows user to avoid attempting relation truncation, * though it can't force truncation to happen. 
*/ - Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED); - Assert(params->truncate != VACOPTVALUE_UNSPECIFIED && - params->truncate != VACOPTVALUE_AUTO); + Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED); + Assert(params.truncate != VACOPTVALUE_UNSPECIFIED && + params.truncate != VACOPTVALUE_AUTO); /* * While VacuumFailSafeActive is reset to false before calling this, we @@ -711,14 +711,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->consider_bypass_optimization = true; vacrel->do_index_vacuuming = true; vacrel->do_index_cleanup = true; - vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED); - if (params->index_cleanup == VACOPTVALUE_DISABLED) + vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED); + if (params.index_cleanup == VACOPTVALUE_DISABLED) { /* Force disable index vacuuming up-front */ vacrel->do_index_vacuuming = false; vacrel->do_index_cleanup = false; } - else if (params->index_cleanup == VACOPTVALUE_ENABLED) + else if (params.index_cleanup == VACOPTVALUE_ENABLED) { /* Force index vacuuming. Note that failsafe can still bypass. */ vacrel->consider_bypass_optimization = false; @@ -726,7 +726,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, else { /* Default/auto, make all decisions dynamically */ - Assert(params->index_cleanup == VACOPTVALUE_AUTO); + Assert(params.index_cleanup == VACOPTVALUE_AUTO); } /* Initialize page counters explicitly (be tidy) */ @@ -789,7 +789,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, */ vacrel->skippedallvis = false; skipwithvm = true; - if (params->options & VACOPT_DISABLE_PAGE_SKIPPING) + if (params.options & VACOPT_DISABLE_PAGE_SKIPPING) { /* * Force aggressive mode, and disable skipping blocks using the @@ -830,7 +830,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * is already dangerously old.) 
*/ lazy_check_wraparound_failsafe(vacrel); - dead_items_alloc(vacrel, params->nworkers); + dead_items_alloc(vacrel, params.nworkers); /* * Call lazy_scan_heap to perform all required heap pruning, index @@ -947,9 +947,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, { TimestampTz endtime = GetCurrentTimestamp(); - if (verbose || params->log_min_duration == 0 || + if (verbose || params.log_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, - params->log_min_duration)) + params.log_min_duration)) { long secs_dur; int usecs_dur; @@ -984,10 +984,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * Aggressiveness already reported earlier, in dedicated * VACUUM VERBOSE ereport */ - Assert(!params->is_wraparound); + Assert(!params.is_wraparound); msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n"); } - else if (params->is_wraparound) + else if (params.is_wraparound) { /* * While it's possible for a VACUUM to be both is_wraparound diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4fffb76e55735..7111d5d5334f2 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -76,7 +76,7 @@ static BufferAccessStrategy vac_strategy; static void do_analyze_rel(Relation onerel, - VacuumParams *params, List *va_cols, + const VacuumParams params, List *va_cols, AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, bool inh, bool in_outer_xact, int elevel); static void compute_index_stats(Relation onerel, double totalrows, @@ -107,7 +107,7 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); */ void analyze_rel(Oid relid, RangeVar *relation, - VacuumParams *params, List *va_cols, bool in_outer_xact, + const VacuumParams params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy) { Relation onerel; @@ -116,7 +116,7 @@ analyze_rel(Oid relid, RangeVar *relation, BlockNumber relpages = 0; /* Select logging level */ - if (params->options & VACOPT_VERBOSE) + if (params.options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; @@ -138,8 +138,8 @@ analyze_rel(Oid relid, RangeVar *relation, * * Make sure to generate only logs for ANALYZE in this case. */ - onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM), - params->log_min_duration >= 0, + onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM), + params.log_min_duration >= 0, ShareUpdateExclusiveLock); /* leave if relation could not be opened or locked */ @@ -155,7 +155,7 @@ analyze_rel(Oid relid, RangeVar *relation, */ if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel), onerel->rd_rel, - params->options & ~VACOPT_VACUUM)) + params.options & ~VACOPT_VACUUM)) { relation_close(onerel, ShareUpdateExclusiveLock); return; @@ -227,7 +227,7 @@ analyze_rel(Oid relid, RangeVar *relation, else { /* No need for a WARNING if we already complained during VACUUM */ - if (!(params->options & VACOPT_VACUUM)) + if (!(params.options & VACOPT_VACUUM)) ereport(WARNING, (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables", RelationGetRelationName(onerel)))); @@ -275,7 +275,7 @@ analyze_rel(Oid relid, RangeVar *relation, * appropriate acquirefunc for each child table. 
*/ static void -do_analyze_rel(Relation onerel, VacuumParams *params, +do_analyze_rel(Relation onerel, const VacuumParams params, List *va_cols, AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, bool inh, bool in_outer_xact, int elevel) @@ -309,9 +309,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params, PgStat_Counter startreadtime = 0; PgStat_Counter startwritetime = 0; - verbose = (params->options & VACOPT_VERBOSE) != 0; + verbose = (params.options & VACOPT_VERBOSE) != 0; instrument = (verbose || (AmAutoVacuumWorkerProcess() && - params->log_min_duration >= 0)); + params.log_min_duration >= 0)); if (inh) ereport(elevel, (errmsg("analyzing \"%s.%s\" inheritance tree", @@ -706,7 +706,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, * amvacuumcleanup() when called in ANALYZE-only mode. The only exception * among core index AMs is GIN/ginvacuumcleanup(). */ - if (!(params->options & VACOPT_VACUUM)) + if (!(params.options & VACOPT_VACUUM)) { for (ind = 0; ind < nindexes; ind++) { @@ -736,9 +736,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params, { TimestampTz endtime = GetCurrentTimestamp(); - if (verbose || params->log_min_duration == 0 || + if (verbose || params.log_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, - params->log_min_duration)) + params.log_min_duration)) { long delay_in_ms; WalUsage walusage; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 54a08e4102e14..b55221d44cd00 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -917,7 +917,7 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verb * not to be aggressive about this. */ memset(¶ms, 0, sizeof(VacuumParams)); - vacuum_get_cutoffs(OldHeap, ¶ms, &cutoffs); + vacuum_get_cutoffs(OldHeap, params, &cutoffs); /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 02993d320dafc..733ef40ae7c52 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -124,7 +124,7 @@ static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti, TransactionId lastSaneFrozenXid, MultiXactId lastSaneMinMulti); -static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, +static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params, BufferAccessStrategy bstrategy); static double compute_parallel_delay(void); static VacOptValue get_vacoptval_from_boolean(DefElem *def); @@ -465,7 +465,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) } /* Now go through the common routine */ - vacuum(vacstmt->rels, ¶ms, bstrategy, vac_context, isTopLevel); + vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel); /* Finally, clean up the vacuum memory context */ MemoryContextDelete(vac_context); @@ -494,7 +494,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) * memory context that will not disappear at transaction commit. */ void -vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, +vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy, MemoryContext vac_context, bool isTopLevel) { static bool in_vacuum = false; @@ -503,9 +503,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, volatile bool in_outer_xact, use_own_xacts; - Assert(params != NULL); - - stmttype = (params->options & VACOPT_VACUUM) ? 
"VACUUM" : "ANALYZE"; + stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside @@ -515,7 +513,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * * ANALYZE (without VACUUM) can run either way. */ - if (params->options & VACOPT_VACUUM) + if (params.options & VACOPT_VACUUM) { PreventInTransactionBlock(isTopLevel, stmttype); in_outer_xact = false; @@ -538,7 +536,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * Build list of relation(s) to process, putting any new data in * vac_context for safekeeping. */ - if (params->options & VACOPT_ONLY_DATABASE_STATS) + if (params.options & VACOPT_ONLY_DATABASE_STATS) { /* We don't process any tables in this case */ Assert(relations == NIL); @@ -554,7 +552,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, List *sublist; MemoryContext old_context; - sublist = expand_vacuum_rel(vrel, vac_context, params->options); + sublist = expand_vacuum_rel(vrel, vac_context, params.options); old_context = MemoryContextSwitchTo(vac_context); newrels = list_concat(newrels, sublist); MemoryContextSwitchTo(old_context); @@ -562,7 +560,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, relations = newrels; } else - relations = get_all_vacuum_rels(vac_context, params->options); + relations = get_all_vacuum_rels(vac_context, params.options); /* * Decide whether we need to start/commit our own transactions. @@ -578,11 +576,11 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ - if (params->options & VACOPT_VACUUM) + if (params.options & VACOPT_VACUUM) use_own_xacts = true; else { - Assert(params->options & VACOPT_ANALYZE); + Assert(params.options & VACOPT_ANALYZE); if (AmAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) @@ -633,21 +631,13 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, { VacuumRelation *vrel = lfirst_node(VacuumRelation, cur); - if (params->options & VACOPT_VACUUM) + if (params.options & VACOPT_VACUUM) { - VacuumParams params_copy; - - /* - * vacuum_rel() scribbles on the parameters, so give it a copy - * to avoid affecting other relations. - */ - memcpy(¶ms_copy, params, sizeof(VacuumParams)); - - if (!vacuum_rel(vrel->oid, vrel->relation, ¶ms_copy, bstrategy)) + if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy)) continue; } - if (params->options & VACOPT_ANALYZE) + if (params.options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, @@ -711,8 +701,8 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, StartTransactionCommand(); } - if ((params->options & VACOPT_VACUUM) && - !(params->options & VACOPT_SKIP_DATABASE_STATS)) + if ((params.options & VACOPT_VACUUM) && + !(params.options & VACOPT_SKIP_DATABASE_STATS)) { /* * Update pg_database.datfrozenxid, and truncate pg_xact if possible. @@ -1110,7 +1100,7 @@ get_all_vacuum_rels(MemoryContext vac_context, int options) * minimum). 
*/ bool -vacuum_get_cutoffs(Relation rel, const VacuumParams *params, +vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs) { int freeze_min_age, @@ -1126,10 +1116,10 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, aggressiveMXIDCutoff; /* Use mutable copies of freeze age parameters */ - freeze_min_age = params->freeze_min_age; - multixact_freeze_min_age = params->multixact_freeze_min_age; - freeze_table_age = params->freeze_table_age; - multixact_freeze_table_age = params->multixact_freeze_table_age; + freeze_min_age = params.freeze_min_age; + multixact_freeze_min_age = params.multixact_freeze_min_age; + freeze_table_age = params.freeze_table_age; + multixact_freeze_table_age = params.multixact_freeze_table_age; /* Set pg_class fields in cutoffs */ cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid; @@ -2006,7 +1996,7 @@ vac_truncate_clog(TransactionId frozenXID, * At entry and exit, we are not inside a transaction. */ static bool -vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, +vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params, BufferAccessStrategy bstrategy) { LOCKMODE lmode; @@ -2019,18 +2009,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, int save_nestlevel; VacuumParams toast_vacuum_params; - Assert(params != NULL); - /* * This function scribbles on the parameters, so make a copy early to * avoid affecting the TOAST table (if we do end up recursing to it). */ - memcpy(&toast_vacuum_params, params, sizeof(VacuumParams)); + memcpy(&toast_vacuum_params, ¶ms, sizeof(VacuumParams)); /* Begin a transaction for vacuuming this relation */ StartTransactionCommand(); - if (!(params->options & VACOPT_FULL)) + if (!(params.options & VACOPT_FULL)) { /* * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets @@ -2056,7 +2044,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->statusFlags |= PROC_IN_VACUUM; - if (params->is_wraparound) + if (params.is_wraparound) MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND; ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; LWLockRelease(ProcArrayLock); @@ -2080,12 +2068,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either * way, we can be sure that no other backend is vacuuming the same table. */ - lmode = (params->options & VACOPT_FULL) ? + lmode = (params.options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock; /* open the relation and get the appropriate lock on it */ - rel = vacuum_open_relation(relid, relation, params->options, - params->log_min_duration >= 0, lmode); + rel = vacuum_open_relation(relid, relation, params.options, + params.log_min_duration >= 0, lmode); /* leave if relation could not be opened or locked */ if (!rel) @@ -2100,8 +2088,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * This is only safe to do because we hold a session lock on the main * relation that prevents concurrent deletion. 
*/ - if (OidIsValid(params->toast_parent)) - priv_relid = params->toast_parent; + if (OidIsValid(params.toast_parent)) + priv_relid = params.toast_parent; else priv_relid = RelationGetRelid(rel); @@ -2114,7 +2102,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ if (!vacuum_is_permitted_for_relation(priv_relid, rel->rd_rel, - params->options & ~VACOPT_ANALYZE)) + params.options & ~VACOPT_ANALYZE)) { relation_close(rel, lmode); PopActiveSnapshot(); @@ -2185,7 +2173,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * Set index_cleanup option based on index_cleanup reloption if it wasn't * specified in VACUUM command, or when running in an autovacuum worker */ - if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED) + if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED) { StdRdOptIndexCleanup vacuum_index_cleanup; @@ -2196,23 +2184,23 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup; if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO) - params->index_cleanup = VACOPTVALUE_AUTO; + params.index_cleanup = VACOPTVALUE_AUTO; else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON) - params->index_cleanup = VACOPTVALUE_ENABLED; + params.index_cleanup = VACOPTVALUE_ENABLED; else { Assert(vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF); - params->index_cleanup = VACOPTVALUE_DISABLED; + params.index_cleanup = VACOPTVALUE_DISABLED; } } #ifdef USE_INJECTION_POINTS - if (params->index_cleanup == VACOPTVALUE_AUTO) + if (params.index_cleanup == VACOPTVALUE_AUTO) INJECTION_POINT("vacuum-index-cleanup-auto", NULL); - else if (params->index_cleanup == VACOPTVALUE_DISABLED) + else if (params.index_cleanup == VACOPTVALUE_DISABLED) INJECTION_POINT("vacuum-index-cleanup-disabled", NULL); - else if (params->index_cleanup == VACOPTVALUE_ENABLED) + else if (params.index_cleanup == VACOPTVALUE_ENABLED) INJECTION_POINT("vacuum-index-cleanup-enabled", NULL); #endif @@ -2222,36 +2210,36 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ if (rel->rd_options != NULL && ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0) - params->max_eager_freeze_failure_rate = + params.max_eager_freeze_failure_rate = ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate; /* * Set truncate option based on truncate reloption or GUC if it wasn't * specified in VACUUM command, or when running in an autovacuum worker */ - if (params->truncate == VACOPTVALUE_UNSPECIFIED) + if (params.truncate == VACOPTVALUE_UNSPECIFIED) { StdRdOptions *opts = (StdRdOptions *) rel->rd_options; if (opts && opts->vacuum_truncate_set) { if (opts->vacuum_truncate) - params->truncate = VACOPTVALUE_ENABLED; + params.truncate = VACOPTVALUE_ENABLED; else - params->truncate = VACOPTVALUE_DISABLED; + params.truncate = VACOPTVALUE_DISABLED; } else if (vacuum_truncate) - params->truncate = VACOPTVALUE_ENABLED; + params.truncate = VACOPTVALUE_ENABLED; else - params->truncate = VACOPTVALUE_DISABLED; + params.truncate = VACOPTVALUE_DISABLED; } #ifdef USE_INJECTION_POINTS - if (params->truncate == VACOPTVALUE_AUTO) + if (params.truncate == VACOPTVALUE_AUTO) INJECTION_POINT("vacuum-truncate-auto", NULL); - else if (params->truncate == VACOPTVALUE_DISABLED) + else if (params.truncate == VACOPTVALUE_DISABLED) INJECTION_POINT("vacuum-truncate-disabled", NULL); - else if (params->truncate == VACOPTVALUE_ENABLED) + else if (params.truncate == 
VACOPTVALUE_ENABLED) INJECTION_POINT("vacuum-truncate-enabled", NULL); #endif @@ -2261,9 +2249,9 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * automatically rebuilt by cluster_rel so we shouldn't recurse to it, * unless PROCESS_MAIN is disabled. */ - if ((params->options & VACOPT_PROCESS_TOAST) != 0 && - ((params->options & VACOPT_FULL) == 0 || - (params->options & VACOPT_PROCESS_MAIN) == 0)) + if ((params.options & VACOPT_PROCESS_TOAST) != 0 && + ((params.options & VACOPT_FULL) == 0 || + (params.options & VACOPT_PROCESS_MAIN) == 0)) toast_relid = rel->rd_rel->reltoastrelid; else toast_relid = InvalidOid; @@ -2286,16 +2274,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN * to be set when we recurse to the TOAST table. */ - if (params->options & VACOPT_PROCESS_MAIN) + if (params.options & VACOPT_PROCESS_MAIN) { /* * Do the actual work --- either FULL or "lazy" vacuum */ - if (params->options & VACOPT_FULL) + if (params.options & VACOPT_FULL) { ClusterParams cluster_params = {0}; - if ((params->options & VACOPT_VERBOSE) != 0) + if ((params.options & VACOPT_VERBOSE) != 0) cluster_params.options |= CLUOPT_VERBOSE; /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ @@ -2342,7 +2330,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, toast_vacuum_params.options |= VACOPT_PROCESS_MAIN; toast_vacuum_params.toast_parent = relid; - vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy); + vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy); } /* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 451fb90a610a7..9474095f271a1 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -3190,7 +3190,7 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy) rel_list = list_make1(rel); MemoryContextSwitchTo(old_context); - vacuum(rel_list, &tab->at_params, bstrategy, vac_context, true); + vacuum(rel_list, tab->at_params, bstrategy, vac_context, true); MemoryContextDelete(vac_context); } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 3a9424c19c9ae..a2bd5a897f874 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -21,6 +21,7 @@ #include "access/skey.h" #include "access/table.h" /* for backward compatibility */ #include "access/tableam.h" +#include "commands/vacuum.h" #include "nodes/lockoptions.h" #include "nodes/primnodes.h" #include "storage/bufpage.h" @@ -396,9 +397,8 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, OffsetNumber *unused, int nunused); /* in heap/vacuumlazy.c */ -struct VacuumParams; extern void heap_vacuum_rel(Relation rel, - struct VacuumParams *params, BufferAccessStrategy bstrategy); + const VacuumParams params, BufferAccessStrategy bstrategy); /* in heap/heapam_visibility.c */ extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 8713e12cbfb99..1c9e802a6b128 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -20,6 +20,7 @@ #include "access/relscan.h" #include "access/sdir.h" #include "access/xact.h" +#include "commands/vacuum.h" #include "executor/tuptable.h" #include "storage/read_stream.h" #include "utils/rel.h" @@ -36,7 +37,6 @@ extern PGDLLIMPORT bool synchronize_seqscans; struct BulkInsertStateData; struct 
IndexInfo; struct SampleScanState; -struct VacuumParams; struct ValidateIndexState; /* @@ -645,7 +645,7 @@ typedef struct TableAmRoutine * integrate with autovacuum's scheduling. */ void (*relation_vacuum) (Relation rel, - struct VacuumParams *params, + const VacuumParams params, BufferAccessStrategy bstrategy); /* @@ -1664,7 +1664,7 @@ table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, * routine, even if (for ANALYZE) it is part of the same VACUUM command. */ static inline void -table_relation_vacuum(Relation rel, struct VacuumParams *params, +table_relation_vacuum(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy) { rel->rd_tableam->relation_vacuum(rel, params, bstrategy); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index bc37a80dc74fa..14eeccbd71850 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -336,7 +336,7 @@ extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns; /* in commands/vacuum.c */ extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); -extern void vacuum(List *relations, VacuumParams *params, +extern void vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy, MemoryContext vac_context, bool isTopLevel); extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, @@ -357,7 +357,7 @@ extern void vac_update_relstats(Relation relation, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact); -extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, +extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs); extern bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs); extern void vac_update_datfrozenxid(void); @@ -398,7 +398,7 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc); /* in commands/analyze.c */ extern void analyze_rel(Oid relid, RangeVar *relation, - VacuumParams *params, List *va_cols, bool in_outer_xact, + const VacuumParams params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy); extern bool std_typanalyze(VacAttrStats *stats); From c5c4fbb4d482b87c2a6c90337f3b657b2d0002ca Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Mon, 30 Jun 2025 10:12:31 +0200 Subject: [PATCH 176/602] doc: Fix typo in pg_sync_replication_slots documentation Commit 1546e17f9d0 accidentally misspelled additionally as additionaly. Backpatch to v17 to match where the original commit was backpatched. Author: Daniel Gustafsson Backpatch-through: 17 --- doc/src/sgml/func.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 224d4fe5a9f95..298791858be30 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -29981,7 +29981,7 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset logical decoding and must be dropped after promotion. See for details. Note that this function is primarily intended for testing and - debugging purposes and should be used with caution. Additionaly, + debugging purposes and should be used with caution. 
Additionally,
this function cannot be executed if
sync_replication_slots is
enabled and the slotsync

From 2e640a0fa224e4233220252b360efd33c98b3e90 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 30 Jun 2025 10:32:26 +0200
Subject: [PATCH 177/602] doc: Some copy-editing around prefix operators

When postfix operators were dropped in 1ed6b8956, the CREATE OPERATOR
docs were not updated to make the RIGHTARG argument mandatory in the
grammar.  While at it, make the RIGHTARG docs more concise.  Also, the
operator docs were mentioning "infix" in the introduction, while using
"binary" everywhere else.

Author: Christoph Berg
Discussion: https://www.postgresql.org/message-id/flat/aAtpbnQphv4LWAye@msg.df7cb.de
---
 doc/src/sgml/ref/create_operator.sgml | 6 +++---
 doc/src/sgml/xoper.sgml               | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/ref/create_operator.sgml b/doc/src/sgml/ref/create_operator.sgml
index 3553d36454185..d2ffb1b2a500f 100644
--- a/doc/src/sgml/ref/create_operator.sgml
+++ b/doc/src/sgml/ref/create_operator.sgml
@@ -23,7 +23,7 @@ PostgreSQL documentation
 CREATE OPERATOR name (
     {FUNCTION|PROCEDURE} = function_name
-    [, LEFTARG = left_type ] [, RIGHTARG = right_type ]
+    [, LEFTARG = left_type ] , RIGHTARG = right_type
     [, COMMUTATOR = com_op ] [, NEGATOR = neg_op ]
     [, RESTRICT = res_proc ] [, JOIN = join_proc ]
     [, HASHES ] [, MERGES ]
@@ -88,8 +88,8 @@ CREATE OPERATOR name (
   For binary operators, both LEFTARG and
-   RIGHTARG must be defined.  For prefix operators only
-   RIGHTARG should be defined.
+   RIGHTARG must be defined.  For prefix operators, only
+   RIGHTARG must be defined.
   The function_name function
   must have been previously defined using CREATE
   FUNCTION and must be defined to accept the correct number
diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml
index 954a90d77d0ed..853b07a9f1489 100644
--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -21,7 +21,7 @@
   PostgreSQL supports prefix
-  and infix operators.  Operators can be
+  and binary (or infix) operators.  Operators can be
   overloaded;overloadingoperators
   that is, the same operator name can be used for different operators
   that have different numbers and types of operands.  When a query is

From 3431e3e4aa3a33e8411f15e76c284cdd4c54ca28 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 30 Jun 2025 10:45:08 +0200
Subject: [PATCH 178/602] pgbench: Use standard option handling test routines

Run the standard program_XXX test routines instead of pgbench's own
tests.  This ensures consistency with the test suites of other programs
and enforces common policies, such as help line length.

Author: Hayato Kuroda
Reviewed-by: Fujii Masao
Discussion: https://www.postgresql.org/message-id/flat/OSCPR01MB14966247015B7E3D8D340D022F56FA@OSCPR01MB14966.jpnprd01.prod.outlook.com
---
 src/bin/pgbench/t/002_pgbench_no_server.pl | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl
index f975c73dd758a..2cc59cc8140c3 100644
--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -233,21 +233,9 @@ sub pgbench_scripts
 	'pgbench option error: ' . $name);
 }

-# Help
-pgbench(
-	'--help', 0,
-	[
-		qr{benchmarking tool for PostgreSQL},
-		qr{Usage},
-		qr{Initialization options:},
-		qr{Common options:},
-		qr{Report bugs to}
-	],
-	[qr{^$}],
-	'pgbench help');
-
-# Version
-pgbench('-V', 0, [qr{^pgbench .PostgreSQL. }], [qr{^$}], 'pgbench version');
+program_help_ok('pgbench');
+program_version_ok('pgbench');
+program_options_handling_ok('pgbench');

 # list of builtins
 pgbench(

From 960135114629bc89da0dd1d839541098c7e6401a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 30 Jun 2025 11:28:11 +0200
Subject: [PATCH 179/602] doc: explain pgstatindex fragmentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It was quite hard to guess what leaf_fragmentation meant without
looking at pgstattuple's code.  This patch aims to give the user
a better idea of what it means.

Author: Frédéric Yhuel
Author: Laurenz Albe
Reviewed-by: Bertrand Drouvot
Reviewed-by: Benoit Lobréau
Discussion: https://postgr.es/m/bf110561-f774-4957-a890-bb6fab6804e0%40dalibo.com
Discussion: https://postgr.es/m/4c5dee3a-8381-4e0f-b882-d1bd950e8972@dalibo.com
---
 doc/src/sgml/pgstattuple.sgml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 4071da4ed941a..c747a5818ab7d 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -270,6 +270,15 @@ leaf_fragmentation | 0
   page than is accounted for by internal_pages + leaf_pages +
   empty_pages + deleted_pages, because it also includes the
   index's metapage.
+   avg_leaf_density is the fraction of the index size that
+   is taken up by user data.  Since indexes have a default fillfactor of 90,
+   this should be around 90 for newly built indexes of non-negligible size,
+   but usually deteriorates over time.
+   leaf_fragmentation represents a measure of disorder.
+   A higher leaf_fragmentation indicates that the
+   physical order of the index leaf pages increasingly deviates from their
+   logical order.  This can have a significant impact if a large part
+   of the index is read from disk.

From a4c10de9291291bce3dd2b81bd8b5f0b98649244 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Mon, 30 Jun 2025 18:36:24 +0900
Subject: [PATCH 180/602] psql: Improve tab completion for COPY command.

Previously, tab completion for COPY only suggested plain tables
and partitioned tables, even though materialized views are also valid
for COPY TO (since commit 534874fac0b), and foreign tables are valid
for COPY FROM.

This commit enhances tab completion for COPY to also include
materialized views and foreign tables.

Views with INSTEAD OF INSERT triggers are supported with COPY FROM
but rarely used, so plain views are intentionally excluded from
completion.

Author: jian he
Co-authored-by: Fujii Masao
Reviewed-by: Kirill Reshke
Reviewed-by: David G. Johnston
Discussion: https://postgr.es/m/CACJufxFxnSkikp+GormAGHcMTX1YH2HRXW1+3dJM9w7yY9hdsg@mail.gmail.com
---
 src/bin/psql/tab-complete.in.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index 908eef97c6e28..8c2ea0b95870a 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -889,6 +889,14 @@ static const SchemaQuery Query_for_list_of_analyzables = {
 	.result = "c.relname",
 };

+/*
+ * Relations supporting COPY TO/FROM are currently almost the same as
+ * those supporting ANALYZE.  Although views with INSTEAD OF INSERT triggers
+ * can be used with COPY FROM, they are rarely used for this purpose,
+ * so plain views are intentionally excluded from this tab completion.
+ */ +#define Query_for_list_of_tables_for_copy Query_for_list_of_analyzables + /* Relations supporting index creation */ static const SchemaQuery Query_for_list_of_indexables = { .catname = "pg_catalog.pg_class c", @@ -3255,7 +3263,7 @@ match_previous_words(int pattern_id, * backslash command). */ else if (Matches("COPY|\\copy")) - COMPLETE_WITH_SCHEMA_QUERY_PLUS(Query_for_list_of_tables, "("); + COMPLETE_WITH_SCHEMA_QUERY_PLUS(Query_for_list_of_tables_for_copy, "("); /* Complete COPY ( with legal query commands */ else if (Matches("COPY|\\copy", "(")) COMPLETE_WITH("SELECT", "TABLE", "VALUES", "INSERT INTO", "UPDATE", "DELETE FROM", "MERGE INTO", "WITH"); From a6a4641252ed166ba187d7fbe0504ddb5a5f0e33 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 30 Jun 2025 11:38:18 +0200 Subject: [PATCH 181/602] Fix whitespace --- src/tools/git_changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/git_changelog b/src/tools/git_changelog index dccf938685a3a..c25e399a87f5d 100755 --- a/src/tools/git_changelog +++ b/src/tools/git_changelog @@ -59,7 +59,7 @@ require IPC::Open2; # (We could get this from "git branches", but not worth the trouble.) # NB: master must be first! my @BRANCHES = qw(master - REL_18_STABLE + REL_18_STABLE REL_17_STABLE REL_16_STABLE REL_15_STABLE REL_14_STABLE REL_13_STABLE REL_12_STABLE REL_11_STABLE REL_10_STABLE REL9_6_STABLE REL9_5_STABLE REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE REL9_0_STABLE From cc2ac0e6f99e4efc3ae5710010ff35e646990a60 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 30 Jun 2025 12:00:00 +0200 Subject: [PATCH 182/602] Remove unused #include's in src/backend/utils/adt/* Author: Aleksander Alekseev Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAJ7c6TOowVbR-0NEvvDm6a_mag18krR0XJ2FKrc9DHXj7hFRtQ%40mail.gmail.com --- src/backend/utils/adt/network.c | 2 -- src/backend/utils/adt/network_spgist.c | 1 - src/backend/utils/adt/pg_locale.c | 1 - src/backend/utils/adt/pg_locale_builtin.c | 1 - src/backend/utils/adt/ri_triggers.c | 2 -- src/backend/utils/adt/selfuncs.c | 1 - 6 files changed, 8 deletions(-) diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index f03fcc1147bb0..9fd211b2d4576 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -12,8 +12,6 @@ #include #include -#include "access/stratnum.h" -#include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" #include "common/hashfn.h" #include "common/ip.h" diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c index a84747d927586..602276a35c3ea 100644 --- a/src/backend/utils/adt/network_spgist.c +++ b/src/backend/utils/adt/network_spgist.c @@ -37,7 +37,6 @@ #include "catalog/pg_type.h" #include "utils/fmgrprotos.h" #include "utils/inet.h" -#include "varatt.h" static int inet_spg_node_number(const inet *val, int commonbits); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index f5e31c433a0de..bf1afb24d7da9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -41,7 +41,6 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" -#include "utils/formatting.h" #include "utils/guc_hooks.h" #include "utils/lsyscache.h" #include "utils/memutils.h" diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index f51768830cd7b..ce4914a76a12e 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ 
b/src/backend/utils/adt/pg_locale_builtin.c @@ -18,7 +18,6 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" -#include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 6239900fa2892..059fc5ebf601a 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -30,7 +30,6 @@ #include "access/xact.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" -#include "catalog/pg_proc.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/spi.h" @@ -46,7 +45,6 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" -#include "utils/rangetypes.h" #include "utils/rel.h" #include "utils/rls.h" #include "utils/ruleutils.h" diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index a96b1b9c0bc69..1e0f2de0336b0 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -103,7 +103,6 @@ #include "access/table.h" #include "access/tableam.h" #include "access/visibilitymap.h" -#include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_statistic.h" From 40a96cd1484fdf3ab57e8cb7b09767ec7a7f73b1 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 30 Jun 2025 12:23:33 +0200 Subject: [PATCH 183/602] pgflex: propagate environment to flex subprocess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python's subprocess.run docs say that if the env argument is not None, it will be used "instead of the default behavior of inheriting the current process’ environment". However, the environment should be preserved, only adding FLEX_TMP_DIR to it. Author: Javier Maestro Discussion: https://www.postgresql.org/message-id/flat/CABvji06GUpmrTqqiCr6_F9vRL2-JUSVAh8ChgWa6k47FUCvYmA%40mail.gmail.com --- src/tools/pgflex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/pgflex b/src/tools/pgflex index 3986b06874e75..b8d9aa0086fbb 100755 --- a/src/tools/pgflex +++ b/src/tools/pgflex @@ -48,7 +48,7 @@ os.chdir(args.privatedir) # contents. Set FLEX_TMP_DIR to the target private directory to avoid # that. That environment variable isn't consulted on other platforms, so we # don't even need to make this conditional. -env = {'FLEX_TMP_DIR': args.privatedir} +os.environ['FLEX_TMP_DIR'] = args.privatedir # build flex invocation command = [args.flex, '-o', args.output_file] @@ -58,7 +58,7 @@ command += args.flex_flags command += [args.input_file] # create .c file from .l file -sp = subprocess.run(command, env=env) +sp = subprocess.run(command) if sp.returncode != 0: sys.exit(sp.returncode) From c3e28e9fd936b83dbb6dfb5003b6221d98f8469c Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Mon, 30 Jun 2025 09:49:31 -0400 Subject: [PATCH 184/602] Avoid uninitialized value error in TAP tests' Cluster->psql If the method is called in scalar context and we didn't pass in a stderr handle, one won't be created. However, some error paths assume that it exists, so in this case create a dummy stderr to avoid the resulting perl error. Per gripe from Oleg Tselebrovskiy and adapted from his patch. 
Discussion: https://postgr.es/m/378eac5de4b8ecb5be7bcdf2db9d2c4d@postgrespro.ru --- src/test/perl/PostgreSQL/Test/Cluster.pm | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 49b2c86b29cbf..301766d2ed93c 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -2199,6 +2199,14 @@ sub psql $ret = $?; }; my $exc_save = $@; + + # we need a dummy $stderr from hereon, if we didn't collect it + if (! defined $stderr) + { + my $errtxt = ""; + $stderr = \$errtxt; + } + if ($exc_save) { From f20a347e1a613cfc9053e7bc3d254608ae968386 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 30 Jun 2025 10:20:14 -0400 Subject: [PATCH 185/602] aio: Fix reference to outdated name Reported-by: Antonin Houska Author: Antonin Houska Discussion: https://postgr.es/m/5250.1751266701@localhost Backpatch-through: 18, where da7226993fd4 introduced this --- src/include/storage/aio_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/storage/aio_types.h b/src/include/storage/aio_types.h index 181833660778e..afee85c787b44 100644 --- a/src/include/storage/aio_types.h +++ b/src/include/storage/aio_types.h @@ -107,7 +107,7 @@ typedef struct PgAioResult /* of type PgAioResultStatus, see above */ uint32 status:PGAIO_RESULT_STATUS_BITS; - /* meaning defined by callback->error */ + /* meaning defined by callback->report */ uint32 error_data:PGAIO_RESULT_ERROR_BITS; int32 result; From bd09f024a1bbdd7a7e2ca944595a9d4b6c90fb83 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 30 Jun 2025 15:38:54 -0500 Subject: [PATCH 186/602] Add new OID alias type regdatabase. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides a convenient way to look up a database's OID. For example, the query SELECT * FROM pg_shdepend WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()); can now be simplified to SELECT * FROM pg_shdepend WHERE dbid = current_database()::regdatabase; Like the regrole type, regdatabase has cluster-wide scope, so we disallow regdatabase constants from appearing in stored expressions. Bumps catversion. 
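For illustration, a minimal usage sketch of the new type (it mirrors the
regression tests included in this patch, and is not itself part of the
patch):

    -- Cast and function forms of the name-to-OID lookup:
    SELECT 'template1'::regdatabase;          -- template1
    SELECT 'template1'::regdatabase::oid;     -- the numeric OID
    SELECT to_regdatabase('Nonexistent');     -- NULL instead of an error

    -- regdatabase has cluster-wide scope, so constants of this type are
    -- rejected in stored expressions, just as for regrole:
    CREATE TABLE regdatabase_test
        (datid oid DEFAULT 'template1'::regdatabase);  -- fails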
Author: Ian Lawrence Barwick Reviewed-by: Greg Sabino Mullane Reviewed-by: Jian He Reviewed-by: Fabrízio de Royes Mello Reviewed-by: Tom Lane Discussion: https://postgr.es/m/aBpjJhyHpM2LYcG0%40nathan --- contrib/postgres_fdw/deparse.c | 6 + doc/src/sgml/datatype.sgml | 15 +- doc/src/sgml/func.sgml | 17 +++ doc/src/sgml/ref/pgupgrade.sgml | 3 +- src/backend/bootstrap/bootstrap.c | 2 + src/backend/catalog/dependency.c | 11 ++ src/backend/utils/adt/regproc.c | 118 +++++++++++++++ src/backend/utils/adt/selfuncs.c | 2 + src/backend/utils/cache/catcache.c | 1 + src/bin/pg_upgrade/check.c | 1 + src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_cast.dat | 14 ++ src/include/catalog/pg_proc.dat | 17 +++ src/include/catalog/pg_type.dat | 5 + src/test/regress/expected/regproc.out | 174 ++++++++++++++++++++++ src/test/regress/expected/type_sanity.out | 1 + src/test/regress/sql/regproc.sql | 38 +++++ src/test/regress/sql/type_sanity.sql | 1 + 18 files changed, 424 insertions(+), 4 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index d9970dd675336..9351835b5e4f8 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -39,6 +39,7 @@ #include "catalog/pg_aggregate.h" #include "catalog/pg_authid.h" #include "catalog/pg_collation.h" +#include "catalog/pg_database.h" #include "catalog/pg_namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" @@ -455,6 +456,11 @@ foreign_expr_walker(Node *node, AuthIdRelationId, fpinfo)) return false; break; + case REGDATABASEOID: + if (!is_shippable(DatumGetObjectId(c->constvalue), + DatabaseRelationId, fpinfo)) + return false; + break; } } diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 09309ba0390b7..49a7c180a803e 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -4737,6 +4737,10 @@ INSERT INTO mytable VALUES(-1); -- fails regconfig + + regdatabase + + regdictionary @@ -4878,6 +4882,13 @@ SELECT * FROM pg_attribute english + + regdatabase + pg_database + database name + template1 + + regdictionary pg_ts_dict @@ -5049,8 +5060,8 @@ WHERE ... be dropped without first removing the default expression. The alternative of nextval('my_seq'::text) does not create a dependency. - (regrole is an exception to this property. Constants of this - type are not allowed in stored expressions.) + (regdatabase and regrole are exceptions to this + property. Constants of these types are not allowed in stored expressions.) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 298791858be30..126b8cfbad81e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -26750,6 +26750,23 @@ SELECT currval(pg_get_serial_sequence('sometable', 'id')); + + + + to_regdatabase + + to_regdatabase ( text ) + regdatabase + + + Translates a textual database name to its OID. A similar result is + obtained by casting the string to type regdatabase (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index aeeed297437e6..5ddf3a8ae9257 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -1110,7 +1110,8 @@ psql --username=postgres --file=script.sql postgres regproc regprocedure - (regclass, regrole, and regtype can be upgraded.) + (regclass, regdatabase, regrole, and + regtype can be upgraded.) 
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6db864892d0dd..fc8638c1b61b6 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -109,6 +109,8 @@ static const struct typinfo TypInfo[] = { F_REGROLEIN, F_REGROLEOUT}, {"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_REGNAMESPACEIN, F_REGNAMESPACEOUT}, + {"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + F_REGDATABASEIN, F_REGDATABASEOUT}, {"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID, F_TEXTIN, F_TEXTOUT}, {"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 18316a3968bcf..7dded634eb810 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1850,6 +1850,17 @@ find_expr_references_walker(Node *node, errmsg("constant of the type %s cannot be used here", "regrole"))); break; + + /* + * Dependencies for regdatabase should be shared among all + * databases, so explicitly inhibit to have dependencies. + */ + case REGDATABASEOID: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constant of the type %s cannot be used here", + "regdatabase"))); + break; } } return false; diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 5ee608a2b3921..b8bbe95e82eb8 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -30,6 +30,7 @@ #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" +#include "commands/dbcommands.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -1763,6 +1764,123 @@ regnamespacesend(PG_FUNCTION_ARGS) return oidsend(fcinfo); } +/* + * regdatabasein - converts database name to database OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_database entry. + */ +Datum +regdatabasein(PG_FUNCTION_ARGS) +{ + char *db_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(db_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regdatabase values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_database entry. */ + names = stringToQualifiedNameList(db_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_database_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("database \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regdatabase - converts database name to database OID + * + * If the name is not found, we return NULL. 
+ */ +Datum +to_regdatabase(PG_FUNCTION_ARGS) +{ + char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regdatabasein, db_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regdatabaseout - converts database OID to database name + */ +Datum +regdatabaseout(PG_FUNCTION_ARGS) +{ + Oid dboid = PG_GETARG_OID(0); + char *result; + + if (dboid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = get_database_name(dboid); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any database, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dboid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdatabaserecv - converts external binary format to regdatabase + */ +Datum +regdatabaserecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdatabasesend - converts regdatabase to binary format + */ +Datum +regdatabasesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + /* * text_regclass: convert text to regclass * diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1e0f2de0336b0..ce6a626eba283 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4619,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *scaledvalue = convert_numeric_to_scalar(value, valuetypid, &failure); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid, @@ -4751,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: /* we can treat OIDs as integers... 
*/ return (double) DatumGetObjectId(value); } diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 657648996c235..d1b25214376ed 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *hashfunc = int4hashfast; *fasteqfunc = int4eqfast; *eqfunc = F_OIDEQ; diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 81865cd3e4859..fb063a2de4286 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -168,6 +168,7 @@ static DataTypesUsageChecks data_types_usage_checks[] = /* pg_class.oid is preserved, so 'regclass' is OK */ " 'regcollation', " " 'regconfig', " + /* pg_database.oid is preserved, so 'regdatabase' is OK */ " 'regdictionary', " " 'regnamespace', " " 'regoper', " diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 479629825f5b7..ff9ffd9d47498 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202506291 +#define CATALOG_VERSION_NO 202506301 #endif diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat index ab46be606f03d..fbfd669587f07 100644 --- a/src/include/catalog/pg_cast.dat +++ b/src/include/catalog/pg_cast.dat @@ -281,6 +281,20 @@ castcontext => 'a', castmethod => 'f' }, { castsource => 'regnamespace', casttarget => 'int4', castfunc => '0', castcontext => 'a', castmethod => 'b' }, +{ castsource => 'oid', casttarget => 'regdatabase', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'regdatabase', casttarget => 'oid', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'int8', casttarget => 'regdatabase', castfunc => 'oid', + castcontext => 'i', castmethod => 'f' }, +{ castsource => 'int2', casttarget => 'regdatabase', castfunc => 'int4(int2)', + castcontext => 'i', castmethod => 'f' }, +{ castsource => 'int4', casttarget => 'regdatabase', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'regdatabase', casttarget => 'int8', castfunc => 'int8(oid)', + castcontext => 'a', castmethod => 'f' }, +{ castsource => 'regdatabase', casttarget => 'int4', castfunc => '0', + castcontext => 'a', castmethod => 'b' }, # String category { castsource => 'text', casttarget => 'bpchar', castfunc => '0', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index fb4f7f50350ad..d4650947c63a5 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -7455,6 +7455,17 @@ prorettype => 'regnamespace', proargtypes => 'text', prosrc => 'to_regnamespace' }, +{ oid => '8321', descr => 'I/O', + proname => 'regdatabasein', provolatile => 's', prorettype => 'regdatabase', + proargtypes => 'cstring', prosrc => 'regdatabasein' }, +{ oid => '8322', descr => 'I/O', + proname => 'regdatabaseout', provolatile => 's', prorettype => 'cstring', + proargtypes => 'regdatabase', prosrc => 'regdatabaseout' }, +{ oid => '8323', descr => 'convert database name to regdatabase', + proname => 'to_regdatabase', provolatile => 's', + prorettype => 'regdatabase', proargtypes => 'text', + prosrc => 'to_regdatabase' }, + { oid => '6210', descr => 'test whether string is valid input for data type', proname => 'pg_input_is_valid', provolatile => 's', prorettype 
=> 'bool', proargtypes => 'text text', prosrc => 'pg_input_is_valid' }, @@ -8313,6 +8324,12 @@ { oid => '4088', descr => 'I/O', proname => 'regnamespacesend', prorettype => 'bytea', proargtypes => 'regnamespace', prosrc => 'regnamespacesend' }, +{ oid => '8324', descr => 'I/O', + proname => 'regdatabaserecv', prorettype => 'regdatabase', + proargtypes => 'internal', prosrc => 'regdatabaserecv' }, +{ oid => '8325', descr => 'I/O', + proname => 'regdatabasesend', prorettype => 'bytea', + proargtypes => 'regdatabase', prosrc => 'regdatabasesend' }, { oid => '2456', descr => 'I/O', proname => 'bit_recv', prorettype => 'bit', proargtypes => 'internal oid int4', prosrc => 'bit_recv' }, diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 6dca77e0a22f7..29e4ffffc9806 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -399,6 +399,11 @@ typinput => 'regnamespacein', typoutput => 'regnamespaceout', typreceive => 'regnamespacerecv', typsend => 'regnamespacesend', typalign => 'i' }, +{ oid => '8326', array_type_oid => '8327', descr => 'registered database', + typname => 'regdatabase', typlen => '4', typbyval => 't', typcategory => 'N', + typinput => 'regdatabasein', typoutput => 'regdatabaseout', + typreceive => 'regdatabaserecv', typsend => 'regdatabasesend', + typalign => 'i' }, # uuid { oid => '2950', array_type_oid => '2951', descr => 'UUID', diff --git a/src/test/regress/expected/regproc.out b/src/test/regress/expected/regproc.out index 97b917502cabb..84c84aef4207f 100644 --- a/src/test/regress/expected/regproc.out +++ b/src/test/regress/expected/regproc.out @@ -192,6 +192,18 @@ SELECT regnamespace('"pg_catalog"'); pg_catalog (1 row) +SELECT regdatabase('template1'); + regdatabase +------------- + template1 +(1 row) + +SELECT regdatabase('"template1"'); + regdatabase +------------- + template1 +(1 row) + SELECT to_regrole('regress_regrole_test'); to_regrole ---------------------- @@ -216,6 +228,132 @@ SELECT to_regnamespace('"pg_catalog"'); pg_catalog (1 row) +SELECT to_regdatabase('template1'); + to_regdatabase +---------------- + template1 +(1 row) + +SELECT to_regdatabase('"template1"'); + to_regdatabase +---------------- + template1 +(1 row) + +-- special "single dash" case +SELECT regproc('-')::oid; + regproc +--------- + 0 +(1 row) + +SELECT regprocedure('-')::oid; + regprocedure +-------------- + 0 +(1 row) + +SELECT regclass('-')::oid; + regclass +---------- + 0 +(1 row) + +SELECT regcollation('-')::oid; + regcollation +-------------- + 0 +(1 row) + +SELECT regtype('-')::oid; + regtype +--------- + 0 +(1 row) + +SELECT regconfig('-')::oid; + regconfig +----------- + 0 +(1 row) + +SELECT regdictionary('-')::oid; + regdictionary +--------------- + 0 +(1 row) + +SELECT regrole('-')::oid; + regrole +--------- + 0 +(1 row) + +SELECT regnamespace('-')::oid; + regnamespace +-------------- + 0 +(1 row) + +SELECT regdatabase('-')::oid; + regdatabase +------------- + 0 +(1 row) + +SELECT to_regproc('-')::oid; + to_regproc +------------ + 0 +(1 row) + +SELECT to_regprocedure('-')::oid; + to_regprocedure +----------------- + 0 +(1 row) + +SELECT to_regclass('-')::oid; + to_regclass +------------- + 0 +(1 row) + +SELECT to_regcollation('-')::oid; + to_regcollation +----------------- + 0 +(1 row) + +SELECT to_regtype('-')::oid; + to_regtype +------------ + 0 +(1 row) + +SELECT to_regrole('-')::oid; + to_regrole +------------ + 0 +(1 row) + +SELECT to_regnamespace('-')::oid; + to_regnamespace +----------------- + 0 +(1 row) + +SELECT 
to_regdatabase('-')::oid; + to_regdatabase +---------------- + 0 +(1 row) + +-- constant cannot be used here +CREATE TABLE regrole_test (rolid OID DEFAULT 'regress_regrole_test'::regrole); +ERROR: constant of the type regrole cannot be used here +CREATE TABLE regdatabase_test (datid OID DEFAULT 'template1'::regdatabase); +ERROR: constant of the type regdatabase cannot be used here /* If objects don't exist, raise errors. */ DROP ROLE regress_regrole_test; -- without schemaname @@ -305,6 +443,18 @@ SELECT regnamespace('foo.bar'); ERROR: invalid name syntax LINE 1: SELECT regnamespace('foo.bar'); ^ +SELECT regdatabase('Nonexistent'); +ERROR: database "nonexistent" does not exist +LINE 1: SELECT regdatabase('Nonexistent'); + ^ +SELECT regdatabase('"Nonexistent"'); +ERROR: database "Nonexistent" does not exist +LINE 1: SELECT regdatabase('"Nonexistent"'); + ^ +SELECT regdatabase('foo.bar'); +ERROR: invalid name syntax +LINE 1: SELECT regdatabase('foo.bar'); + ^ /* If objects don't exist, return NULL with no error. */ -- without schemaname SELECT to_regoper('||//'); @@ -447,6 +597,24 @@ SELECT to_regnamespace('foo.bar'); (1 row) +SELECT to_regdatabase('Nonexistent'); + to_regdatabase +---------------- + +(1 row) + +SELECT to_regdatabase('"Nonexistent"'); + to_regdatabase +---------------- + +(1 row) + +SELECT to_regdatabase('foo.bar'); + to_regdatabase +---------------- + +(1 row) + -- Test to_regtypemod SELECT to_regtypemod('text'); to_regtypemod @@ -569,6 +737,12 @@ SELECT * FROM pg_input_error_info('no_such_type', 'regtype'); type "no_such_type" does not exist | | | 42704 (1 row) +SELECT * FROM pg_input_error_info('Nonexistent', 'regdatabase'); + message | detail | hint | sql_error_code +---------------------------------------+--------+------+---------------- + database "nonexistent" does not exist | | | 42704 +(1 row) + -- Some cases that should be soft errors, but are not yet SELECT * FROM pg_input_error_info('incorrect type name syntax', 'regtype'); ERROR: syntax error at or near "type" diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index dd0c52ab08b50..943e56506bf1b 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -711,6 +711,7 @@ CREATE TABLE tab_core_types AS SELECT 'regtype'::regtype type, 'pg_monitor'::regrole, 'pg_class'::regclass::oid, + 'template1'::regdatabase, '(1,1)'::tid, '2'::xid, '3'::cid, '10:20:10,14,15'::txid_snapshot, '10:20:10,14,15'::pg_snapshot, diff --git a/src/test/regress/sql/regproc.sql b/src/test/regress/sql/regproc.sql index 232289ac39823..cfec8f8c754a2 100644 --- a/src/test/regress/sql/regproc.sql +++ b/src/test/regress/sql/regproc.sql @@ -47,11 +47,42 @@ SELECT regrole('regress_regrole_test'); SELECT regrole('"regress_regrole_test"'); SELECT regnamespace('pg_catalog'); SELECT regnamespace('"pg_catalog"'); +SELECT regdatabase('template1'); +SELECT regdatabase('"template1"'); SELECT to_regrole('regress_regrole_test'); SELECT to_regrole('"regress_regrole_test"'); SELECT to_regnamespace('pg_catalog'); SELECT to_regnamespace('"pg_catalog"'); +SELECT to_regdatabase('template1'); +SELECT to_regdatabase('"template1"'); + +-- special "single dash" case + +SELECT regproc('-')::oid; +SELECT regprocedure('-')::oid; +SELECT regclass('-')::oid; +SELECT regcollation('-')::oid; +SELECT regtype('-')::oid; +SELECT regconfig('-')::oid; +SELECT regdictionary('-')::oid; +SELECT regrole('-')::oid; +SELECT regnamespace('-')::oid; +SELECT regdatabase('-')::oid; + +SELECT 
to_regproc('-')::oid; +SELECT to_regprocedure('-')::oid; +SELECT to_regclass('-')::oid; +SELECT to_regcollation('-')::oid; +SELECT to_regtype('-')::oid; +SELECT to_regrole('-')::oid; +SELECT to_regnamespace('-')::oid; +SELECT to_regdatabase('-')::oid; + +-- constant cannot be used here + +CREATE TABLE regrole_test (rolid OID DEFAULT 'regress_regrole_test'::regrole); +CREATE TABLE regdatabase_test (datid OID DEFAULT 'template1'::regdatabase); /* If objects don't exist, raise errors. */ @@ -88,6 +119,9 @@ SELECT regrole('foo.bar'); SELECT regnamespace('Nonexistent'); SELECT regnamespace('"Nonexistent"'); SELECT regnamespace('foo.bar'); +SELECT regdatabase('Nonexistent'); +SELECT regdatabase('"Nonexistent"'); +SELECT regdatabase('foo.bar'); /* If objects don't exist, return NULL with no error. */ @@ -122,6 +156,9 @@ SELECT to_regrole('foo.bar'); SELECT to_regnamespace('Nonexistent'); SELECT to_regnamespace('"Nonexistent"'); SELECT to_regnamespace('foo.bar'); +SELECT to_regdatabase('Nonexistent'); +SELECT to_regdatabase('"Nonexistent"'); +SELECT to_regdatabase('foo.bar'); -- Test to_regtypemod SELECT to_regtypemod('text'); @@ -147,6 +184,7 @@ SELECT * FROM pg_input_error_info('ng_catalog.abs(numeric)', 'regprocedure'); SELECT * FROM pg_input_error_info('ng_catalog.abs(numeric', 'regprocedure'); SELECT * FROM pg_input_error_info('regress_regrole_test', 'regrole'); SELECT * FROM pg_input_error_info('no_such_type', 'regtype'); +SELECT * FROM pg_input_error_info('Nonexistent', 'regdatabase'); -- Some cases that should be soft errors, but are not yet SELECT * FROM pg_input_error_info('incorrect type name syntax', 'regtype'); diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index c94dd83d3061c..df795759bb4cb 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -539,6 +539,7 @@ CREATE TABLE tab_core_types AS SELECT 'regtype'::regtype type, 'pg_monitor'::regrole, 'pg_class'::regclass::oid, + 'template1'::regdatabase, '(1,1)'::tid, '2'::xid, '3'::cid, '10:20:10,14,15'::txid_snapshot, '10:20:10,14,15'::pg_snapshot, From 999f172ded2bae7efbd8bf1dd6f823095395493f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 30 Jun 2025 16:59:36 -0400 Subject: [PATCH 187/602] De-reserve keywords EXECUTE and STRICT in PL/pgSQL. On close inspection, there does not seem to be a strong reason why these should be fully-reserved keywords. I guess they just escaped consideration in previous attempts to minimize PL/pgSQL's list of reserved words. 
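Since both words are now looked up contextually (via tok_is_keyword)
rather than being reserved tokens, the dynamic-SQL forms continue to
parse as before.  A minimal sketch, complementing the identifier test
added below:

    -- FOR ... IN EXECUTE still works; EXECUTE is recognized here even
    -- though it is no longer a reserved word.
    do $$
    declare r record;
    begin
      for r in execute 'select 1 as a' loop
        raise notice 'a = %', r.a;
      end loop;
    end $$;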
Author: Tom Lane Reviewed-by: Pavel Stehule Discussion: https://postgr.es/m/2185258.1745617445@sss.pgh.pa.us --- src/pl/plpgsql/src/expected/plpgsql_misc.out | 14 ++++++++++++++ src/pl/plpgsql/src/pl_gram.y | 13 +++++++++---- src/pl/plpgsql/src/pl_reserved_kwlist.h | 2 -- src/pl/plpgsql/src/pl_scanner.c | 2 +- src/pl/plpgsql/src/pl_unreserved_kwlist.h | 2 ++ src/pl/plpgsql/src/sql/plpgsql_misc.sql | 13 +++++++++++++ 6 files changed, 39 insertions(+), 7 deletions(-) diff --git a/src/pl/plpgsql/src/expected/plpgsql_misc.out b/src/pl/plpgsql/src/expected/plpgsql_misc.out index a6511df08ec9f..7bb4f432e7daf 100644 --- a/src/pl/plpgsql/src/expected/plpgsql_misc.out +++ b/src/pl/plpgsql/src/expected/plpgsql_misc.out @@ -65,3 +65,17 @@ do $$ declare x public.foo%rowtype; begin end $$; ERROR: relation "public.foo" does not exist CONTEXT: compilation of PL/pgSQL function "inline_code_block" near line 1 do $$ declare x public.misc_table%rowtype; begin end $$; +-- Test handling of an unreserved keyword as a variable name +-- and record field name. +do $$ +declare + execute int; + r record; +begin + execute := 10; + raise notice 'execute = %', execute; + select 1 as strict into r; + raise notice 'r.strict = %', r.strict; +end $$; +NOTICE: execute = 10 +NOTICE: r.strict = 1 diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 5612e66d0239d..7b672ea5179a6 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -1368,7 +1368,8 @@ for_control : for_variable K_IN int tok = yylex(&yylval, &yylloc, yyscanner); int tokloc = yylloc; - if (tok == K_EXECUTE) + if (tok_is_keyword(tok, &yylval, + K_EXECUTE, "execute")) { /* EXECUTE means it's a dynamic FOR loop */ PLpgSQL_stmt_dynfors *new; @@ -2135,7 +2136,8 @@ stmt_open : K_OPEN cursor_variable yyerror(&yylloc, NULL, yyscanner, "syntax error, expected \"FOR\""); tok = yylex(&yylval, &yylloc, yyscanner); - if (tok == K_EXECUTE) + if (tok_is_keyword(tok, &yylval, + K_EXECUTE, "execute")) { int endtoken; @@ -2536,6 +2538,7 @@ unreserved_keyword : | K_ERRCODE | K_ERROR | K_EXCEPTION + | K_EXECUTE | K_EXIT | K_FETCH | K_FIRST @@ -2581,6 +2584,7 @@ unreserved_keyword : | K_SLICE | K_SQLSTATE | K_STACKED + | K_STRICT | K_TABLE | K_TABLE_NAME | K_TYPE @@ -3514,7 +3518,8 @@ make_return_query_stmt(int location, YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_ new->stmtid = ++plpgsql_curr_compile->nstatements; /* check for RETURN QUERY EXECUTE */ - if ((tok = yylex(yylvalp, yyllocp, yyscanner)) != K_EXECUTE) + tok = yylex(yylvalp, yyllocp, yyscanner); + if (!tok_is_keyword(tok, yylvalp, K_EXECUTE, "execute")) { /* ordinary static query */ plpgsql_push_back_token(tok, yylvalp, yyllocp, yyscanner); @@ -3597,7 +3602,7 @@ read_into_target(PLpgSQL_variable **target, bool *strict, YYSTYPE *yylvalp, YYLT *strict = false; tok = yylex(yylvalp, yyllocp, yyscanner); - if (strict && tok == K_STRICT) + if (strict && tok_is_keyword(tok, yylvalp, K_STRICT, "strict")) { *strict = true; tok = yylex(yylvalp, yyllocp, yyscanner); diff --git a/src/pl/plpgsql/src/pl_reserved_kwlist.h b/src/pl/plpgsql/src/pl_reserved_kwlist.h index ce7b0c9d33121..f3ef2cbd8d7dc 100644 --- a/src/pl/plpgsql/src/pl_reserved_kwlist.h +++ b/src/pl/plpgsql/src/pl_reserved_kwlist.h @@ -33,7 +33,6 @@ PG_KEYWORD("case", K_CASE) PG_KEYWORD("declare", K_DECLARE) PG_KEYWORD("else", K_ELSE) PG_KEYWORD("end", K_END) -PG_KEYWORD("execute", K_EXECUTE) PG_KEYWORD("for", K_FOR) PG_KEYWORD("foreach", K_FOREACH) PG_KEYWORD("from", K_FROM) @@ -44,7 +43,6 @@ PG_KEYWORD("loop", K_LOOP) 
PG_KEYWORD("not", K_NOT) PG_KEYWORD("null", K_NULL) PG_KEYWORD("or", K_OR) -PG_KEYWORD("strict", K_STRICT) PG_KEYWORD("then", K_THEN) PG_KEYWORD("to", K_TO) PG_KEYWORD("using", K_USING) diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c index d08187dafcb4c..19825e5c71810 100644 --- a/src/pl/plpgsql/src/pl_scanner.c +++ b/src/pl/plpgsql/src/pl_scanner.c @@ -53,7 +53,7 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL; * We try to avoid reserving more keywords than we have to; but there's * little point in not reserving a word if it's reserved in the core grammar. * Currently, the following words are reserved here but not in the core: - * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE + * BEGIN BY DECLARE FOREACH IF LOOP WHILE */ /* ScanKeywordList lookup data for PL/pgSQL keywords */ diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h b/src/pl/plpgsql/src/pl_unreserved_kwlist.h index 98f99ec470cf4..b48c5a645ffaf 100644 --- a/src/pl/plpgsql/src/pl_unreserved_kwlist.h +++ b/src/pl/plpgsql/src/pl_unreserved_kwlist.h @@ -58,6 +58,7 @@ PG_KEYWORD("elsif", K_ELSIF) PG_KEYWORD("errcode", K_ERRCODE) PG_KEYWORD("error", K_ERROR) PG_KEYWORD("exception", K_EXCEPTION) +PG_KEYWORD("execute", K_EXECUTE) PG_KEYWORD("exit", K_EXIT) PG_KEYWORD("fetch", K_FETCH) PG_KEYWORD("first", K_FIRST) @@ -103,6 +104,7 @@ PG_KEYWORD("scroll", K_SCROLL) PG_KEYWORD("slice", K_SLICE) PG_KEYWORD("sqlstate", K_SQLSTATE) PG_KEYWORD("stacked", K_STACKED) +PG_KEYWORD("strict", K_STRICT) PG_KEYWORD("table", K_TABLE) PG_KEYWORD("table_name", K_TABLE_NAME) PG_KEYWORD("type", K_TYPE) diff --git a/src/pl/plpgsql/src/sql/plpgsql_misc.sql b/src/pl/plpgsql/src/sql/plpgsql_misc.sql index d3a7f703a758d..103a20bf8820c 100644 --- a/src/pl/plpgsql/src/sql/plpgsql_misc.sql +++ b/src/pl/plpgsql/src/sql/plpgsql_misc.sql @@ -37,3 +37,16 @@ do $$ declare x foo.bar%rowtype; begin end $$; do $$ declare x foo.bar.baz%rowtype; begin end $$; do $$ declare x public.foo%rowtype; begin end $$; do $$ declare x public.misc_table%rowtype; begin end $$; + +-- Test handling of an unreserved keyword as a variable name +-- and record field name. +do $$ +declare + execute int; + r record; +begin + execute := 10; + raise notice 'execute = %', execute; + select 1 as strict into r; + raise notice 'r.strict = %', r.strict; +end $$; From 0836683a8977cac07d8cbdd0462f8a3e7e32565f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 30 Jun 2025 17:06:39 -0400 Subject: [PATCH 188/602] Improve error report for PL/pgSQL reserved word used as a field name. The current code in resolve_column_ref (dating to commits 01f7d2990 and fe24d7816) believes that not finding a RECFIELD datum is a can't-happen case, in consequence of which I didn't spend a whole lot of time considering what to do if it did happen. But it turns out that it *can* happen if the would-be field name is a fully-reserved PL/pgSQL keyword. Change the error message to describe that situation, and add a test case demonstrating it. This might need further refinement if anyone can find other ways to trigger a failure here; but without an example it's not clear what other error to throw. 
Author: Tom Lane Reviewed-by: Pavel Stehule Discussion: https://postgr.es/m/2185258.1745617445@sss.pgh.pa.us --- src/pl/plpgsql/src/expected/plpgsql_misc.out | 22 ++++++++++++++++++++ src/pl/plpgsql/src/pl_comp.c | 19 ++++++++++------- src/pl/plpgsql/src/sql/plpgsql_misc.sql | 16 ++++++++++++++ 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/pl/plpgsql/src/expected/plpgsql_misc.out b/src/pl/plpgsql/src/expected/plpgsql_misc.out index 7bb4f432e7daf..ffb377f5f54ff 100644 --- a/src/pl/plpgsql/src/expected/plpgsql_misc.out +++ b/src/pl/plpgsql/src/expected/plpgsql_misc.out @@ -79,3 +79,25 @@ begin end $$; NOTICE: execute = 10 NOTICE: r.strict = 1 +-- Test handling of a reserved keyword as a record field name. +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r.foreach = %', r.foreach; -- fails +end $$; +NOTICE: r.x = 1 +ERROR: field name "foreach" is a reserved key word +LINE 1: r.foreach + ^ +HINT: Use double quotes to quote it. +QUERY: r.foreach +CONTEXT: PL/pgSQL function inline_code_block line 5 at RAISE +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r."foreach" = %', r."foreach"; -- ok +end $$; +NOTICE: r.x = 1 +NOTICE: r."foreach" = 2 diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index b80c59447fb57..ee961425a5b7e 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -1211,17 +1211,22 @@ resolve_column_ref(ParseState *pstate, PLpgSQL_expr *expr, } /* - * We should not get here, because a RECFIELD datum should - * have been built at parse time for every possible qualified - * reference to fields of this record. But if we do, handle - * it like field-not-found: throw error or return NULL. + * Ideally we'd never get here, because a RECFIELD datum + * should have been built at parse time for every qualified + * reference to a field of this record that appears in the + * source text. However, plpgsql_yylex will not build such a + * datum unless the field name lexes as token type IDENT. + * Hence, if the would-be field name is a PL/pgSQL reserved + * word, we lose. Assume that that's what happened and tell + * the user to quote it, unless the caller prefers we just + * return NULL. */ if (error_if_no_field) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("record \"%s\" has no field \"%s\"", - (nnames_field == 1) ? name1 : name2, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("field name \"%s\" is a reserved key word", colname), + errhint("Use double quotes to quote it."), parser_errposition(pstate, cref->location))); } break; diff --git a/src/pl/plpgsql/src/sql/plpgsql_misc.sql b/src/pl/plpgsql/src/sql/plpgsql_misc.sql index 103a20bf8820c..0bc39fcf3257c 100644 --- a/src/pl/plpgsql/src/sql/plpgsql_misc.sql +++ b/src/pl/plpgsql/src/sql/plpgsql_misc.sql @@ -50,3 +50,19 @@ begin select 1 as strict into r; raise notice 'r.strict = %', r.strict; end $$; + +-- Test handling of a reserved keyword as a record field name. 
+
+do $$ declare r record;
+begin
+  select 1 as x, 2 as foreach into r;
+  raise notice 'r.x = %', r.x;
+  raise notice 'r.foreach = %', r.foreach; -- fails
+end $$;
+
+do $$ declare r record;
+begin
+  select 1 as x, 2 as foreach into r;
+  raise notice 'r.x = %', r.x;
+  raise notice 'r."foreach" = %', r."foreach"; -- ok
+end $$;

From 2e947217474c15c7fd9011d1ab2b0d4657b3eae2 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 1 Jul 2025 08:57:05 +0900
Subject: [PATCH 189/602] Improve error handling of libxml2 calls in xml.c

This commit fixes some defects in the backend's xml.c, found upon
inspection of the internals of libxml2:
- xmlEncodeSpecialChars() can fail on malloc(), returning NULL back to
the caller.  xmltext() assumed that this could never happen.  Like other
code paths, a TRY/CATCH block is added there, covering also the fact
that cstring_to_text_with_len() could fail a memory allocation, in
which case the backend would fail to free the buffer allocated by
xmlEncodeSpecialChars().
- Some libxml2 routines called in xmlelement() can return NULL, like
xmlAddChildList() or xmlTextWriterStartElement().  Dedicated errors are
added for them.
- xml_xmlnodetoxmltype() missed that xmlXPathCastNodeToString() can fail
on an allocation failure.  In this case, the call can just be moved to
the existing TRY/CATCH block.

All these code paths would cause the server to crash.  As this is
unlikely to be a problem in practice, no backpatch is done.  Jim and I
have caught these defects, not sure who has scored the most.  The
contrib module xml2/ has similar defects, which will be addressed in a
separate change.

Reported-by: Jim Jones
Reviewed-by: Jim Jones
Discussion: https://postgr.es/m/aEEingzOta_S_Nu7@paquier.xyz
---
 src/backend/utils/adt/xml.c | 78 +++++++++++++++++++++++++++++--------
 1 file changed, 62 insertions(+), 16 deletions(-)

diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index a4150bff2eaea..2bd39b6ac4b09 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS)
 #ifdef USE_LIBXML
 	text	   *arg = PG_GETARG_TEXT_PP(0);
 	text	   *result;
-	xmlChar    *xmlbuf = NULL;
+	volatile xmlChar *xmlbuf = NULL;
+	PgXmlErrorContext *xmlerrcxt;
+
+	/* Otherwise, we gotta spin up some error handling. */
+	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);

-	xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
+	PG_TRY();
+	{
+		xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));

-	Assert(xmlbuf);
+		if (xmlbuf == NULL || xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+						"could not allocate xmlChar");
+
+		result = cstring_to_text_with_len((const char *) xmlbuf,
+										  xmlStrlen((const xmlChar *) xmlbuf));
+	}
+	PG_CATCH();
+	{
+		if (xmlbuf)
+			xmlFree((xmlChar *) xmlbuf);
+
+		pg_xml_done(xmlerrcxt, true);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	xmlFree((xmlChar *) xmlbuf);
+	pg_xml_done(xmlerrcxt, false);

-	result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
-	xmlFree(xmlbuf);
 	PG_RETURN_XML_P(result);
 #else
 	NO_XML_SUPPORT();
@@ -770,7 +792,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 		if (oldroot != NULL)
 			xmlFreeNode(oldroot);

-		xmlAddChildList(root, content_nodes);
+		if (xmlAddChildList(root, content_nodes) == NULL ||
+			xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not append xml node list");

 		/*
		 * We use this node to insert newlines in the dump. 
Note: in at
@@ -931,7 +956,10 @@ xmlelement(XmlExpr *xexpr,
 		xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
 					"could not allocate xmlTextWriter");

-	xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
+	if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
+		xmlerrcxt->err_occurred)
+		xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+					"could not start xml element");

 	forboth(arg, named_arg_strings, narg, xexpr->arg_names)
 	{
@@ -939,19 +967,30 @@ xmlelement(XmlExpr *xexpr,
 		char	   *argname = strVal(lfirst(narg));

 		if (str)
-			xmlTextWriterWriteAttribute(writer,
-										(xmlChar *) argname,
-										(xmlChar *) str);
+		{
+			if (xmlTextWriterWriteAttribute(writer,
+											(xmlChar *) argname,
+											(xmlChar *) str) < 0 ||
+				xmlerrcxt->err_occurred)
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not write xml attribute");
+		}
 	}

 	foreach(arg, arg_strings)
 	{
 		char	   *str = (char *) lfirst(arg);

-		xmlTextWriterWriteRaw(writer, (xmlChar *) str);
+		if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
+			xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not write raw xml text");
 	}

-	xmlTextWriterEndElement(writer);
+	if (xmlTextWriterEndElement(writer) < 0 ||
+		xmlerrcxt->err_occurred)
+		xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+					"could not end xml element");

 	/* we MUST do this now to flush data out to the buffer ... */
 	xmlFreeTextWriter(writer);
@@ -4220,20 +4259,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
 	}
 	else
 	{
-		xmlChar    *str;
+		volatile xmlChar *str = NULL;

-		str = xmlXPathCastNodeToString(cur);
 		PG_TRY();
 		{
+			char	   *escaped;
+
+			str = xmlXPathCastNodeToString(cur);
+			if (str == NULL || xmlerrcxt->err_occurred)
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+							"could not allocate xmlChar");
+
 			/* Here we rely on XML having the same representation as TEXT */
-			char	   *escaped = escape_xml((char *) str);
+			escaped = escape_xml((char *) str);

 			result = (xmltype *) cstring_to_text(escaped);
 			pfree(escaped);
 		}
 		PG_FINALLY();
 		{
-			xmlFree(str);
+			if (str)
+				xmlFree((xmlChar *) str);
 		}
 		PG_END_TRY();
 	}

From a3df0d43d93789777fd06bb7ffa8cdc1f06d63c3 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 1 Jul 2025 09:41:42 +0900
Subject: [PATCH 190/602] Fix typo in system_views.sql's definition of pg_stat_activity

backend_xmin used a lowercase 's' instead of an uppercase 'S' like the
other attributes.  This is harmless, but let's be consistent.

Issue introduced in dd1a3bccca24.

Author: Daisuke Higuchi
Discussion: https://postgr.es/m/CAEVT6c8M39cqWje-df39wWr0KWcDgGKd5fMvQo84zvCXKoEL9Q@mail.gmail.com
---
 src/backend/catalog/system_views.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 08f780a2e6382..e5dbbe61b811a 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -895,7 +895,7 @@ CREATE VIEW pg_stat_activity AS
             S.wait_event,
             S.state,
             S.backend_xid,
-            s.backend_xmin,
+            S.backend_xmin,
             S.query_id,
             S.query,
             S.backend_type

From c67989789cec3953effca4e01dff834abff9116a Mon Sep 17 00:00:00 2001
From: Amit Langote
Date: Tue, 1 Jul 2025 13:13:48 +0900
Subject: [PATCH 191/602] Fix typos in comments

Commit 19d8e2308bc added enum values with the prefix TU_, but a few
comments still referred to TUUI_, which was used in development
versions of the patches committed as 19d8e2308bc. 
Author: Yugo Nagata
Discussion: https://postgr.es/m/20250701110216.8ac8a9e4c6f607f1d954f44a@sraoss.co.jp
Backpatch-through: 16
---
 src/backend/executor/execIndexing.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index bdf862b24062e..ca33a854278ed 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -279,7 +279,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *		executor is performing an UPDATE that could not use an
  *		optimization like heapam's HOT (in more general terms a
  *		call to table_tuple_update() took place and set
- *		'update_indexes' to TUUI_All).  Receiving this hint makes
+ *		'update_indexes' to TU_All).  Receiving this hint makes
  *		us consider if we should pass down the 'indexUnchanged'
  *		hint in turn.  That's something that we figure out for
  *		each index_insert() call iff 'update' is true.
  *
@@ -290,7 +290,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *		HOT has been applied and any updated columns are indexed
  *		only by summarizing indexes (or in more general terms a
  *		call to table_tuple_update() took place and set
- *		'update_indexes' to TUUI_Summarizing).  We can (and must)
+ *		'update_indexes' to TU_Summarizing).  We can (and must)
  *		therefore only update the indexes that have
  *		'amsummarizing' = true.
  *

From 732061150b004385810e522f8629f5bf91d977b7 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 1 Jul 2025 15:48:32 +0900
Subject: [PATCH 192/602] xml2: Improve error handling of libxml2 calls

The contrib module xml2/ has always been fuzzy with the cleanup of the
memory allocated by the calls internal to libxml2, even if there are
APIs in place giving a lot of control over the error behavior, all
located in the backend's xml.c.

The code paths fixed in the commit address multiple defects, while
sanitizing the code:
- In xpath.c, several allocations are done by libxml2 for
xpath_workspace, whose memory cleanup could go out of sight as it
relied on a single TRY/CATCH block done in pgxml_xpath().
workspace->res is allocated by libxml2, and may end up not being freed
at all upon a failure outside of a TRY area.  This code is refactored
so that the TRY/CATCH block of pgxml_xpath() is moved one level higher
to its callers, which are responsible for cleaning up the contents of a
workspace on failure.  cleanup_workspace() now requires a volatile
workspace, forcing as a rule that a TRY/CATCH block should be used.
- Several calls, like xmlStrdup(), xmlXPathNewContext(),
xmlXPathCtxtCompile(), etc. can return NULL on failures (for most of
them, allocation failures).  These call sites forgot to check for
failures, or missed that pg_xml_error_occurred() should be called to
check if an error is already on the stack.
- Some memory allocated by libxml2 calls was freed in an incorrect way,
"resstr" in xslt_process() being one example.

The class of errors fixed here covers problems that are unlikely to
happen in practice, so no backpatch is done.  The changes ended up
being rather invasive, so it is perhaps not a bad thing to be
conservative and to keep them only on HEAD anyway. 
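As a minimal sketch of the calling convention this refactoring settles on (condensed from the xpath_nodeset() hunk below; declarations and unrelated details elided), each caller now owns the TRY/CATCH block and the workspace cleanup:

    xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);

    PG_TRY();
    {
        workspace = pgxml_xpath(document, xpath, xmlerrcxt);
        xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL);
    }
    PG_CATCH();
    {
        if (workspace)
            cleanup_workspace(workspace);
        pg_xml_done(xmlerrcxt, true);
        PG_RE_THROW();
    }
    PG_END_TRY();

    cleanup_workspace(workspace);
    pg_xml_done(xmlerrcxt, false);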
Author: Michael Paquier Reported-by: Karavaev Alexey Reviewed-by: Jim Jones Reviewed-by: Tom Lane Discussion: https://postgr.es/m/18943-2f2a04ab03904598@postgresql.org --- contrib/xml2/xpath.c | 421 ++++++++++++++++++++++++++------------- contrib/xml2/xslt_proc.c | 26 ++- 2 files changed, 296 insertions(+), 151 deletions(-) diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index 23d3f332dbaa7..3f733405ec6db 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -51,10 +51,10 @@ static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, static xmlChar *pgxml_texttoxmlchar(text *textstring); -static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath, - xpath_workspace *workspace); +static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath, + PgXmlErrorContext *xmlerrcxt); -static void cleanup_workspace(xpath_workspace *workspace); +static void cleanup_workspace(volatile xpath_workspace *workspace); /* @@ -89,18 +89,40 @@ xml_encode_special_chars(PG_FUNCTION_ARGS) { text *tin = PG_GETARG_TEXT_PP(0); text *tout; - xmlChar *ts, - *tt; + volatile xmlChar *tt = NULL; + PgXmlErrorContext *xmlerrcxt; + + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + xmlChar *ts; - ts = pgxml_texttoxmlchar(tin); + ts = pgxml_texttoxmlchar(tin); + + tt = xmlEncodeSpecialChars(NULL, ts); + if (tt == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + pfree(ts); + + tout = cstring_to_text((char *) tt); + } + PG_CATCH(); + { + if (tt != NULL) + xmlFree((xmlChar *) tt); - tt = xmlEncodeSpecialChars(NULL, ts); + pg_xml_done(xmlerrcxt, true); - pfree(ts); + PG_RE_THROW(); + } + PG_END_TRY(); - tout = cstring_to_text((char *) tt); + if (tt != NULL) + xmlFree((xmlChar *) tt); - xmlFree(tt); + pg_xml_done(xmlerrcxt, false); PG_RETURN_TEXT_P(tout); } @@ -122,62 +144,90 @@ pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlChar *septagname, xmlChar *plainsep) { - xmlBufferPtr buf; + volatile xmlBufferPtr buf = NULL; xmlChar *result; int i; + PgXmlErrorContext *xmlerrcxt; - buf = xmlBufferCreate(); + /* spin some error handling */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) - { - xmlBufferWriteChar(buf, "<"); - xmlBufferWriteCHAR(buf, toptagname); - xmlBufferWriteChar(buf, ">"); - } - if (nodeset != NULL) + PG_TRY(); { - for (i = 0; i < nodeset->nodeNr; i++) - { - if (plainsep != NULL) - { - xmlBufferWriteCHAR(buf, - xmlXPathCastNodeToString(nodeset->nodeTab[i])); + buf = xmlBufferCreate(); - /* If this isn't the last entry, write the plain sep. 
*/ - if (i < (nodeset->nodeNr) - 1) - xmlBufferWriteChar(buf, (char *) plainsep); - } - else + if (buf == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlBuffer"); + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, toptagname); + xmlBufferWriteChar(buf, ">"); + } + if (nodeset != NULL) + { + for (i = 0; i < nodeset->nodeNr; i++) { - if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + if (plainsep != NULL) { - xmlBufferWriteChar(buf, "<"); - xmlBufferWriteCHAR(buf, septagname); - xmlBufferWriteChar(buf, ">"); - } - xmlNodeDump(buf, - nodeset->nodeTab[i]->doc, - nodeset->nodeTab[i], - 1, 0); + xmlBufferWriteCHAR(buf, + xmlXPathCastNodeToString(nodeset->nodeTab[i])); - if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + /* If this isn't the last entry, write the plain sep. */ + if (i < (nodeset->nodeNr) - 1) + xmlBufferWriteChar(buf, (char *) plainsep); + } + else { - xmlBufferWriteChar(buf, ""); + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, septagname); + xmlBufferWriteChar(buf, ">"); + } + xmlNodeDump(buf, + nodeset->nodeTab[i]->doc, + nodeset->nodeTab[i], + 1, 0); + + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, ""); + } } } } - } - if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, ""); + } + + result = xmlStrdup(buf->content); + if (result == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate result"); + } + PG_CATCH(); { - xmlBufferWriteChar(buf, ""); + if (buf) + xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); } - result = xmlStrdup(buf->content); + PG_END_TRY(); + xmlBufferFree(buf); + pg_xml_done(xmlerrcxt, false); + return result; } @@ -208,16 +258,29 @@ xpath_nodeset(PG_FUNCTION_ARGS) xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3)); xmlChar *xpath; text *xpres; - xmlXPathObjectPtr res; - xpath_workspace workspace; + volatile xpath_workspace *workspace; + PgXmlErrorContext *xmlerrcxt; xpath = pgxml_texttoxmlchar(xpathsupp); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - res = pgxml_xpath(document, xpath, &workspace); + PG_TRY(); + { + workspace = pgxml_xpath(document, xpath, xmlerrcxt); + xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL); + } + PG_CATCH(); + { + if (workspace) + cleanup_workspace(workspace); - xpres = pgxml_result_to_text(res, toptag, septag, NULL); + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); - cleanup_workspace(&workspace); + cleanup_workspace(workspace); + pg_xml_done(xmlerrcxt, false); pfree(xpath); @@ -240,16 +303,29 @@ xpath_list(PG_FUNCTION_ARGS) xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2)); xmlChar *xpath; text *xpres; - xmlXPathObjectPtr res; - xpath_workspace workspace; + volatile xpath_workspace *workspace; + PgXmlErrorContext *xmlerrcxt; xpath = pgxml_texttoxmlchar(xpathsupp); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - res = pgxml_xpath(document, xpath, &workspace); + PG_TRY(); + { + workspace = pgxml_xpath(document, xpath, xmlerrcxt); + xpres = pgxml_result_to_text(workspace->res, NULL, NULL, plainsep); + } + PG_CATCH(); + { + if (workspace) + cleanup_workspace(workspace); - xpres = 
pgxml_result_to_text(res, NULL, NULL, plainsep); + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); - cleanup_workspace(&workspace); + cleanup_workspace(workspace); + pg_xml_done(xmlerrcxt, false); pfree(xpath); @@ -269,8 +345,8 @@ xpath_string(PG_FUNCTION_ARGS) xmlChar *xpath; int32 pathsize; text *xpres; - xmlXPathObjectPtr res; - xpath_workspace workspace; + volatile xpath_workspace *workspace; + PgXmlErrorContext *xmlerrcxt; pathsize = VARSIZE_ANY_EXHDR(xpathsupp); @@ -286,11 +362,25 @@ xpath_string(PG_FUNCTION_ARGS) xpath[pathsize + 7] = ')'; xpath[pathsize + 8] = '\0'; - res = pgxml_xpath(document, xpath, &workspace); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + + PG_TRY(); + { + workspace = pgxml_xpath(document, xpath, xmlerrcxt); + xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL); + } + PG_CATCH(); + { + if (workspace) + cleanup_workspace(workspace); - xpres = pgxml_result_to_text(res, NULL, NULL, NULL); + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); - cleanup_workspace(&workspace); + cleanup_workspace(workspace); + pg_xml_done(xmlerrcxt, false); pfree(xpath); @@ -308,24 +398,38 @@ xpath_number(PG_FUNCTION_ARGS) text *document = PG_GETARG_TEXT_PP(0); text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ xmlChar *xpath; - float4 fRes; - xmlXPathObjectPtr res; - xpath_workspace workspace; + float4 fRes = 0.0; + bool isNull = false; + volatile xpath_workspace *workspace = NULL; + PgXmlErrorContext *xmlerrcxt; xpath = pgxml_texttoxmlchar(xpathsupp); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - res = pgxml_xpath(document, xpath, &workspace); - - pfree(xpath); + PG_TRY(); + { + workspace = pgxml_xpath(document, xpath, xmlerrcxt); + pfree(xpath); - if (res == NULL) - PG_RETURN_NULL(); + if (workspace->res == NULL) + isNull = true; + else + fRes = xmlXPathCastToNumber(workspace->res); + } + PG_CATCH(); + { + if (workspace) + cleanup_workspace(workspace); - fRes = xmlXPathCastToNumber(res); + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); - cleanup_workspace(&workspace); + cleanup_workspace(workspace); + pg_xml_done(xmlerrcxt, false); - if (xmlXPathIsNaN(fRes)) + if (isNull || xmlXPathIsNaN(fRes)) PG_RETURN_NULL(); PG_RETURN_FLOAT4(fRes); @@ -341,21 +445,34 @@ xpath_bool(PG_FUNCTION_ARGS) text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */ xmlChar *xpath; int bRes; - xmlXPathObjectPtr res; - xpath_workspace workspace; + volatile xpath_workspace *workspace = NULL; + PgXmlErrorContext *xmlerrcxt; xpath = pgxml_texttoxmlchar(xpathsupp); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - res = pgxml_xpath(document, xpath, &workspace); - - pfree(xpath); + PG_TRY(); + { + workspace = pgxml_xpath(document, xpath, xmlerrcxt); + pfree(xpath); - if (res == NULL) - PG_RETURN_BOOL(false); + if (workspace->res == NULL) + bRes = 0; + else + bRes = xmlXPathCastToBoolean(workspace->res); + } + PG_CATCH(); + { + if (workspace) + cleanup_workspace(workspace); - bRes = xmlXPathCastToBoolean(res); + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); - cleanup_workspace(&workspace); + cleanup_workspace(workspace); + pg_xml_done(xmlerrcxt, false); PG_RETURN_BOOL(bRes); } @@ -364,62 +481,44 @@ xpath_bool(PG_FUNCTION_ARGS) /* Core function to evaluate XPath query */ -static xmlXPathObjectPtr -pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) +static xpath_workspace * +pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt) { 
int32 docsize = VARSIZE_ANY_EXHDR(document); - PgXmlErrorContext *xmlerrcxt; xmlXPathCompExprPtr comppath; + xpath_workspace *workspace = (xpath_workspace *) + palloc0(sizeof(xpath_workspace)); workspace->doctree = NULL; workspace->ctxt = NULL; workspace->res = NULL; - xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - - PG_TRY(); + workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document), + docsize, NULL, NULL, + XML_PARSE_NOENT); + if (workspace->doctree != NULL) { - workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document), - docsize, NULL, NULL, - XML_PARSE_NOENT); - if (workspace->doctree != NULL) - { - workspace->ctxt = xmlXPathNewContext(workspace->doctree); - workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); - - /* compile the path */ - comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath); - if (comppath == NULL) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY, - "XPath Syntax Error"); + workspace->ctxt = xmlXPathNewContext(workspace->doctree); + workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); - /* Now evaluate the path expression. */ - workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt); + /* compile the path */ + comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath); + if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY, + "XPath Syntax Error"); - xmlXPathFreeCompExpr(comppath); - } - } - PG_CATCH(); - { - cleanup_workspace(workspace); - - pg_xml_done(xmlerrcxt, true); + /* Now evaluate the path expression. */ + workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt); - PG_RE_THROW(); + xmlXPathFreeCompExpr(comppath); } - PG_END_TRY(); - if (workspace->res == NULL) - cleanup_workspace(workspace); - - pg_xml_done(xmlerrcxt, false); - - return workspace->res; + return workspace; } /* Clean up after processing the result of pgxml_xpath() */ static void -cleanup_workspace(xpath_workspace *workspace) +cleanup_workspace(volatile xpath_workspace *workspace) { if (workspace->res) xmlXPathFreeObject(workspace->res); @@ -438,34 +537,59 @@ pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *septag, xmlChar *plainsep) { - xmlChar *xpresstr; + volatile xmlChar *xpresstr = NULL; + PgXmlErrorContext *xmlerrcxt; text *xpres; if (res == NULL) return NULL; - switch (res->type) - { - case XPATH_NODESET: - xpresstr = pgxmlNodeSetToText(res->nodesetval, - toptag, - septag, plainsep); - break; + /* spin some error handling */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - case XPATH_STRING: - xpresstr = xmlStrdup(res->stringval); - break; + PG_TRY(); + { + switch (res->type) + { + case XPATH_NODESET: + xpresstr = pgxmlNodeSetToText(res->nodesetval, + toptag, + septag, plainsep); + break; + + case XPATH_STRING: + xpresstr = xmlStrdup(res->stringval); + if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate result"); + break; + + default: + elog(NOTICE, "unsupported XQuery result: %d", res->type); + xpresstr = xmlStrdup((const xmlChar *) ""); + if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate result"); + } - default: - elog(NOTICE, "unsupported XQuery result: %d", res->type); - xpresstr = xmlStrdup((const xmlChar *) ""); + /* Now convert this result back to text */ + xpres = cstring_to_text((char *) xpresstr); } + PG_CATCH(); + { + if (xpresstr != NULL) + 
xmlFree((xmlChar *) xpresstr); - /* Now convert this result back to text */ - xpres = cstring_to_text((char *) xpresstr); + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); /* Free various storage */ - xmlFree(xpresstr); + xmlFree((xmlChar *) xpresstr); + + pg_xml_done(xmlerrcxt, false); return xpres; } @@ -648,11 +772,16 @@ xpath_table(PG_FUNCTION_ARGS) for (j = 0; j < numpaths; j++) { ctxt = xmlXPathNewContext(doctree); + if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, + ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + ctxt->node = xmlDocGetRootElement(doctree); /* compile the path */ comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]); - if (comppath == NULL) + if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY, "XPath Syntax Error"); @@ -671,6 +800,10 @@ xpath_table(PG_FUNCTION_ARGS) rownr < res->nodesetval->nodeNr) { resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); + if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, + ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate result"); had_values = true; } else @@ -680,11 +813,19 @@ xpath_table(PG_FUNCTION_ARGS) case XPATH_STRING: resstr = xmlStrdup(res->stringval); + if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, + ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate result"); break; default: elog(NOTICE, "unsupported XQuery result: %d", res->type); resstr = xmlStrdup((const xmlChar *) ""); + if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt)) + xml_ereport(xmlerrcxt, + ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate result"); } /* diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index b720d89f754ae..c8e7dd45ed5b4 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -58,7 +58,7 @@ xslt_process(PG_FUNCTION_ARGS) volatile xsltSecurityPrefsPtr xslt_sec_prefs = NULL; volatile xsltTransformContextPtr xslt_ctxt = NULL; volatile int resstat = -1; - xmlChar *resstr = NULL; + volatile xmlChar *resstr = NULL; int reslen = 0; if (fcinfo->nargs == 3) @@ -86,7 +86,7 @@ xslt_process(PG_FUNCTION_ARGS) VARSIZE_ANY_EXHDR(doct), NULL, NULL, XML_PARSE_NOENT); - if (doctree == NULL) + if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "error parsing XML document"); @@ -95,14 +95,14 @@ xslt_process(PG_FUNCTION_ARGS) VARSIZE_ANY_EXHDR(ssheet), NULL, NULL, XML_PARSE_NOENT); - if (ssdoc == NULL) + if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "error parsing stylesheet as XML document"); /* After this call we need not free ssdoc separately */ stylesheet = xsltParseStylesheetDoc(ssdoc); - if (stylesheet == NULL) + if (stylesheet == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY, "failed to parse stylesheet"); @@ -137,11 +137,15 @@ xslt_process(PG_FUNCTION_ARGS) restree = xsltApplyStylesheetUser(stylesheet, doctree, params, NULL, NULL, xslt_ctxt); - if (restree == NULL) + if (restree == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY, "failed to apply stylesheet"); - resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet); + resstat = xsltSaveResultToString((xmlChar **) &resstr, &reslen, + restree, stylesheet); + + if (resstat >= 0) + 
result = cstring_to_text_with_len((char *) resstr, reslen); } PG_CATCH(); { @@ -155,6 +159,8 @@ xslt_process(PG_FUNCTION_ARGS) xsltFreeStylesheet(stylesheet); if (doctree != NULL) xmlFreeDoc(doctree); + if (resstr != NULL) + xmlFree((xmlChar *) resstr); xsltCleanupGlobals(); pg_xml_done(xmlerrcxt, true); @@ -170,17 +176,15 @@ xslt_process(PG_FUNCTION_ARGS) xmlFreeDoc(doctree); xsltCleanupGlobals(); + if (resstr) + xmlFree((xmlChar *) resstr); + pg_xml_done(xmlerrcxt, false); /* XXX this is pretty dubious, really ought to throw error instead */ if (resstat < 0) PG_RETURN_NULL(); - result = cstring_to_text_with_len((char *) resstr, reslen); - - if (resstr) - xmlFree(resstr); - PG_RETURN_TEXT_P(result); #else /* !USE_LIBXSLT */ From 8fd9bb1d9654c59d40613232ad964e9a648e4202 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 1 Jul 2025 09:36:33 +0200 Subject: [PATCH 193/602] Enable MSVC conforming preprocessor Switch MSVC to use the conforming preprocessor, using the /Zc:preprocessor option. This allows us to drop the alternative implementation of VA_ARGS_NARGS() for the previous "traditional" preprocessor. This also prepares the way for enabling C11 mode in the future, which enables the conforming preprocessor by default. This now requires Visual Studio 2019. The installation documentation is adjusted accordingly. Discussion: https://www.postgresql.org/message-id/flat/01a69441-af54-4822-891b-ca28e05b215a%40eisentraut.org --- doc/src/sgml/installation.sgml | 8 ++------ meson.build | 4 ++++ src/include/c.h | 19 ------------------- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index de19f3ad92952..cb53530cc4fa8 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -3847,17 +3847,13 @@ make: *** [postgres] Error 1 Both 32-bit and 64-bit builds are possible with the Microsoft Compiler suite. 32-bit PostgreSQL builds are possible with - Visual Studio 2015 to + Visual Studio 2019 to Visual Studio 2022, as well as standalone Windows SDK releases 10 and above. 64-bit PostgreSQL builds are supported with Microsoft Windows SDK version 10 and above or - Visual Studio 2015 and above. + Visual Studio 2019 and above. Errors detected at semantic analysis or later, such as a misspelled table or column name, do not have this effect. + + + Lastly, note that all the statements within the Query message will + observe the same value of statement_timestamp(), + since that timestamp is updated only upon receipt of the Query + message. This will result in them all observing the same + value of transaction_timestamp() as well, + except in cases where the query string ends a previously-started + transaction and begins a new one. + From aad1617b76aef034a27f2a52903702dc5435c422 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 15 Jul 2025 18:11:18 -0400 Subject: [PATCH 310/602] Silence uninitialized-value warnings in compareJsonbContainers(). Because not every path through JsonbIteratorNext() sets val->type, some compilers complain that compareJsonbContainers() is comparing possibly-uninitialized values. The paths that don't set it return WJB_DONE, WJB_END_ARRAY, or WJB_END_OBJECT, so it's clear by manual inspection that the "(ra == rb)" code path is safe, and indeed we aren't seeing warnings about that. But the (ra != rb) case is much less obviously safe. 
In Assert-enabled builds it seems that the asserts rejecting WJB_END_ARRAY and WJB_END_OBJECT persuade gcc 15.x not to warn, which makes little sense because it's impossible to believe that the compiler can prove of its own accord that ra/rb aren't WJB_DONE here. (In fact they never will be, so the code isn't wrong, but why is there no warning?) Without Asserts, the appearance of warnings is quite unsurprising. We discussed fixing this by converting those two Asserts into pg_assume, but that seems not very satisfactory when it's so unclear why the compiler is or isn't warning: the warning could easily reappear with some other compiler version. Let's fix it in a less magical, more future-proof way by changing JsonbIteratorNext() so that it always does set val->type. The cost of that should be pretty negligible, and it makes the function's API spec less squishy. Reported-by: Erik Rijkers Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/988bf1bc-3f1f-99f3-bf98-222f1cd9dc5e@xs4all.nl Discussion: https://postgr.es/m/0c623e8a204187b87b4736792398eaf1@postgrespro.ru Backpatch-through: 13 --- src/backend/utils/adt/jsonb_util.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index c8b6c15e05975..136952861e14f 100644 --- a/src/backend/utils/adt/jsonb_util.c +++ b/src/backend/utils/adt/jsonb_util.c @@ -277,9 +277,6 @@ compareJsonbContainers(JsonbContainer *a, JsonbContainer *b) else { /* - * It's safe to assume that the types differed, and that the va - * and vb values passed were set. - * * If the two values were of the same container type, then there'd * have been a chance to observe the variation in the number of * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're @@ -852,15 +849,20 @@ JsonbIteratorInit(JsonbContainer *container) * It is our job to expand the jbvBinary representation without bothering them * with it. However, clients should not take it upon themselves to touch array * or Object element/pair buffers, since their element/pair pointers are - * garbage. Also, *val will not be set when returning WJB_END_ARRAY or - * WJB_END_OBJECT, on the assumption that it's only useful to access values - * when recursing in. + * garbage. + * + * *val is not meaningful when the result is WJB_DONE, WJB_END_ARRAY or + * WJB_END_OBJECT. However, we set val->type = jbvNull in those cases, + * so that callers may assume that val->type is always well-defined. */ JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) { if (*it == NULL) + { + val->type = jbvNull; return WJB_DONE; + } /* * When stepping into a nested container, we jump back here to start @@ -898,6 +900,7 @@ JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) * nesting). */ *it = freeAndGetParent(*it); + val->type = jbvNull; return WJB_END_ARRAY; } @@ -951,6 +954,7 @@ JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) * of nesting). 
*/ *it = freeAndGetParent(*it); + val->type = jbvNull; return WJB_END_OBJECT; } else @@ -995,8 +999,10 @@ JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) return WJB_VALUE; } - elog(ERROR, "invalid iterator state"); - return -1; + elog(ERROR, "invalid jsonb iterator state"); + /* satisfy compilers that don't know that elog(ERROR) doesn't return */ + val->type = jbvNull; + return WJB_DONE; } /* From 2a3a3964328a0b6b0cb278ae6cb595772586d654 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 15 Jul 2025 18:21:12 -0400 Subject: [PATCH 311/602] Clarify the ra != rb case in compareJsonbContainers(). It's impossible to reach this case with either ra or rb being WJB_DONE, because our earlier checks that the structure and length of the inputs match should guarantee that we reach their ends simultaneously. However, the comment completely fails to explain this, and the Asserts don't cover it either. The comment is pretty obscure anyway, so rewrite it, and extend the Asserts to reject WJB_DONE. This is only cosmetic, so no need for back-patch. Author: Tom Lane Discussion: https://postgr.es/m/0c623e8a204187b87b4736792398eaf1@postgrespro.ru --- src/backend/utils/adt/jsonb_util.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index 136952861e14f..82b807d067a34 100644 --- a/src/backend/utils/adt/jsonb_util.c +++ b/src/backend/utils/adt/jsonb_util.c @@ -277,19 +277,16 @@ compareJsonbContainers(JsonbContainer *a, JsonbContainer *b) else { /* - * If the two values were of the same container type, then there'd - * have been a chance to observe the variation in the number of - * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're - * either two heterogeneously-typed containers, or a container and - * some scalar type. - * - * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT - * cases here, because we would have seen the corresponding - * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and - * concluded that they don't match. + * It's not possible for one iterator to report end of array or + * object while the other one reports something else, because we + * would have detected a length mismatch when we processed the + * container-start tokens above. Likewise we can't see WJB_DONE + * from one but not the other. So we have two different-type + * containers, or a container and some scalar type, or two + * different scalar types. Sort on the basis of the type code. */ - Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT); - Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT); + Assert(ra != WJB_DONE && ra != WJB_END_ARRAY && ra != WJB_END_OBJECT); + Assert(rb != WJB_DONE && rb != WJB_END_ARRAY && rb != WJB_END_OBJECT); Assert(va.type != vb.type); Assert(va.type != jbvBinary); From 5fe55a0fe40e801c77d8b2541caaaca49e67a75f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 15 Jul 2025 18:53:00 -0400 Subject: [PATCH 312/602] Doc: clarify description of regexp fields in pg_ident.conf. The grammar was a little shaky and confusing here, so word-smith it a bit. Also, adjust the comments in pg_ident.conf.sample to use the same terminology as the SGML docs, in particular "DATABASE-USERNAME" not "PG-USERNAME". Back-patch appropriate subsets. I did not risk changing pg_ident.conf.sample in released branches, but it still seems OK to change it in v18. Reported-by: Alexey Shishkin Author: Tom Lane Reviewed-by: David G. 
Johnston Discussion: https://postgr.es/m/175206279327.3157504.12519088928605422253@wrigleys.postgresql.org Backpatch-through: 13 --- doc/src/sgml/client-auth.sgml | 16 ++++++++-------- src/backend/libpq/pg_ident.conf.sample | 26 +++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml index 832b616a7bbff..51b95ed04f399 100644 --- a/doc/src/sgml/client-auth.sgml +++ b/doc/src/sgml/client-auth.sgml @@ -1003,8 +1003,9 @@ local db1,db2,@demodbs all md5 the remainder of the field is treated as a regular expression. (See for details of PostgreSQL's regular expression syntax.) The regular - expression can include a single capture, or parenthesized subexpression, - which can then be referenced in the database-username + expression can include a single capture, or parenthesized subexpression. + The portion of the system user name that matched the capture can then + be referenced in the database-username field as \1 (backslash-one). This allows the mapping of multiple user names in a single line, which is particularly useful for simple syntax substitutions. For example, these entries @@ -1022,12 +1023,11 @@ mymap /^(.*)@otherdomain\.com$ guest If the database-username field starts with a slash (/), the remainder of the field is treated - as a regular expression (see - for details of PostgreSQL's regular - expression syntax). It is not possible to use \1 - to use a capture from regular expression on - system-username for a regular expression - on database-username. + as a regular expression. + When the database-username field is a regular + expression, it is not possible to use \1 within it to + refer to a capture from the system-username + field. diff --git a/src/backend/libpq/pg_ident.conf.sample b/src/backend/libpq/pg_ident.conf.sample index f5225f26cdf2c..8ee6c0ba31576 100644 --- a/src/backend/libpq/pg_ident.conf.sample +++ b/src/backend/libpq/pg_ident.conf.sample @@ -13,25 +13,25 @@ # user names to their corresponding PostgreSQL user names. Records # are of the form: # -# MAPNAME SYSTEM-USERNAME PG-USERNAME +# MAPNAME SYSTEM-USERNAME DATABASE-USERNAME # # (The uppercase quantities must be replaced by actual values.) # # MAPNAME is the (otherwise freely chosen) map name that was used in # pg_hba.conf. SYSTEM-USERNAME is the detected user name of the -# client. PG-USERNAME is the requested PostgreSQL user name. The -# existence of a record specifies that SYSTEM-USERNAME may connect as -# PG-USERNAME. +# client. DATABASE-USERNAME is the requested PostgreSQL user name. +# The existence of a record specifies that SYSTEM-USERNAME may connect +# as DATABASE-USERNAME. # -# If SYSTEM-USERNAME starts with a slash (/), it will be treated as a -# regular expression. Optionally this can contain a capture (a -# parenthesized subexpression). The substring matching the capture -# will be substituted for \1 (backslash-one) if present in -# PG-USERNAME. +# If SYSTEM-USERNAME starts with a slash (/), the rest of it will be +# treated as a regular expression. Optionally this can contain a capture +# (a parenthesized subexpression). The substring matching the capture +# will be substituted for \1 (backslash-one) if that appears in +# DATABASE-USERNAME. # -# PG-USERNAME can be "all", a user name, a group name prefixed with "+", or -# a regular expression (if it starts with a slash (/)). If it is a regular -# expression, the substring matching with \1 has no effect. 
+# DATABASE-USERNAME can be "all", a user name, a group name prefixed with "+", +# or a regular expression (if it starts with a slash (/)). If it is a regular +# expression, no substitution for \1 will occur. # # Multiple maps may be specified in this file and used by pg_hba.conf. # @@ -69,4 +69,4 @@ # Put your actual configuration here # ---------------------------------- -# MAPNAME SYSTEM-USERNAME PG-USERNAME +# MAPNAME SYSTEM-USERNAME DATABASE-USERNAME From 7c3b591af3d83520789cf80a74624125357c6918 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 16 Jul 2025 08:03:36 +0900 Subject: [PATCH 313/602] doc: Clarify that total_vacuum_time excludes VACUUM FULL. The last_vacuum and vacuum_count fields in pg_stat_all_tables already state that they do not include VACUUM FULL. However, total_vacuum_time, which also excludes VACUUM FULL, did not mention this. This could mislead users into thinking VACUUM FULL time is included. To address this, this commit updates the documentation for pg_stat_all_tables to explicitly state that total_vacuum_time does not count VACUUM FULL. Back-patched to v18, where total_vacuum_time was introduced. Additionally, this commit clarifies that n_ins_since_vacuum also excludes VACUUM FULL. Although n_ins_since_vacuum was added in v13, we are not back-patching this change to stable branches, as it is a documentation improvement, not a bug fix. Author: Fujii Masao Reviewed-by: Robert Treat Reviewed-by: David G. Johnston Reviewed-by: Laurenz Albe Discussion: https://postgr.es/m/2ac375d1-591b-4f1b-a2af-f24335567866@oss.nttdata.com Backpatch-through: 18 --- doc/src/sgml/monitoring.sgml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 4265a22d4de35..823afe1b30b22 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3980,6 +3980,7 @@ description | Waiting for a newly initialized WAL file to reach durable storage Estimated number of rows inserted since this table was last vacuumed + (not counting VACUUM FULL) @@ -4066,7 +4067,8 @@ description | Waiting for a newly initialized WAL file to reach durable storage total_vacuum_time double precision - Total time this table has been manually vacuumed, in milliseconds. + Total time this table has been manually vacuumed, in milliseconds + (not counting VACUUM FULL). (This includes the time spent sleeping due to cost-based delays.) From 09fcc652fefdb58e9cb729e8a604607d8805140c Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 16 Jul 2025 08:32:52 +0900 Subject: [PATCH 314/602] doc: Fix confusing description of streaming option in START_REPLICATION. Previously, the documentation described the streaming option as a boolean, which is outdated since it's no longer a boolean as of protocol version 4. This could confuse users. This commit updates the description to remove the "boolean" reference and clearly list the valid values for the streaming option. Back-patch to v16, where the streaming option changed to a non-boolean. 
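For example, a client could now request parallel streaming with something like the following (the slot and publication names are hypothetical):

    START_REPLICATION SLOT "sub1" LOGICAL 0/0
        (proto_version '4', streaming 'parallel', publication_names '"pub1"')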
Author: Euler Taveira Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/8d21fb98-5c25-4dee-8387-e5a62b01ea7d@app.fastmail.com Backpatch-through: 16 --- doc/src/sgml/protocol.sgml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 1b38e5d725ae7..6c14da0cc7685 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -3514,11 +3514,13 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" - Boolean option to enable streaming of in-progress transactions. - It accepts an additional value "parallel" to enable sending extra - information with some messages to be used for parallelisation. - Minimum protocol version 2 is required to turn it on. Minimum protocol - version 4 is required for the "parallel" option. + Option to enable streaming of in-progress transactions. Valid values are + off (the default), on and + parallel. The setting parallel + enables sending extra information with some messages to be used for + parallelization. Minimum protocol version 2 is required to turn it + on. Minimum protocol version 4 is required for the + parallel value. From d8425811b681ea5ba48a36235de5e1332e92685c Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 16 Jul 2025 08:51:04 +0900 Subject: [PATCH 315/602] doc: Document default values for pgoutput options in protocol.sgml. The pgoutput plugin options are described in the logical streaming replication protocol documentation, but their default values were previously not mentioned. This made it less convenient for users, for example, when specifying those options to use pg_recvlogical with pgoutput plugin. This commit adds the explanations of the default values for pgoutput options to improve clarity and usability. Author: Fujii Masao Reviewed-by: Euler Taveira Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/d2790f10-238d-4cb5-a743-d9d2a9dd900f@oss.nttdata.com --- doc/src/sgml/protocol.sgml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 6c14da0cc7685..e74b5be1effdd 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -3492,6 +3492,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" Boolean option to use binary transfer mode. Binary mode is faster than the text mode but slightly less robust. + The default is off. @@ -3504,6 +3505,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" Boolean option to enable sending the messages that are written by pg_logical_emit_message. + The default is off. @@ -3533,6 +3535,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" Boolean option to enable two-phase transactions. Minimum protocol version 3 is required to turn it on. + The default is off. @@ -3549,6 +3552,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" to send the changes regardless of their origin. This can be used to avoid loops (infinite replication of the same data) among replication nodes. + The default is any. From b8341ae856f239c6d84c738e516267e890969d8a Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 16 Jul 2025 10:31:51 +0900 Subject: [PATCH 316/602] pgoutput: Initialize missing default for "origin" parameter. The pgoutput plugin initializes optional parameters like "binary" with default values at the start of processing. However, the "origin" parameter was previously missed and left without explicit initialization. 
Although the PGOutputData struct, which holds these settings, is
zero-initialized at allocation (resulting in the publish_no_origin
field for the "origin" parameter being false by default), this default
was not set explicitly, unlike the other parameters.

This commit adds explicit initialization of the "origin" parameter to
ensure consistency and clarity in how defaults are handled.

Author: Fujii Masao
Reviewed-by: Euler Taveira
Discussion: https://postgr.es/m/d2790f10-238d-4cb5-a743-d9d2a9dd900f@oss.nttdata.com
---
 src/backend/replication/pgoutput/pgoutput.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 082b4d9d32798..f4c977262c5a4 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -297,10 +297,12 @@ parse_output_parameters(List *options, PGOutputData *data)
 	bool		two_phase_option_given = false;
 	bool		origin_option_given = false;

+	/* Initialize optional parameters to defaults */
 	data->binary = false;
 	data->streaming = LOGICALREP_STREAM_OFF;
 	data->messages = false;
 	data->two_phase = false;
+	data->publish_no_origin = false;

 	foreach(lc, options)
 	{

From 1dbe6f76677c26096518998fdc72dab771a98913 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Wed, 16 Jul 2025 11:59:22 +0900
Subject: [PATCH 317/602] Refactor non-supported compression error message in
 toast_compression.c

This code used a NO_LZ4_SUPPORT() macro to issue an error in the code
paths where LZ4 [de]compression is attempted but the build does not
support it.  This commit refactors the code to use a more flexible
error message, so that it can be used for other compression methods,
with the method name given as input to the macro.

Extracted from a larger patch by the same author.

Author: Nikhil Kumar Veldanda
Reviewed-by: Kirill Reshke
Discussion: https://postgr.es/m/CAFAfj_HX84EK4hyRYw50AOHOcdVi-+FFwAAPo7JHx4aShCvunQ@mail.gmail.com
---
 src/backend/access/common/toast_compression.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 21f2f4af97e3f..926f1e4008abe 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -25,11 +25,11 @@
 /* GUC */
 int			default_toast_compression = TOAST_PGLZ_COMPRESSION;

-#define NO_LZ4_SUPPORT() \
+#define NO_COMPRESSION_SUPPORT(method) \
 	ereport(ERROR, \
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
-			 errmsg("compression method lz4 not supported"), \
-			 errdetail("This functionality requires the server to be built with lz4 support.")))
+			 errmsg("compression method %s not supported", method), \
+			 errdetail("This functionality requires the server to be built with %s support.", method)))

 /*
  * Compress a varlena using PGLZ. 
@@ -139,7 +139,7 @@ struct varlena * lz4_compress_datum(const struct varlena *value) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + NO_COMPRESSION_SUPPORT("lz4"); return NULL; /* keep compiler quiet */ #else int32 valsize; @@ -182,7 +182,7 @@ struct varlena * lz4_decompress_datum(const struct varlena *value) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + NO_COMPRESSION_SUPPORT("lz4"); return NULL; /* keep compiler quiet */ #else int32 rawsize; @@ -215,7 +215,7 @@ struct varlena * lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + NO_COMPRESSION_SUPPORT("lz4"); return NULL; /* keep compiler quiet */ #else int32 rawsize; @@ -289,7 +289,7 @@ CompressionNameToMethod(const char *compression) else if (strcmp(compression, "lz4") == 0) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + NO_COMPRESSION_SUPPORT("lz4"); #endif return TOAST_LZ4_COMPRESSION; } From ecc5161a0bd3cba8dda7ece98e0848856b97b7a1 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 16 Jul 2025 11:50:34 -0500 Subject: [PATCH 318/602] psql: Fix note on project naming in output of \copyright. This adjusts the wording to match the changes in commits 5987553fde, a233a603ba, and pgweb commit 2d764dbc08. Reviewed-by: Tom Lane Reviewed-by: Daniel Gustafsson Discussion: https://postgr.es/m/aHVo791guQR6uqwT%40nathan Backpatch-through: 13 --- src/bin/psql/help.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index a2e009ab9bea7..8c62729a0d124 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -748,7 +748,7 @@ void print_copyright(void) { puts("PostgreSQL Database Management System\n" - "(formerly known as Postgres, then as Postgres95)\n\n" + "(also known as Postgres, formerly known as Postgres95)\n\n" "Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group\n\n" "Portions Copyright (c) 1994, The Regents of the University of California\n\n" "Permission to use, copy, modify, and distribute this software and its\n" From 48c2c7b4b45b3bb696d566f4f425fccdd871532f Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Jul 2025 09:57:12 -0700 Subject: [PATCH 319/602] pg_dumpall: Skip global objects with --statistics-only or --no-schema. Previously, pg_dumpall would still dump global objects such as roles and tablespaces even when --statistics-only or --no-schema was specified. Since these global objects are treated as schema-level data, they should be skipped in these cases. This commit fixes the issue by ensuring that global objects are not dumped when either --statistics-only or --no-schema is used. Author: Fujii Masao Reviewed-by: Corey Huinker Discussion: https://postgr.es/m/08129593-6f3c-4fb9-94b7-5aa2eefb99b0@oss.nttdata.com Backpatch-through: 18 --- src/bin/pg_dump/pg_dumpall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 3cbcad65c5fb5..100317b1aa949 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -632,7 +632,7 @@ main(int argc, char *argv[]) fprintf(OPF, "SET escape_string_warning = off;\n"); fprintf(OPF, "\n"); - if (!data_only) + if (!data_only && !statistics_only && !no_schema) { /* * If asked to --clean, do that first. We can avoid detailed From 4c8ad67a98b5d84c1ca00a26d53d08f2d2b881aa Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Wed, 16 Jul 2025 13:05:44 -0400 Subject: [PATCH 320/602] nbtree: Use only one notnullkey ScanKeyData. 
_bt_first need only store one ScanKeyData struct on the stack for the purposes of building an IS NOT NULL key based on an implied NOT NULL constraint. We don't need INDEX_MAX_KEYS-many ScanKeyData structs. This saves us a little over 2KB in stack space. It's possible that this has some performance benefit. It also seems simpler and more direct. It isn't possible for more than a single index attribute to need its own implied IS NOT NULL key: the first such attribute/IS NOT NULL key always makes _bt_first stop adding additional boundary keys to startKeys[]. Using INDEX_MAX_KEYS-many ScanKeyData entries was (at best) misleading. Author: Peter Geoghegan Reviewed-By: Mircea Cadariu Discussion: https://postgr.es/m/CAH2-Wzm=1kJMSZhhTLoM5BPbwQNWxUj-ynOEh=89ptDZAVgauw@mail.gmail.com --- src/backend/access/nbtree/nbtsearch.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 4af1ff1e9e5e3..d69798795b43b 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -892,9 +892,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) OffsetNumber offnum; BTScanInsertData inskey; ScanKey startKeys[INDEX_MAX_KEYS]; - ScanKeyData notnullkeys[INDEX_MAX_KEYS]; + ScanKeyData notnullkey; int keysz = 0; - StrategyNumber strat_total; + StrategyNumber strat_total = InvalidStrategy; BlockNumber blkno = InvalidBlockNumber, lastcurrblkno; @@ -1034,7 +1034,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * need to be kept in sync. *---------- */ - strat_total = BTEqualStrategyNumber; if (so->numberOfKeys > 0) { AttrNumber curattr; @@ -1122,16 +1121,15 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) ScanDirectionIsForward(dir) : ScanDirectionIsBackward(dir))) { - /* Yes, so build the key in notnullkeys[keysz] */ - bkey = ¬nullkeys[keysz]; + /* Final startKeys[] entry will be deduced NOT NULL key */ + bkey = ¬nullkey; ScanKeyEntryInitialize(bkey, (SK_SEARCHNOTNULL | SK_ISNULL | (impliesNN->sk_flags & (SK_BT_DESC | SK_BT_NULLS_FIRST))), curattr, - ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? - BTGreaterStrategyNumber : - BTLessStrategyNumber), + ScanDirectionIsForward(dir) ? + BTGreaterStrategyNumber : BTLessStrategyNumber, InvalidOid, InvalidOid, InvalidOid, From 0858f0f96ebb891c8960994f023ed5a17b758a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Wed, 16 Jul 2025 19:22:53 +0200 Subject: [PATCH 321/602] Fix dumping of comments on invalid constraints on domains We skip dumping constraints together with domains if they are invalid ('separate') so that they appear after data -- but their comments were dumped together with the domain definition, which in effect leads to the comment being dumped when the constraint does not yet exist. Delay them in the same way. Oversight in 7eca575d1c28; backpatch all the way back. 
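A condensed reproduction of the scenario fixed here, adapted from the regression test added by this patch (object names are illustrative):

CREATE DOMAIN d_positive AS int;
ALTER DOMAIN d_positive ADD CONSTRAINT d_positive_ck CHECK (value > 0) NOT VALID;
COMMENT ON CONSTRAINT d_positive_ck ON DOMAIN d_positive
    IS 'comment on invalid constraint';
-- Before the fix, pg_dump emitted this COMMENT together with the domain
-- definition, while the NOT VALID constraint itself was delayed to the
-- post-data section, so the restored COMMENT referenced a constraint that
-- did not yet exist.
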
Author: jian he Discussion: https://postgr.es/m/CACJufxF_C2pe6J_+nPr6C5jf5rQnbYP8XOKr4HM8yHZtp2aQqQ@mail.gmail.com --- src/bin/pg_dump/pg_dump.c | 23 ++++++++++++++++++++++- src/test/regress/expected/constraints.out | 4 ++++ src/test/regress/sql/constraints.sql | 6 ++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1937997ea674d..c6226175528bb 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12583,8 +12583,13 @@ dumpDomain(Archive *fout, const TypeInfo *tyinfo) for (i = 0; i < tyinfo->nDomChecks; i++) { ConstraintInfo *domcheck = &(tyinfo->domChecks[i]); - PQExpBuffer conprefix = createPQExpBuffer(); + PQExpBuffer conprefix; + /* but only if the constraint itself was dumped here */ + if (domcheck->separate) + continue; + + conprefix = createPQExpBuffer(); appendPQExpBuffer(conprefix, "CONSTRAINT %s ON DOMAIN", fmtId(domcheck->dobj.name)); @@ -18488,6 +18493,22 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo) .section = SECTION_POST_DATA, .createStmt = q->data, .dropStmt = delq->data)); + + if (coninfo->dobj.dump & DUMP_COMPONENT_COMMENT) + { + PQExpBuffer conprefix = createPQExpBuffer(); + char *qtypname = pg_strdup(fmtId(tyinfo->dobj.name)); + + appendPQExpBuffer(conprefix, "CONSTRAINT %s ON DOMAIN", + fmtId(coninfo->dobj.name)); + + dumpComment(fout, conprefix->data, qtypname, + tyinfo->dobj.namespace->dobj.name, + tyinfo->rolname, + coninfo->dobj.catId, 0, tyinfo->dobj.dumpId); + destroyPQExpBuffer(conprefix); + free(qtypname); + } } } else diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out index ccea883cffd65..3590d3274f05a 100644 --- a/src/test/regress/expected/constraints.out +++ b/src/test/regress/expected/constraints.out @@ -1701,3 +1701,7 @@ DROP TABLE constraint_comments_tbl; DROP DOMAIN constraint_comments_dom; DROP ROLE regress_constraint_comments; DROP ROLE regress_constraint_comments_noaccess; +-- Leave some constraints for the pg_upgrade test to pick up +CREATE DOMAIN constraint_comments_dom AS int; +ALTER DOMAIN constraint_comments_dom ADD CONSTRAINT inv_ck CHECK (value > 0) NOT VALID; +COMMENT ON CONSTRAINT inv_ck ON DOMAIN constraint_comments_dom IS 'comment on invalid constraint'; diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql index 7487723ab8437..1f6dc8fd69f6d 100644 --- a/src/test/regress/sql/constraints.sql +++ b/src/test/regress/sql/constraints.sql @@ -1043,3 +1043,9 @@ DROP DOMAIN constraint_comments_dom; DROP ROLE regress_constraint_comments; DROP ROLE regress_constraint_comments_noaccess; + +-- Leave some constraints for the pg_upgrade test to pick up +CREATE DOMAIN constraint_comments_dom AS int; + +ALTER DOMAIN constraint_comments_dom ADD CONSTRAINT inv_ck CHECK (value > 0) NOT VALID; +COMMENT ON CONSTRAINT inv_ck ON DOMAIN constraint_comments_dom IS 'comment on invalid constraint'; From 5e6e42e44fe10cab616b4fbe9725df03c987c90a Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Jul 2025 14:13:18 -0700 Subject: [PATCH 322/602] Force LC_COLLATE to C in postmaster. Avoid dependence on setlocale(). strcoll(), etc., are not called directly; all collation-sensitive calls should go through pg_locale.c and use the appropriate provider. By setting LC_COLLATE to C, we avoid accidentally depending on libc behavior when using a different provider. No behavior change in the backend, but it's possible that some extensions will be affected. 
Such extensions should be updated to use the pg_locale_t APIs. Discussion: https://postgr.es/m/9875f7f9-50f1-4b5d-86fc-ee8b03e8c162@eisentraut.org Reviewed-by: Peter Eisentraut --- doc/src/sgml/catalogs.sgml | 2 +- doc/src/sgml/charset.sgml | 2 +- doc/src/sgml/ref/create_database.sgml | 45 ++++++++++++++------------- doc/src/sgml/ref/createdb.sgml | 3 +- src/backend/main/main.c | 16 ++++++---- src/backend/utils/init/postinit.c | 11 +++---- 6 files changed, 42 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index aa5b8772436c6..0d23bc1b122b6 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -3158,7 +3158,7 @@ SCRAM-SHA-256$<iteration count>:&l datcollate text - LC_COLLATE for this database + LC_COLLATE for this database (ignored unless datlocprovider is c) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 5a0e97f6f3158..59b27c3c370e2 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -100,7 +100,7 @@ initdb --locale=sv_SE LC_COLLATE - String sort order + String sort order (ignored unless the provider is libc) LC_CTYPE diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 4da8aeebb50a2..3544b15efdafa 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -150,12 +150,12 @@ CREATE DATABASE name Sets the default collation order and character classification in the new database. Collation affects the sort order applied to strings, - e.g., in queries with ORDER BY, as well as the order used in indexes - on text columns. Character classification affects the categorization - of characters, e.g., lower, upper, and digit. Also sets the - associated aspects of the operating system environment, - LC_COLLATE and LC_CTYPE. The - default is the same setting as the template database. See ORDER BY, as well as the + order used in indexes on text columns. Character classification + affects the categorization of characters, e.g., lower, upper, and + digit. Also sets the LC_CTYPE aspect of the + operating system environment. The default is the same setting as the + template database. See and for details. @@ -189,17 +189,16 @@ CREATE DATABASE name lc_collate - Sets LC_COLLATE in the database server's operating - system environment. The default is the setting of if specified, otherwise the same - setting as the template database. See below for additional - restrictions. + If is + libc, sets the default collation order to use in + the new database, overriding the setting . Otherwise, this setting is + ignored. - If is - libc, also sets the default collation order to use - in the new database, overriding the setting . + The default is the setting of + if specified, otherwise the same setting as the template database. + See below for additional restrictions. @@ -208,16 +207,18 @@ CREATE DATABASE name Sets LC_CTYPE in the database server's operating - system environment. The default is the setting of if specified, otherwise the same - setting as the template database. See below for additional - restrictions. + system environment. If is - libc, also sets the default character - classification to use in the new database, overriding the setting - . + libc, sets the default character classification to + use in the new database, overriding the setting . + + + The default is the setting of + if specified, otherwise the same setting as the template database. + See below for additional restrictions. 
diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml index 5c4e0465ed9da..2ccbe13f39008 100644 --- a/doc/src/sgml/ref/createdb.sgml +++ b/doc/src/sgml/ref/createdb.sgml @@ -136,7 +136,8 @@ PostgreSQL documentation - Specifies the LC_COLLATE setting to be used in this database. + Specifies the LC_COLLATE setting to be used in this database (ignored + unless the locale provider is libc). diff --git a/src/backend/main/main.c b/src/backend/main/main.c index 7d63cf94a6b44..bdcb5e4f26159 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -125,13 +125,17 @@ main(int argc, char *argv[]) set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("postgres")); /* - * In the postmaster, absorb the environment values for LC_COLLATE and - * LC_CTYPE. Individual backends will change these later to settings - * taken from pg_database, but the postmaster cannot do that. If we leave - * these set to "C" then message localization might not work well in the - * postmaster. + * Collation is handled by pg_locale.c, and the behavior is dependent on + * the provider. strcoll(), etc., should not be called directly. + */ + init_locale("LC_COLLATE", LC_COLLATE, "C"); + + /* + * In the postmaster, absorb the environment value for LC_CTYPE. + * Individual backends will change it later to pg_database.datctype, but + * the postmaster cannot do that. If we leave it set to "C" then message + * localization might not work well in the postmaster. */ - init_locale("LC_COLLATE", LC_COLLATE, ""); init_locale("LC_CTYPE", LC_CTYPE, ""); /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index c86ceefda940b..641e535a73c7c 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -417,12 +417,11 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); ctype = TextDatumGetCString(datum); - if (pg_perm_setlocale(LC_COLLATE, collate) == NULL) - ereport(FATAL, - (errmsg("database locale is incompatible with operating system"), - errdetail("The database was initialized with LC_COLLATE \"%s\", " - " which is not recognized by setlocale().", collate), - errhint("Recreate the database with another locale or install the missing locale."))); + /* + * Historically, we set LC_COLLATE from datcollate, as well. That's no + * longer necessary because all collation behavior is handled through + * pg_locale_t. + */ if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL) ereport(FATAL, From f6ffbeda00e08c4c8ac8cf72173f84157491bfde Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Thu, 17 Jul 2025 00:21:18 +0200 Subject: [PATCH 323/602] doc: Add example file for COPY The paragraph for introducing INSERT and COPY discussed how a file could be used for bulk loading with COPY, without actually showing what the file would look like. This adds a programlisting for the file contents. Backpatch to all supported branches since this example has lacked the file contents since PostgreSQL 7.2. 
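As a companion to the new example, a hedged sketch (not part of the patch): the same rows can be loaded without any server-side file, either inline over STDIN or with psql's client-side \copy. The fields in the rows below are separated by literal tab characters, as in the documented file:

COPY weather FROM STDIN;
San Francisco	46	50	0.25	1994-11-27
San Francisco	43	57	0.0	1994-11-29
Hayward	37	54	\N	1994-11-29
\.

\copy weather FROM 'weather.txt'

The \copy form reads the file on the client machine instead of the server, sidestepping the server-side file access requirement mentioned in the paragraph being patched.
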
Author: Daniel Gustafsson Reviewed-by: Fujii Masao Reviewed-by: Tom Lane Discussion: https://postgr.es/m/158017814191.19852.15019251381150731439@wrigleys.postgresql.org Backpatch-through: 13 --- doc/src/sgml/query.sgml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index 727a0cb185fb2..b190f28d41ea6 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -264,8 +264,18 @@ COPY weather FROM '/home/user/weather.txt'; where the file name for the source file must be available on the machine running the backend process, not the client, since the backend process - reads the file directly. You can read more about the - COPY command in . + reads the file directly. The data inserted above into the weather table + could also be inserted from a file containing (values are separated by a + tab character): + + +San Francisco 46 50 0.25 1994-11-27 +San Francisco 43 57 0.0 1994-11-29 +Hayward 37 54 \N 1994-11-29 + + + You can read more about the COPY command in + . From a493e741d32b7580abe4d0dcc444fcedd8feec6e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 17 Jul 2025 09:30:26 +0900 Subject: [PATCH 324/602] Fix inconsistent LWLock tranche names for MultiXact* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The terms used in wait_event_names.txt and lwlock.c were inconsistent for MultiXactOffsetSLRU and MultiXactMemberSLRU, which could cause joins between pg_wait_events and pg_stat_activity to fail. lwlock.c is adjusted in this commit to use the historical names of these events, which are also the names that are documented. Oversight in 53c2a97a9266. 08b9b9e043bb fixed a similar inconsistency some time ago. Author: Bertrand Drouvot Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/aHdxN0D0hKXzHFQG@ip-10-97-1-34.eu-west-3.compute.internal Backpatch-through: 17 --- src/backend/storage/lmgr/lwlock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 46f44bc45113f..2d43bf2cc1323 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -170,8 +170,8 @@ static const char *const BuiltinTrancheNames[] = { [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA", [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash", [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU", - [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU", - [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU", + [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultiXactOffsetSLRU", + [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultiXactMemberSLRU", [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU", [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU", [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU", From 74a3fc36f3141677a94d1f6fbfaee4cb3896a35a Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 17 Jul 2025 14:08:55 +0900 Subject: [PATCH 325/602] Split regression tests for TOAST compression methods into two files The regression tests for TOAST compression methods are split into two independent files: one specific to LZ4 and to interactions between two different TOAST compression methods, now called compression_lz4, and a second one for the "core" cases where only pglz is required. This saves 300 lines in diffs coming from the alternate output of compression.sql, required for builds where lz4 is not available. 
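The availability check that the new test file relies on can also be run standalone; this is the same query used at the top of compression_lz4.sql further below (shown here as a sketch for illustration):

SELECT NOT (enumvals @> '{lz4}') AS skip_test
  FROM pg_settings
 WHERE name = 'default_toast_compression';
-- Returns true when the build lacks LZ4 support, since "lz4" is then absent
-- from the list of values allowed for this GUC.
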
The new test is skipped if the build does not support LZ4 compression, relying on an \if and the values reported in pg_settings for the GUC default_toast_compression, "lz4" being available only under USE_LZ4. Another benefit of this split is that it facilitates the addition of more compression methods for TOAST, which are under discussion. Note the trick added for the tests of the GUC default_toast_compression, where VERBOSITY = terse is used to avoid the HINT printing the lists of values available in the GUC, which are environment-dependent. This makes compression.sql independent of the availability of LZ4. The code coverage of toast_compression.c is slightly improved, from 89% to 91%, with one new case covered in lz4_compress_datum() for incompressible data. Author: Nikhil Kumar Veldanda Co-authored-by: Michael Paquier Discussion: https://postgr.es/m/aDlcU-ym9KfMj9sG@paquier.xyz --- src/test/regress/expected/compression.out | 235 +----------- src/test/regress/expected/compression_1.out | 360 ------------------ src/test/regress/expected/compression_lz4.out | 249 ++++++++++++ .../regress/expected/compression_lz4_1.out | 7 + src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/compression.sql | 84 +--- src/test/regress/sql/compression_lz4.sql | 129 +++++++ 7 files changed, 414 insertions(+), 652 deletions(-) delete mode 100644 src/test/regress/expected/compression_1.out create mode 100644 src/test/regress/expected/compression_lz4.out create mode 100644 src/test/regress/expected/compression_lz4_1.out create mode 100644 src/test/regress/sql/compression_lz4.sql diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out index 4dd9ee7200d18..09f198149aa4f 100644 --- a/src/test/regress/expected/compression.out +++ b/src/test/regress/expected/compression.out @@ -1,3 +1,7 @@ +-- Default set of tests for TOAST compression, independent on compression +-- methods supported by the build. 
+CREATE SCHEMA pglz; +SET search_path TO pglz, public; \set HIDE_TOAST_COMPRESSION false -- ensure we get stable results regardless of installation's default SET default_toast_compression = 'pglz'; @@ -6,21 +10,13 @@ CREATE TABLE cmdata(f1 text COMPRESSION pglz); CREATE INDEX idx ON cmdata(f1); INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); \d+ cmdata - Table "public.cmdata" + Table "pglz.cmdata" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | pglz | | Indexes: "idx" btree (f1) -CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); -INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -\d+ cmdata1 - Table "public.cmdata1" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | lz4 | | - -- verify stored compression method in the data SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression @@ -28,12 +24,6 @@ SELECT pg_column_compression(f1) FROM cmdata; pglz (1 row) -SELECT pg_column_compression(f1) FROM cmdata1; - pg_column_compression ------------------------ - lz4 -(1 row) - -- decompress data slice SELECT SUBSTR(f1, 200, 5) FROM cmdata; substr @@ -41,16 +31,10 @@ SELECT SUBSTR(f1, 200, 5) FROM cmdata; 01234 (1 row) -SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; - substr ----------------------------------------------------- - 01234567890123456789012345678901234567890123456789 -(1 row) - -- copy with table creation SELECT * INTO cmmove1 FROM cmdata; \d+ cmmove1 - Table "public.cmmove1" + Table "pglz.cmmove1" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | | | @@ -61,45 +45,9 @@ SELECT pg_column_compression(f1) FROM cmmove1; pglz (1 row) --- copy to existing table -CREATE TABLE cmmove3(f1 text COMPRESSION pglz); -INSERT INTO cmmove3 SELECT * FROM cmdata; -INSERT INTO cmmove3 SELECT * FROM cmdata1; -SELECT pg_column_compression(f1) FROM cmmove3; - pg_column_compression ------------------------ - pglz - lz4 -(2 rows) - --- test LIKE INCLUDING COMPRESSION -CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | lz4 | | - -DROP TABLE cmdata2; -- try setting compression for incompressible data type CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); ERROR: column data type integer does not support compression --- update using datum from different table -CREATE TABLE cmmove2(f1 text COMPRESSION pglz); -INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); -SELECT pg_column_compression(f1) FROM cmmove2; - pg_column_compression ------------------------ - pglz -(1 row) - -UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; -SELECT pg_column_compression(f1) FROM cmmove2; - pg_column_compression ------------------------ - lz4 -(1 row) - -- test externally stored compressed data CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS 'select 
array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; @@ -111,21 +59,6 @@ SELECT pg_column_compression(f1) FROM cmdata2; pglz (1 row) -INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); -SELECT pg_column_compression(f1) FROM cmdata1; - pg_column_compression ------------------------ - lz4 - lz4 -(2 rows) - -SELECT SUBSTR(f1, 200, 5) FROM cmdata1; - substr --------- - 01234 - 79026 -(2 rows) - SELECT SUBSTR(f1, 200, 5) FROM cmdata2; substr -------- @@ -136,21 +69,21 @@ DROP TABLE cmdata2; --test column type update varlena/non-varlena CREATE TABLE cmdata2 (f1 int); \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+---------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | integer | | | | plain | | | ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | character varying | | | | extended | | | ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+---------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | integer | | | | plain | | | @@ -160,14 +93,14 @@ ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | character varying | | | | extended | pglz | | ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | character varying | | | | plain | pglz | | @@ -179,164 +112,47 @@ SELECT pg_column_compression(f1) FROM cmdata2; (1 row) --- test compression with materialized view -CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; -\d+ compressmv - Materialized view "public.compressmv" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - x | text | | | | extended | | | -View definition: - SELECT f1 AS x - FROM cmdata1; - -SELECT pg_column_compression(f1) FROM cmdata1; - pg_column_compression ------------------------ - lz4 - lz4 -(2 rows) - -SELECT pg_column_compression(x) FROM compressmv; - pg_column_compression ------------------------ - lz4 - lz4 -(2 rows) - --- test compression with partition -CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); -CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); -CREATE TABLE cmpart2(f1 text COMPRESSION pglz); -ALTER TABLE 
cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); -INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -SELECT pg_column_compression(f1) FROM cmpart1; - pg_column_compression ------------------------ - lz4 -(1 row) - -SELECT pg_column_compression(f1) FROM cmpart2; - pg_column_compression ------------------------ - pglz -(1 row) - -- test compression with inheritance -CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error -NOTICE: merging multiple inherited definitions of column "f1" -ERROR: column "f1" has a compression method conflict -DETAIL: pglz versus lz4 -CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error -NOTICE: merging column "f1" with inherited definition -ERROR: column "f1" has a compression method conflict -DETAIL: pglz versus lz4 CREATE TABLE cmdata3(f1 text); CREATE TABLE cminh() INHERITS (cmdata, cmdata3); NOTICE: merging multiple inherited definitions of column "f1" -- test default_toast_compression GUC +-- suppress machine-dependent details +\set VERBOSITY terse SET default_toast_compression = ''; ERROR: invalid value for parameter "default_toast_compression": "" -HINT: Available values: pglz, lz4. SET default_toast_compression = 'I do not exist compression'; ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" -HINT: Available values: pglz, lz4. -SET default_toast_compression = 'lz4'; SET default_toast_compression = 'pglz'; --- test alter compression method -ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; -INSERT INTO cmdata VALUES (repeat('123456789', 4004)); -\d+ cmdata - Table "public.cmdata" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | lz4 | | -Indexes: - "idx" btree (f1) -Child tables: cminh - -SELECT pg_column_compression(f1) FROM cmdata; - pg_column_compression ------------------------ - pglz - lz4 -(2 rows) - +\set VERBOSITY default ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | character varying | | | | plain | | | --- test alter compression method for materialized views -ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; -\d+ compressmv - Materialized view "public.compressmv" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - x | text | | | | extended | lz4 | | -View definition: - SELECT f1 AS x - FROM cmdata1; - --- test alter compression method for partitioned tables -ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; -ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; --- new data should be compressed with the current compression method -INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -SELECT pg_column_compression(f1) FROM cmpart1; - pg_column_compression ------------------------ - lz4 - pglz -(2 rows) - -SELECT pg_column_compression(f1) FROM cmpart2; - pg_column_compression 
------------------------ - pglz - lz4 -(2 rows) - +DROP TABLE cmdata2; -- VACUUM FULL does not recompress SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - lz4 -(2 rows) +(1 row) VACUUM FULL cmdata; SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - lz4 -(2 rows) +(1 row) --- test expression index -DROP TABLE cmdata2; -CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); -CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); -INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM -generate_series(1, 50) g), VERSION()); -- check data is ok SELECT length(f1) FROM cmdata; length -------- 10000 - 36036 -(2 rows) - -SELECT length(f1) FROM cmdata1; - length --------- - 10040 - 12449 -(2 rows) +(1 row) SELECT length(f1) FROM cmmove1; length @@ -344,19 +160,6 @@ SELECT length(f1) FROM cmmove1; 10000 (1 row) -SELECT length(f1) FROM cmmove2; - length --------- - 10040 -(1 row) - -SELECT length(f1) FROM cmmove3; - length --------- - 10000 - 10040 -(2 rows) - CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails ERROR: invalid compression method "i_do_not_exist_compression" CREATE TABLE badcompresstbl (a text); diff --git a/src/test/regress/expected/compression_1.out b/src/test/regress/expected/compression_1.out deleted file mode 100644 index 7bd7642b4b94f..0000000000000 --- a/src/test/regress/expected/compression_1.out +++ /dev/null @@ -1,360 +0,0 @@ -\set HIDE_TOAST_COMPRESSION false --- ensure we get stable results regardless of installation's default -SET default_toast_compression = 'pglz'; --- test creating table with compression method -CREATE TABLE cmdata(f1 text COMPRESSION pglz); -CREATE INDEX idx ON cmdata(f1); -INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); -\d+ cmdata - Table "public.cmdata" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | pglz | | -Indexes: - "idx" btree (f1) - -CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. 
-INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -ERROR: relation "cmdata1" does not exist -LINE 1: INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); - ^ -\d+ cmdata1 --- verify stored compression method in the data -SELECT pg_column_compression(f1) FROM cmdata; - pg_column_compression ------------------------ - pglz -(1 row) - -SELECT pg_column_compression(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; - ^ --- decompress data slice -SELECT SUBSTR(f1, 200, 5) FROM cmdata; - substr --------- - 01234 -(1 row) - -SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; - ^ --- copy with table creation -SELECT * INTO cmmove1 FROM cmdata; -\d+ cmmove1 - Table "public.cmmove1" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | | | - -SELECT pg_column_compression(f1) FROM cmmove1; - pg_column_compression ------------------------ - pglz -(1 row) - --- copy to existing table -CREATE TABLE cmmove3(f1 text COMPRESSION pglz); -INSERT INTO cmmove3 SELECT * FROM cmdata; -INSERT INTO cmmove3 SELECT * FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: INSERT INTO cmmove3 SELECT * FROM cmdata1; - ^ -SELECT pg_column_compression(f1) FROM cmmove3; - pg_column_compression ------------------------ - pglz -(1 row) - --- test LIKE INCLUDING COMPRESSION -CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); -ERROR: relation "cmdata1" does not exist -LINE 1: CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); - ^ -\d+ cmdata2 -DROP TABLE cmdata2; -ERROR: table "cmdata2" does not exist --- try setting compression for incompressible data type -CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); -ERROR: column data type integer does not support compression --- update using datum from different table -CREATE TABLE cmmove2(f1 text COMPRESSION pglz); -INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); -SELECT pg_column_compression(f1) FROM cmmove2; - pg_column_compression ------------------------ - pglz -(1 row) - -UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; - ^ -SELECT pg_column_compression(f1) FROM cmmove2; - pg_column_compression ------------------------ - pglz -(1 row) - --- test externally stored compressed data -CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS -'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; -CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); -INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); -SELECT pg_column_compression(f1) FROM cmdata2; - pg_column_compression ------------------------ - pglz -(1 row) - -INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); -ERROR: relation "cmdata1" does not exist -LINE 1: INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); - ^ -SELECT pg_column_compression(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; - ^ -SELECT SUBSTR(f1, 200, 5) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT SUBSTR(f1, 200, 5) FROM cmdata1; - ^ -SELECT SUBSTR(f1, 200, 5) FROM cmdata2; - substr --------- - 79026 -(1 row) - -DROP TABLE cmdata2; ---test 
column type update varlena/non-varlena -CREATE TABLE cmdata2 (f1 int); -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+---------+-----------+----------+---------+---------+-------------+--------------+------------- - f1 | integer | | | | plain | | | - -ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | character varying | | | | extended | | | - -ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+---------+-----------+----------+---------+---------+-------------+--------------+------------- - f1 | integer | | | | plain | | | - ---changing column storage should not impact the compression method ---but the data should not be compressed -ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; -ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | character varying | | | | extended | pglz | | - -ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- - f1 | character varying | | | | plain | pglz | | - -INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); -SELECT pg_column_compression(f1) FROM cmdata2; - pg_column_compression ------------------------ - -(1 row) - --- test compression with materialized view -CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: ...TE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; - ^ -\d+ compressmv -SELECT pg_column_compression(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; - ^ -SELECT pg_column_compression(x) FROM compressmv; -ERROR: relation "compressmv" does not exist -LINE 1: SELECT pg_column_compression(x) FROM compressmv; - ^ --- test compression with partition -CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. 
-CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); -ERROR: relation "cmpart" does not exist -CREATE TABLE cmpart2(f1 text COMPRESSION pglz); -ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); -ERROR: relation "cmpart" does not exist -INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 1004)); - ^ -INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 4004)); - ^ -SELECT pg_column_compression(f1) FROM cmpart1; -ERROR: relation "cmpart1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmpart1; - ^ -SELECT pg_column_compression(f1) FROM cmpart2; - pg_column_compression ------------------------ -(0 rows) - --- test compression with inheritance -CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error -ERROR: relation "cmdata1" does not exist -CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error -NOTICE: merging column "f1" with inherited definition -ERROR: column "f1" has a compression method conflict -DETAIL: pglz versus lz4 -CREATE TABLE cmdata3(f1 text); -CREATE TABLE cminh() INHERITS (cmdata, cmdata3); -NOTICE: merging multiple inherited definitions of column "f1" --- test default_toast_compression GUC -SET default_toast_compression = ''; -ERROR: invalid value for parameter "default_toast_compression": "" -HINT: Available values: pglz. -SET default_toast_compression = 'I do not exist compression'; -ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" -HINT: Available values: pglz. -SET default_toast_compression = 'lz4'; -ERROR: invalid value for parameter "default_toast_compression": "lz4" -HINT: Available values: pglz. -SET default_toast_compression = 'pglz'; --- test alter compression method -ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. -INSERT INTO cmdata VALUES (repeat('123456789', 4004)); -\d+ cmdata - Table "public.cmdata" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | pglz | | -Indexes: - "idx" btree (f1) -Child tables: cminh - -SELECT pg_column_compression(f1) FROM cmdata; - pg_column_compression ------------------------ - pglz - pglz -(2 rows) - -ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; -\d+ cmdata2 - Table "public.cmdata2" - Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description ---------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- - f1 | character varying | | | | plain | | | - --- test alter compression method for materialized views -ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; -ERROR: relation "compressmv" does not exist -\d+ compressmv --- test alter compression method for partitioned tables -ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; -ERROR: relation "cmpart1" does not exist -ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. 
--- new data should be compressed with the current compression method -INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 1004)); - ^ -INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 4004)); - ^ -SELECT pg_column_compression(f1) FROM cmpart1; -ERROR: relation "cmpart1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmpart1; - ^ -SELECT pg_column_compression(f1) FROM cmpart2; - pg_column_compression ------------------------ -(0 rows) - --- VACUUM FULL does not recompress -SELECT pg_column_compression(f1) FROM cmdata; - pg_column_compression ------------------------ - pglz - pglz -(2 rows) - -VACUUM FULL cmdata; -SELECT pg_column_compression(f1) FROM cmdata; - pg_column_compression ------------------------ - pglz - pglz -(2 rows) - --- test expression index -DROP TABLE cmdata2; -CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. -CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); -ERROR: relation "cmdata2" does not exist -INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM -generate_series(1, 50) g), VERSION()); -ERROR: relation "cmdata2" does not exist -LINE 1: INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEX... - ^ --- check data is ok -SELECT length(f1) FROM cmdata; - length --------- - 10000 - 36036 -(2 rows) - -SELECT length(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT length(f1) FROM cmdata1; - ^ -SELECT length(f1) FROM cmmove1; - length --------- - 10000 -(1 row) - -SELECT length(f1) FROM cmmove2; - length --------- - 10040 -(1 row) - -SELECT length(f1) FROM cmmove3; - length --------- - 10000 -(1 row) - -CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails -ERROR: invalid compression method "i_do_not_exist_compression" -CREATE TABLE badcompresstbl (a text); -ALTER TABLE badcompresstbl ALTER a SET COMPRESSION I_Do_Not_Exist_Compression; -- fails -ERROR: invalid compression method "i_do_not_exist_compression" -DROP TABLE badcompresstbl; -\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/expected/compression_lz4.out b/src/test/regress/expected/compression_lz4.out new file mode 100644 index 0000000000000..068dd7c367446 --- /dev/null +++ b/src/test/regress/expected/compression_lz4.out @@ -0,0 +1,249 @@ +-- Tests for TOAST compression with lz4 +SELECT NOT(enumvals @> '{lz4}') AS skip_test FROM pg_settings WHERE + name = 'default_toast_compression' \gset +\if :skip_test + \echo '*** skipping TOAST tests with lz4 (not supported) ***' + \quit +\endif +CREATE SCHEMA lz4; +SET search_path TO lz4, public; +\set HIDE_TOAST_COMPRESSION false +-- Ensure we get stable results regardless of the installation's default. +-- We rely on this GUC value for a few tests. 
+SET default_toast_compression = 'pglz'; +-- test creating table with compression method +CREATE TABLE cmdata_pglz(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata_pglz(f1); +INSERT INTO cmdata_pglz VALUES(repeat('1234567890', 1000)); +\d+ cmdata +CREATE TABLE cmdata_lz4(f1 TEXT COMPRESSION lz4); +INSERT INTO cmdata_lz4 VALUES(repeat('1234567890', 1004)); +\d+ cmdata1 +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata_lz4; + pg_column_compression +----------------------- + lz4 +(1 row) + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata_pglz; + substr +-------- + 01234 +(1 row) + +SELECT SUBSTR(f1, 2000, 50) FROM cmdata_lz4; + substr +---------------------------------------------------- + 01234567890123456789012345678901234567890123456789 +(1 row) + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata_lz4; +\d+ cmmove1 + Table "lz4.cmmove1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | | | + +SELECT pg_column_compression(f1) FROM cmmove1; + pg_column_compression +----------------------- + lz4 +(1 row) + +-- test LIKE INCLUDING COMPRESSION. The GUC default_toast_compression +-- has no effect; the compression method comes from the table being copied. +CREATE TABLE cmdata2 (LIKE cmdata_lz4 INCLUDING COMPRESSION); +\d+ cmdata2 + Table "lz4.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + +DROP TABLE cmdata2; +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata_pglz; +INSERT INTO cmmove3 SELECT * FROM cmdata_lz4; +SELECT pg_column_compression(f1) FROM cmmove3; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +-- update using datum from different table with LZ4 data. 
+CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + pglz +(1 row) + +UPDATE cmmove2 SET f1 = cmdata_lz4.f1 FROM cmdata_lz4; +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + lz4 +(1 row) + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val_lz4() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION lz4); +INSERT INTO cmdata2 SELECT large_val_lz4() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + lz4 +(1 row) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; + substr +-------- + 79026 +(1 row) + +DROP TABLE cmdata2; +DROP FUNCTION large_val_lz4; +-- test compression with materialized view +CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata_lz4; +\d+ compressmv + Materialized view "lz4.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | | | +View definition: + SELECT f1 AS x + FROM cmdata_lz4; + +SELECT pg_column_compression(f1) FROM cmdata_lz4; + pg_column_compression +----------------------- + lz4 +(1 row) + +SELECT pg_column_compression(x) FROM compressmv; + pg_column_compression +----------------------- + lz4 +(1 row) + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + lz4 +(1 row) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz +(1 row) + +-- test compression with inheritance +CREATE TABLE cminh() INHERITS(cmdata_pglz, cmdata_lz4); -- error +NOTICE: merging multiple inherited definitions of column "f1" +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus lz4 +CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata_pglz); -- error +NOTICE: merging column "f1" with inherited definition +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus lz4 +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata_pglz, cmdata3); +NOTICE: merging multiple inherited definitions of column "f1" +-- test default_toast_compression GUC +SET default_toast_compression = 'lz4'; +-- test alter compression method +ALTER TABLE cmdata_pglz ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata_pglz VALUES (repeat('123456789', 4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata_pglz; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +ALTER TABLE cmdata_pglz ALTER COLUMN f1 SET COMPRESSION pglz; +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; +\d+ compressmv + Materialized view "lz4.compressmv" + Column | Type | 
Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | lz4 | | +View definition: + SELECT f1 AS x + FROM cmdata_lz4; + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + lz4 + pglz +(2 rows) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +-- test expression index +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), VERSION()); +-- check data is ok +SELECT length(f1) FROM cmdata_pglz; + length +-------- + 10000 + 36036 +(2 rows) + +SELECT length(f1) FROM cmdata_lz4; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove1; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove2; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove3; + length +-------- + 10000 + 10040 +(2 rows) + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/expected/compression_lz4_1.out b/src/test/regress/expected/compression_lz4_1.out new file mode 100644 index 0000000000000..198056fa22498 --- /dev/null +++ b/src/test/regress/expected/compression_lz4_1.out @@ -0,0 +1,7 @@ +-- Tests for TOAST compression with lz4 +SELECT NOT(enumvals @> '{lz4}') AS skip_test FROM pg_settings WHERE + name = 'default_toast_compression' \gset +\if :skip_test + \echo '*** skipping TOAST tests with lz4 (not supported) ***' +*** skipping TOAST tests with lz4 (not supported) *** + \quit diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index a424be2a6bf0f..fbffc67ae6013 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate numa +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/compression.sql b/src/test/regress/sql/compression.sql index 490595fcfb263..ce5ea37a660ce 100644 --- a/src/test/regress/sql/compression.sql +++ b/src/test/regress/sql/compression.sql @@ -1,3 +1,8 @@ +-- Default set of tests for TOAST compression, independent on compression +-- methods supported by the build. 
+ +CREATE SCHEMA pglz; +SET search_path TO pglz, public; \set HIDE_TOAST_COMPRESSION false -- ensure we get stable results regardless of installation's default @@ -8,53 +13,27 @@ CREATE TABLE cmdata(f1 text COMPRESSION pglz); CREATE INDEX idx ON cmdata(f1); INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); \d+ cmdata -CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); -INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -\d+ cmdata1 -- verify stored compression method in the data SELECT pg_column_compression(f1) FROM cmdata; -SELECT pg_column_compression(f1) FROM cmdata1; -- decompress data slice SELECT SUBSTR(f1, 200, 5) FROM cmdata; -SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; -- copy with table creation SELECT * INTO cmmove1 FROM cmdata; \d+ cmmove1 SELECT pg_column_compression(f1) FROM cmmove1; --- copy to existing table -CREATE TABLE cmmove3(f1 text COMPRESSION pglz); -INSERT INTO cmmove3 SELECT * FROM cmdata; -INSERT INTO cmmove3 SELECT * FROM cmdata1; -SELECT pg_column_compression(f1) FROM cmmove3; - --- test LIKE INCLUDING COMPRESSION -CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); -\d+ cmdata2 -DROP TABLE cmdata2; - -- try setting compression for incompressible data type CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); --- update using datum from different table -CREATE TABLE cmmove2(f1 text COMPRESSION pglz); -INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); -SELECT pg_column_compression(f1) FROM cmmove2; -UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; -SELECT pg_column_compression(f1) FROM cmmove2; - -- test externally stored compressed data CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS 'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); SELECT pg_column_compression(f1) FROM cmdata2; -INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); -SELECT pg_column_compression(f1) FROM cmdata1; -SELECT SUBSTR(f1, 200, 5) FROM cmdata1; SELECT SUBSTR(f1, 200, 5) FROM cmdata2; DROP TABLE cmdata2; @@ -76,76 +55,31 @@ ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); SELECT pg_column_compression(f1) FROM cmdata2; --- test compression with materialized view -CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; -\d+ compressmv -SELECT pg_column_compression(f1) FROM cmdata1; -SELECT pg_column_compression(x) FROM compressmv; - --- test compression with partition -CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); -CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); -CREATE TABLE cmpart2(f1 text COMPRESSION pglz); - -ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); -INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -SELECT pg_column_compression(f1) FROM cmpart1; -SELECT pg_column_compression(f1) FROM cmpart2; - -- test compression with inheritance -CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error -CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error CREATE TABLE cmdata3(f1 text); CREATE TABLE cminh() INHERITS (cmdata, cmdata3); -- test default_toast_compression GUC +-- suppress machine-dependent details +\set VERBOSITY terse SET default_toast_compression = ''; SET default_toast_compression = 'I do not exist compression'; -SET default_toast_compression = 'lz4'; SET 
default_toast_compression = 'pglz'; - --- test alter compression method -ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; -INSERT INTO cmdata VALUES (repeat('123456789', 4004)); -\d+ cmdata -SELECT pg_column_compression(f1) FROM cmdata; +\set VERBOSITY default ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; \d+ cmdata2 --- test alter compression method for materialized views -ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; -\d+ compressmv - --- test alter compression method for partitioned tables -ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; -ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; - --- new data should be compressed with the current compression method -INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -SELECT pg_column_compression(f1) FROM cmpart1; -SELECT pg_column_compression(f1) FROM cmpart2; +DROP TABLE cmdata2; -- VACUUM FULL does not recompress SELECT pg_column_compression(f1) FROM cmdata; VACUUM FULL cmdata; SELECT pg_column_compression(f1) FROM cmdata; --- test expression index -DROP TABLE cmdata2; -CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); -CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); -INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM -generate_series(1, 50) g), VERSION()); - -- check data is ok SELECT length(f1) FROM cmdata; -SELECT length(f1) FROM cmdata1; SELECT length(f1) FROM cmmove1; -SELECT length(f1) FROM cmmove2; -SELECT length(f1) FROM cmmove3; CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails CREATE TABLE badcompresstbl (a text); diff --git a/src/test/regress/sql/compression_lz4.sql b/src/test/regress/sql/compression_lz4.sql new file mode 100644 index 0000000000000..3849f8618dee4 --- /dev/null +++ b/src/test/regress/sql/compression_lz4.sql @@ -0,0 +1,129 @@ +-- Tests for TOAST compression with lz4 + +SELECT NOT(enumvals @> '{lz4}') AS skip_test FROM pg_settings WHERE + name = 'default_toast_compression' \gset +\if :skip_test + \echo '*** skipping TOAST tests with lz4 (not supported) ***' + \quit +\endif + +CREATE SCHEMA lz4; +SET search_path TO lz4, public; + +\set HIDE_TOAST_COMPRESSION false + +-- Ensure we get stable results regardless of the installation's default. +-- We rely on this GUC value for a few tests. +SET default_toast_compression = 'pglz'; + +-- test creating table with compression method +CREATE TABLE cmdata_pglz(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata_pglz(f1); +INSERT INTO cmdata_pglz VALUES(repeat('1234567890', 1000)); +\d+ cmdata +CREATE TABLE cmdata_lz4(f1 TEXT COMPRESSION lz4); +INSERT INTO cmdata_lz4 VALUES(repeat('1234567890', 1004)); +\d+ cmdata1 + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata_lz4; + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata_pglz; +SELECT SUBSTR(f1, 2000, 50) FROM cmdata_lz4; + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata_lz4; +\d+ cmmove1 +SELECT pg_column_compression(f1) FROM cmmove1; + +-- test LIKE INCLUDING COMPRESSION. The GUC default_toast_compression +-- has no effect, the compression method from the table being copied. 
+CREATE TABLE cmdata2 (LIKE cmdata_lz4 INCLUDING COMPRESSION); +\d+ cmdata2 +DROP TABLE cmdata2; + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata_pglz; +INSERT INTO cmmove3 SELECT * FROM cmdata_lz4; +SELECT pg_column_compression(f1) FROM cmmove3; + +-- update using datum from different table with LZ4 data. +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; +UPDATE cmmove2 SET f1 = cmdata_lz4.f1 FROM cmdata_lz4; +SELECT pg_column_compression(f1) FROM cmmove2; + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val_lz4() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION lz4); +INSERT INTO cmdata2 SELECT large_val_lz4() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; +DROP TABLE cmdata2; +DROP FUNCTION large_val_lz4; + +-- test compression with materialized view +CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata_lz4; +\d+ compressmv +SELECT pg_column_compression(f1) FROM cmdata_lz4; +SELECT pg_column_compression(x) FROM compressmv; + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); + +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- test compression with inheritance +CREATE TABLE cminh() INHERITS(cmdata_pglz, cmdata_lz4); -- error +CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata_pglz); -- error +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata_pglz, cmdata3); + +-- test default_toast_compression GUC +SET default_toast_compression = 'lz4'; + +-- test alter compression method +ALTER TABLE cmdata_pglz ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata_pglz VALUES (repeat('123456789', 4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata_pglz; +ALTER TABLE cmdata_pglz ALTER COLUMN f1 SET COMPRESSION pglz; + +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; +\d+ compressmv + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; + +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- test expression index +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), VERSION()); + +-- check data is ok +SELECT length(f1) FROM cmdata_pglz; +SELECT length(f1) FROM cmdata_lz4; +SELECT length(f1) FROM cmmove1; +SELECT length(f1) FROM cmmove2; +SELECT length(f1) FROM 
cmmove3; + +\set HIDE_TOAST_COMPRESSION true From afa5c365ec5ad978878e5d26c536d8f865abf1ae Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Thu, 17 Jul 2025 14:31:27 +0900 Subject: [PATCH 326/602] Remove duplicate line In 231b7d670b21, while copy-pasting some code into ExecEvalJsonCoercionFinish(), I (amitlan) accidentally introduced a duplicate line. Remove it. Reported-by: Jian He Discussion: https://postgr.es/m/CACJufxHcf=BpmRAJcjgfjOUfV76MwKnyz1x3ErXsWL26EAFmng@mail.gmail.com --- src/backend/executor/execExprInterp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 8a72b5e70a4ec..1a37737d4a235 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -5228,7 +5228,6 @@ ExecEvalJsonCoercionFinish(ExprState *state, ExprEvalStep *op) * JsonBehavior expression. */ jsestate->escontext.error_occurred = false; - jsestate->escontext.error_occurred = false; jsestate->escontext.details_wanted = true; } } From 26cc96d4521acb598ddcd886bd64653452c7e887 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 17 Jul 2025 10:25:59 -0500 Subject: [PATCH 327/602] doc: Add note about how to use pg_overexplain. This commit adds a note to the pg_overexplain page that describes how to use it (LOAD, session_preload_libraries, or shared_preload_libraries). The new text is mostly lifted from the auto_explain page. We should probably consider centralizing this information in the future. While at it, add a missing "module" to the opening sentence. Reviewed-by: "David G. Johnston" Reviewed-by: Robert Treat Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/aHVWKM8l8kLlZzgv%40nathan Backpatch-through: 18 --- doc/src/sgml/pgoverexplain.sgml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/pgoverexplain.sgml b/doc/src/sgml/pgoverexplain.sgml index 21930fbd3bd76..377ddc8139ecf 100644 --- a/doc/src/sgml/pgoverexplain.sgml +++ b/doc/src/sgml/pgoverexplain.sgml @@ -8,7 +8,7 @@ - The pg_overexplain extends EXPLAIN + The pg_overexplain module extends EXPLAIN with new options that provide additional output. It is mostly intended to assist with debugging of and development of the planner, rather than for general use. Since this module displays internal details of planner data @@ -17,6 +17,21 @@ often as) those data structures change. + + To use it, simply load it into the server. You can load it into an + individual session: + + +LOAD 'pg_overexplain'; + + + You can also preload it into some or all sessions by including + pg_overexplain in + or + in + postgresql.conf. + + EXPLAIN (DEBUG) From b8926a5b4bb82e3c56855185da4106d24d26154c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 17 Jul 2025 17:40:22 +0200 Subject: [PATCH 328/602] Remove assertion from PortalRunMulti MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have an assertion to ensure that a command tag has been assigned by the time we're done executing, but if we happen to execute a command with no queries, the assertion would fail. Per discussion, rather than contort things to get a tag assigned, just remove the assertion. Oversight in 2f9661311b83. That commit also retained a comment that explained logic that had been adjacent to it but diffused into various places, leaving none apt to keep part of the comment. Remove that part, and rewrite what remains for extra clarity. 
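For context, the failing case is a portal that ends up with no queries
at all, such as an empty prepared statement. A hypothetical reproducer
using libpq (any client that prepares an empty string over the extended
query protocol should behave the same; this is a sketch, not the exact
case from the report):

    PGresult *res;

    /* prepare an empty statement, then execute it */
    res = PQprepare(conn, "stmt", "", 0, NULL);
    PQclear(res);
    res = PQexecPrepared(conn, "stmt", 0, NULL, NULL, NULL, 0);

Executing "stmt" runs a portal containing zero queries, so no run-time
command tag is ever assigned, and the assertion could fire on
assert-enabled builds.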
Bug: #18984 Backpatch-through: 13 Reported-by: Aleksander Alekseev Reviewed-by: Tom Lane Reviewed-by: Michaël Paquier Discussion: https://postgr.es/m/18984-0f4778a6599ac3ae@postgresql.org --- src/backend/tcop/pquery.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index d1593f38b35fd..08791b8f75ec2 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -1350,24 +1350,15 @@ PortalRunMulti(Portal portal, PopActiveSnapshot(); /* - * If a query completion data was supplied, use it. Otherwise use the - * portal's query completion data. - * - * Exception: Clients expect INSERT/UPDATE/DELETE tags to have counts, so - * fake them with zeros. This can happen with DO INSTEAD rules if there - * is no replacement query of the same type as the original. We print "0 - * 0" here because technically there is no query of the matching tag type, - * and printing a non-zero count for a different query type seems wrong, - * e.g. an INSERT that does an UPDATE instead should not print "0 1" if - * one row was updated. See QueryRewrite(), step 3, for details. + * If a command tag was requested and we did not fill in a run-time- + * determined tag above, copy the parse-time tag from the Portal. (There + * might not be any tag there either, in edge cases such as empty prepared + * statements. That's OK.) */ - if (qc && qc->commandTag == CMDTAG_UNKNOWN) - { - if (portal->qc.commandTag != CMDTAG_UNKNOWN) - CopyQueryCompletion(qc, &portal->qc); - /* If the caller supplied a qc, we should have set it by now. */ - Assert(qc->commandTag != CMDTAG_UNKNOWN); - } + if (qc && + qc->commandTag == CMDTAG_UNKNOWN && + portal->qc.commandTag != CMDTAG_UNKNOWN) + CopyQueryCompletion(qc, &portal->qc); } /* From daf9bdc47d11822da8a1269bd73fb23258b24f80 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 17 Jul 2025 12:46:38 -0400 Subject: [PATCH 329/602] Fix PQport to never return NULL unless the connection is NULL. This is the documented behavior, and it worked that way before v10. However, addition of the connhost[] array created cases where conn->connhost[conn->whichhost].port is NULL. The rest of libpq is careful to substitute DEF_PGPORT[_STR] for a null or empty port string, but we failed to do so here, leading to possibly returning NULL. As of v18 that causes psql's \conninfo command to segfault. Older psql versions avoid that, but it's pretty likely that other clients have trouble with this, so we'd better back-patch the fix. In stable branches, just revert to our historical behavior of returning an empty string when there was no user-given port specification. However, it seems substantially more useful and indeed more correct to hand back DEF_PGPORT_STR in such cases, so let's make v18 and master do that. 
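A minimal sketch of the now-guaranteed behavior (assuming a server
listening on the default port and a connection string that omits
"port="):

    #include <stdio.h>
    #include <libpq-fe.h>

    int
    main(void)
    {
        PGconn *conn = PQconnectdb("host=localhost dbname=postgres");

        /* Never NULL for a non-NULL conn; with no port specified this
         * now prints "5432" (DEF_PGPORT_STR) rather than "". */
        printf("port: %s\n", PQport(conn));
        PQfinish(conn);
        return 0;
    }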
Author: Daniele Varrazzo Reviewed-by: Laurenz Albe Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CA+mi_8YTS8WPZPO0PAb2aaGLwHuQ0DEQRF0ZMnvWss4y9FwDYQ@mail.gmail.com Backpatch-through: 13 --- src/interfaces/libpq/fe-connect.c | 6 ++++-- src/interfaces/libpq/libpq-int.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 2a2b10d5a29ba..afa85d9fca961 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -7574,10 +7574,12 @@ PQport(const PGconn *conn) if (!conn) return NULL; - if (conn->connhost != NULL) + if (conn->connhost != NULL && + conn->connhost[conn->whichhost].port != NULL && + conn->connhost[conn->whichhost].port[0] != '\0') return conn->connhost[conn->whichhost].port; - return ""; + return DEF_PGPORT_STR; } /* diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 70c28f2ffca0b..a701c25038a75 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -357,7 +357,8 @@ typedef struct pg_conn_host pg_conn_host_type type; /* type of host address */ char *host; /* host name or socket path */ char *hostaddr; /* host numeric IP address */ - char *port; /* port number (always provided) */ + char *port; /* port number (if NULL or empty, use + * DEF_PGPORT[_STR]) */ char *password; /* password for this host, read from the * password file; NULL if not sought or not * found in password file. */ From b597ae6cc128b17038d461c5aa426d42f9cc33f9 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 17 Jul 2025 16:32:10 -0500 Subject: [PATCH 330/602] Add a test harness for the binary heap code. binaryheap is heavily used and already has decent test coverage, but it lacks dedicated tests for its correctness. This commit changes that. 
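Once the module is installed, the whole suite boils down to the two
commands in the new regression script:

    CREATE EXTENSION test_binaryheap;
    SELECT test_binaryheap();

The SQL-callable function runs every test for a range of heap sizes and
raises an error on the first failed sanity check.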
Author: Aleksander Alekseev Discussion: https://postgr.es/m/CAJ7c6TMwp%2Bmb8MMoi%3DSMVMso2hYecoVu2Pwf2EOkesq0MiSKxw%40mail.gmail.com --- src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + src/test/modules/test_binaryheap/.gitignore | 4 + src/test/modules/test_binaryheap/Makefile | 24 ++ .../expected/test_binaryheap.out | 12 + src/test/modules/test_binaryheap/meson.build | 33 +++ .../test_binaryheap/sql/test_binaryheap.sql | 8 + .../test_binaryheap/test_binaryheap--1.0.sql | 7 + .../modules/test_binaryheap/test_binaryheap.c | 275 ++++++++++++++++++ .../test_binaryheap/test_binaryheap.control | 5 + 10 files changed, 370 insertions(+) create mode 100644 src/test/modules/test_binaryheap/.gitignore create mode 100644 src/test/modules/test_binaryheap/Makefile create mode 100644 src/test/modules/test_binaryheap/expected/test_binaryheap.out create mode 100644 src/test/modules/test_binaryheap/meson.build create mode 100644 src/test/modules/test_binaryheap/sql/test_binaryheap.sql create mode 100644 src/test/modules/test_binaryheap/test_binaryheap--1.0.sql create mode 100644 src/test/modules/test_binaryheap/test_binaryheap.c create mode 100644 src/test/modules/test_binaryheap/test_binaryheap.control diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index aa1d27bbed310..7d3d3d52b45e9 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -15,6 +15,7 @@ SUBDIRS = \ plsample \ spgist_name_ops \ test_aio \ + test_binaryheap \ test_bloomfilter \ test_copy_callbacks \ test_custom_rmgrs \ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 9de0057bd1d43..dd5cd065ba10c 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -14,6 +14,7 @@ subdir('plsample') subdir('spgist_name_ops') subdir('ssl_passphrase_callback') subdir('test_aio') +subdir('test_binaryheap') subdir('test_bloomfilter') subdir('test_copy_callbacks') subdir('test_custom_rmgrs') diff --git a/src/test/modules/test_binaryheap/.gitignore b/src/test/modules/test_binaryheap/.gitignore new file mode 100644 index 0000000000000..5dcb3ff972350 --- /dev/null +++ b/src/test/modules/test_binaryheap/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_binaryheap/Makefile b/src/test/modules/test_binaryheap/Makefile new file mode 100644 index 0000000000000..d310fbc9e88fb --- /dev/null +++ b/src/test/modules/test_binaryheap/Makefile @@ -0,0 +1,24 @@ +# src/test/modules/test_binaryheap/Makefile + +MODULE_big = test_binaryheap +OBJS = \ + $(WIN32RES) \ + test_binaryheap.o + +PGFILEDESC = "test_binaryheap - test code for binaryheap" + +EXTENSION = test_binaryheap +DATA = test_binaryheap--1.0.sql + +REGRESS = test_binaryheap + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_binaryheap +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_binaryheap/expected/test_binaryheap.out b/src/test/modules/test_binaryheap/expected/test_binaryheap.out new file mode 100644 index 0000000000000..16ce07875e3f7 --- /dev/null +++ b/src/test/modules/test_binaryheap/expected/test_binaryheap.out @@ -0,0 +1,12 @@ +CREATE EXTENSION test_binaryheap; +-- +-- These tests don't produce any interesting output. 
We're checking that +-- the operations complete without crashing or hanging and that none of their +-- internal sanity tests fail. +-- +SELECT test_binaryheap(); + test_binaryheap +----------------- + +(1 row) + diff --git a/src/test/modules/test_binaryheap/meson.build b/src/test/modules/test_binaryheap/meson.build new file mode 100644 index 0000000000000..816a43c93e945 --- /dev/null +++ b/src/test/modules/test_binaryheap/meson.build @@ -0,0 +1,33 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +test_binaryheap_sources = files( + 'test_binaryheap.c', +) + +if host_system == 'windows' + test_binaryheap_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_binaryheap', + '--FILEDESC', 'test_binaryheap - test code for binaryheap',]) +endif + +test_binaryheap = shared_module('test_binaryheap', + test_binaryheap_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_binaryheap + +test_install_data += files( + 'test_binaryheap.control', + 'test_binaryheap--1.0.sql', +) + +tests += { + 'name': 'test_binaryheap', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_binaryheap', + ], + }, +} diff --git a/src/test/modules/test_binaryheap/sql/test_binaryheap.sql b/src/test/modules/test_binaryheap/sql/test_binaryheap.sql new file mode 100644 index 0000000000000..8439545815b37 --- /dev/null +++ b/src/test/modules/test_binaryheap/sql/test_binaryheap.sql @@ -0,0 +1,8 @@ +CREATE EXTENSION test_binaryheap; + +-- +-- These tests don't produce any interesting output. We're checking that +-- the operations complete without crashing or hanging and that none of their +-- internal sanity tests fail. +-- +SELECT test_binaryheap(); diff --git a/src/test/modules/test_binaryheap/test_binaryheap--1.0.sql b/src/test/modules/test_binaryheap/test_binaryheap--1.0.sql new file mode 100644 index 0000000000000..cddceeee60337 --- /dev/null +++ b/src/test/modules/test_binaryheap/test_binaryheap--1.0.sql @@ -0,0 +1,7 @@ +/* src/test/modules/test_binaryheap/test_binaryheap--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_binaryheap" to load this file. \quit + +CREATE FUNCTION test_binaryheap() RETURNS VOID + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_binaryheap/test_binaryheap.c b/src/test/modules/test_binaryheap/test_binaryheap.c new file mode 100644 index 0000000000000..583dae1da30d5 --- /dev/null +++ b/src/test/modules/test_binaryheap/test_binaryheap.c @@ -0,0 +1,275 @@ +/*-------------------------------------------------------------------------- + * + * test_binaryheap.c + * Test correctness of binary heap implementation. + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_binaryheap/test_binaryheap.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "common/int.h" +#include "common/pg_prng.h" +#include "fmgr.h" +#include "lib/binaryheap.h" + +PG_MODULE_MAGIC; + +/* + * Test binaryheap_comparator for max-heap of integers. + */ +static int +int_cmp(Datum a, Datum b, void *arg) +{ + return pg_cmp_s32(DatumGetInt32(a), DatumGetInt32(b)); +} + +/* + * Loops through all nodes and returns the maximum value. 
+ */ +static int +get_max_from_heap(binaryheap *heap) +{ + int max = -1; + + for (int i = 0; i < binaryheap_size(heap); i++) + max = Max(max, DatumGetInt32(binaryheap_get_node(heap, i))); + + return max; +} + +/* + * Generate a random permutation of the integers 0..size-1. + */ +static int * +get_permutation(int size) +{ + int *permutation = (int *) palloc(size * sizeof(int)); + + permutation[0] = 0; + + /* + * This is the "inside-out" variant of the Fisher-Yates shuffle algorithm. + * Notionally, we append each new value to the array and then swap it with + * a randomly-chosen array element (possibly including itself, else we + * fail to generate permutations with the last integer last). The swap + * step can be optimized by combining it with the insertion. + */ + for (int i = 1; i < size; i++) + { + int j = pg_prng_uint64_range(&pg_global_prng_state, 0, i); + + if (j < i) /* avoid fetching undefined data if j=i */ + permutation[i] = permutation[j]; + permutation[j] = i; + } + + return permutation; +} + +/* + * Ensure that the heap property holds for the given heap, i.e., each parent is + * greater than or equal to its children. + */ +static void +verify_heap_property(binaryheap *heap) +{ + for (int i = 0; i < binaryheap_size(heap); i++) + { + int left = 2 * i + 1; + int right = 2 * i + 2; + int parent_val = DatumGetInt32(binaryheap_get_node(heap, i)); + + if (left < binaryheap_size(heap) && + parent_val < DatumGetInt32(binaryheap_get_node(heap, left))) + elog(ERROR, "parent node less than left child"); + + if (right < binaryheap_size(heap) && + parent_val < DatumGetInt32(binaryheap_get_node(heap, right))) + elog(ERROR, "parent node less than right child"); + } +} + +/* + * Check correctness of basic operations. + */ +static void +test_basic(int size) +{ + binaryheap *heap = binaryheap_allocate(size, int_cmp, NULL); + int *permutation = get_permutation(size); + + if (!binaryheap_empty(heap)) + elog(ERROR, "new heap not empty"); + if (binaryheap_size(heap) != 0) + elog(ERROR, "wrong size for new heap"); + + for (int i = 0; i < size; i++) + { + binaryheap_add(heap, Int32GetDatum(permutation[i])); + verify_heap_property(heap); + } + + if (binaryheap_empty(heap)) + elog(ERROR, "heap empty after adding values"); + if (binaryheap_size(heap) != size) + elog(ERROR, "wrong size for heap after adding values"); + + if (DatumGetInt32(binaryheap_first(heap)) != get_max_from_heap(heap)) + elog(ERROR, "incorrect root node after adding values"); + + for (int i = 0; i < size; i++) + { + int expected = get_max_from_heap(heap); + int actual = DatumGetInt32(binaryheap_remove_first(heap)); + + if (actual != expected) + elog(ERROR, "incorrect root node after removing root"); + verify_heap_property(heap); + } + + if (!binaryheap_empty(heap)) + elog(ERROR, "heap not empty after removing all nodes"); +} + +/* + * Test building heap after unordered additions. + */ +static void +test_build(int size) +{ + binaryheap *heap = binaryheap_allocate(size, int_cmp, NULL); + int *permutation = get_permutation(size); + + for (int i = 0; i < size; i++) + binaryheap_add_unordered(heap, Int32GetDatum(permutation[i])); + + if (binaryheap_size(heap) != size) + elog(ERROR, "wrong size for heap after unordered additions"); + + binaryheap_build(heap); + verify_heap_property(heap); +} + +/* + * Test removing nodes. 
+ */
+static void
+test_remove_node(int size)
+{
+ binaryheap *heap = binaryheap_allocate(size, int_cmp, NULL);
+ int *permutation = get_permutation(size);
+ int remove_count = pg_prng_uint64_range(&pg_global_prng_state,
+ 0, size - 1);
+
+ for (int i = 0; i < size; i++)
+ binaryheap_add(heap, Int32GetDatum(permutation[i]));
+
+ for (int i = 0; i < remove_count; i++)
+ {
+ int idx = pg_prng_uint64_range(&pg_global_prng_state,
+ 0, binaryheap_size(heap) - 1);
+
+ binaryheap_remove_node(heap, idx);
+ verify_heap_property(heap);
+ }
+
+ if (binaryheap_size(heap) != size - remove_count)
+ elog(ERROR, "wrong size after removing nodes");
+}
+
+/*
+ * Test replacing the root node.
+ */
+static void
+test_replace_first(int size)
+{
+ binaryheap *heap = binaryheap_allocate(size, int_cmp, NULL);
+
+ for (int i = 0; i < size; i++)
+ binaryheap_add(heap, Int32GetDatum(i));
+
+ /*
+ * Replace root with a value smaller than everything in the heap.
+ */
+ binaryheap_replace_first(heap, Int32GetDatum(-1));
+ verify_heap_property(heap);
+
+ /*
+ * Replace root with a value in the middle of the heap.
+ */
+ binaryheap_replace_first(heap, Int32GetDatum(size / 2));
+ verify_heap_property(heap);
+
+ /*
+ * Replace root with a larger value than everything in the heap.
+ */
+ binaryheap_replace_first(heap, Int32GetDatum(size + 1));
+ verify_heap_property(heap);
+}
+
+/*
+ * Test duplicate values.
+ */
+static void
+test_duplicates(int size)
+{
+ binaryheap *heap = binaryheap_allocate(size, int_cmp, NULL);
+ int dup = pg_prng_uint64_range(&pg_global_prng_state, 0, size - 1);
+
+ for (int i = 0; i < size; i++)
+ binaryheap_add(heap, Int32GetDatum(dup));
+
+ for (int i = 0; i < size; i++)
+ {
+ if (DatumGetInt32(binaryheap_remove_first(heap)) != dup)
+ elog(ERROR, "unexpected value in heap with duplicates");
+ }
+}
+
+/*
+ * Test resetting.
+ */
+static void
+test_reset(int size)
+{
+ binaryheap *heap = binaryheap_allocate(size, int_cmp, NULL);
+
+ for (int i = 0; i < size; i++)
+ binaryheap_add(heap, Int32GetDatum(i));
+
+ binaryheap_reset(heap);
+
+ if (!binaryheap_empty(heap))
+ elog(ERROR, "heap not empty after resetting");
+}
+
+/*
+ * SQL-callable entry point to perform all tests.
+ */
+PG_FUNCTION_INFO_V1(test_binaryheap);
+
+Datum
+test_binaryheap(PG_FUNCTION_ARGS)
+{
+ static const int test_sizes[] = {1, 2, 3, 10, 100, 1000};
+
+ for (int i = 0; i < sizeof(test_sizes) / sizeof(int); i++)
+ {
+ int size = test_sizes[i];
+
+ test_basic(size);
+ test_build(size);
+ test_remove_node(size);
+ test_replace_first(size);
+ test_duplicates(size);
+ test_reset(size);
+ }
+
+ PG_RETURN_VOID();
+}
diff --git a/src/test/modules/test_binaryheap/test_binaryheap.control b/src/test/modules/test_binaryheap/test_binaryheap.control
new file mode 100644
index 0000000000000..dd0785e05bdaf
--- /dev/null
+++ b/src/test/modules/test_binaryheap/test_binaryheap.control
@@ -0,0 +1,5 @@
+# test_binaryheap extension
+comment = 'Test code for binaryheap'
+default_version = '1.0'
+module_pathname = '$libdir/test_binaryheap'
+relocatable = true

From 62c3b4cd9ddc6d3066e3f6e43b68fd00c620d9ad Mon Sep 17 00:00:00 2001
From: Alexander Korotkov
Date: Fri, 18 Jul 2025 10:52:05 +0300
Subject: [PATCH 331/602] Support for deparsing of ArrayCoerceExpr node in
 contrib/postgres_fdw

When using a prepared statement to select data from a PostgreSQL
foreign table (postgres_fdw) with the "field = ANY($1)" expression,
the operation is not pushed down when an implicit type cast is applied,
and a generic plan is used.
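For example, with the scenario added to the regression tests below:

    SET plan_cache_mode = force_generic_plan;
    PREPARE s(varchar[]) AS SELECT count(*) FROM ft2 WHERE c6 = ANY ($1);
    EXPLAIN (VERBOSE, COSTS OFF) EXECUTE s(ARRAY['1','2']);

the remote SQL now carries the qual, as
"WHERE ((c6 = ANY ($1::character varying[])))", instead of pulling all
rows across for local filtering.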
This commit resolves the issue by supporting the push-down of
ArrayCoerceExpr, which is used in this case. The support is quite
straightforward and similar to other nodes, such as RelabelType.

Discussion: https://postgr.es/m/4f0cea802476d23c6e799512ffd17aff%40postgrespro.ru
Author: Alexander Pyhalov
Reviewed-by: Maxim Orlov
Reviewed-by: Alexander Korotkov
---
 contrib/postgres_fdw/deparse.c | 50 +++++++++++++++++++
 .../postgres_fdw/expected/postgres_fdw.out | 21 ++++++++
 contrib/postgres_fdw/sql/postgres_fdw.sql | 9 ++++
 3 files changed, 80 insertions(+)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 9351835b5e4f8..d761d076dc8be 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -161,6 +161,7 @@ static void deparseDistinctExpr(DistinctExpr *node, deparse_expr_cxt *context);
 static void deparseScalarArrayOpExpr(ScalarArrayOpExpr *node,
 deparse_expr_cxt *context);
 static void deparseRelabelType(RelabelType *node, deparse_expr_cxt *context);
+static void deparseArrayCoerceExpr(ArrayCoerceExpr *node, deparse_expr_cxt *context);
 static void deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context);
 static void deparseNullTest(NullTest *node, deparse_expr_cxt *context);
 static void deparseCaseExpr(CaseExpr *node, deparse_expr_cxt *context);
@@ -702,6 +703,34 @@ foreign_expr_walker(Node *node,
 state = FDW_COLLATE_UNSAFE;
 }
 break;
+ case T_ArrayCoerceExpr:
+ {
+ ArrayCoerceExpr *e = (ArrayCoerceExpr *) node;
+
+ /*
+ * Recurse to input subexpression.
+ */
+ if (!foreign_expr_walker((Node *) e->arg,
+ glob_cxt, &inner_cxt, case_arg_cxt))
+ return false;
+
+ /*
+ * T_ArrayCoerceExpr must not introduce a collation not
+ * derived from an input foreign Var (same logic as for a
+ * function).
+ */
+ collation = e->resultcollid;
+ if (collation == InvalidOid)
+ state = FDW_COLLATE_NONE;
+ else if (inner_cxt.state == FDW_COLLATE_SAFE &&
+ collation == inner_cxt.collation)
+ state = FDW_COLLATE_SAFE;
+ else if (collation == DEFAULT_COLLATION_OID)
+ state = FDW_COLLATE_NONE;
+ else
+ state = FDW_COLLATE_UNSAFE;
+ }
+ break;
 case T_BoolExpr:
 {
 BoolExpr *b = (BoolExpr *) node;
@@ -2919,6 +2948,9 @@ deparseExpr(Expr *node, deparse_expr_cxt *context)
 case T_RelabelType:
 deparseRelabelType((RelabelType *) node, context);
 break;
+ case T_ArrayCoerceExpr:
+ deparseArrayCoerceExpr((ArrayCoerceExpr *) node, context);
+ break;
 case T_BoolExpr:
 deparseBoolExpr((BoolExpr *) node, context);
 break;
@@ -3507,6 +3539,24 @@ deparseRelabelType(RelabelType *node, deparse_expr_cxt *context)
 node->resulttypmod));
 }

+/*
+ * Deparse a ArrayCoerceExpr (array-type conversion) node.
+ */
+static void
+deparseArrayCoerceExpr(ArrayCoerceExpr *node, deparse_expr_cxt *context)
+{
+ deparseExpr(node->arg, context);
+
+ /*
+ * No difference how to deparse explicit cast, but if we omit implicit
+ * cast in the query, it'll be more user-friendly
+ */
+ if (node->coerceformat != COERCE_IMPLICIT_CAST)
+ appendStringInfo(context->buf, "::%s",
+ deparse_type_name(node->resulttype,
+ node->resulttypmod));
+}
+
+/*
+ * Deparse a BoolExpr node.
*/ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 2185b42bb4f79..ff2b30cc91221 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1180,6 +1180,27 @@ SELECT * FROM ft1 WHERE CASE c3 COLLATE "C" WHEN c6 THEN true ELSE c3 < 'bar' EN Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" (4 rows) +-- Test array type conversion pushdown +SET plan_cache_mode = force_generic_plan; +PREPARE s(varchar[]) AS SELECT count(*) FROM ft2 WHERE c6 = ANY ($1); +EXPLAIN (VERBOSE, COSTS OFF) +EXECUTE s(ARRAY['1','2']); + QUERY PLAN +--------------------------------------------------------------------------------------------- + Foreign Scan + Output: (count(*)) + Relations: Aggregate on (public.ft2) + Remote SQL: SELECT count(*) FROM "S 1"."T 1" WHERE ((c6 = ANY ($1::character varying[]))) +(4 rows) + +EXECUTE s(ARRAY['1','2']); + count +------- + 200 +(1 row) + +DEALLOCATE s; +RESET plan_cache_mode; -- a regconfig constant referring to this text search configuration -- is initially unshippable CREATE TEXT SEARCH CONFIGURATION public.custom_search diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index e534b40de3c76..7267732f569e5 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -458,6 +458,15 @@ SELECT * FROM ft1 WHERE CASE c3 WHEN c6 THEN true ELSE c3 < 'bar' END; EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 WHERE CASE c3 COLLATE "C" WHEN c6 THEN true ELSE c3 < 'bar' END; +-- Test array type conversion pushdown +SET plan_cache_mode = force_generic_plan; +PREPARE s(varchar[]) AS SELECT count(*) FROM ft2 WHERE c6 = ANY ($1); +EXPLAIN (VERBOSE, COSTS OFF) +EXECUTE s(ARRAY['1','2']); +EXECUTE s(ARRAY['1','2']); +DEALLOCATE s; +RESET plan_cache_mode; + -- a regconfig constant referring to this text search configuration -- is initially unshippable CREATE TEXT SEARCH CONFIGURATION public.custom_search From 5022ff250eeba2367fb4e74fed8ee65bcddb6c99 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Fri, 18 Jul 2025 09:55:43 +0100 Subject: [PATCH 332/602] Fix concurrent update trigger issues with MERGE in a CTE. If a MERGE inside a CTE attempts an UPDATE or DELETE on a table with BEFORE ROW triggers, and a concurrent UPDATE or DELETE happens, the merge code would fail (crashing in the case of an UPDATE action, and potentially executing the wrong action for a DELETE action). This is the same issue that 9321c79c86 attempted to fix, except now for a MERGE inside a CTE. As noted in 9321c79c86, what needs to happen is for the trigger code to exit early, returning the TM_Result and TM_FailureData information to the merge code, if a concurrent modification is detected, rather than attempting to do an EPQ recheck. The merge code will then do its own rechecking, and rescan the action list, potentially executing a different action in light of the concurrent update. In particular, the trigger code must never call ExecGetUpdateNewTuple() for MERGE, since that is bound to fail because MERGE has its own per-action projection information. Commit 9321c79c86 did this using estate->es_plannedstmt->commandType in the trigger code to detect that a MERGE was being executed, which is fine for a plain MERGE command, but does not work for a MERGE inside a CTE. 
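The failing shape is, in essence, the statement now used by the
merge-match-recheck isolation test (simplified here; the table and
column names are illustrative):

    WITH m AS (
        MERGE INTO target_tg t
        USING (SELECT 1 AS key) s ON s.key = t.key
        WHEN MATCHED AND balance < 100 THEN
            UPDATE SET balance = balance * 2
        RETURNING t.*
    )
    SELECT * FROM m;

For such a statement, estate->es_plannedstmt->commandType is CMD_SELECT
rather than CMD_MERGE, so the trigger code could not tell that a MERGE
was in progress.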
Fix by passing that information to the trigger code as an additional parameter passed to ExecBRUpdateTriggers() and ExecBRDeleteTriggers(). Back-patch as far as v17 only, since MERGE cannot appear inside a CTE prior to that. Additionally, take care to preserve the trigger ABI in v17 (though not in v18, which is still in beta). Bug: #18986 Reported-by: Yaroslav Syrytsia Author: Dean Rasheed Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/18986-e7a8aac3d339fa47@postgresql.org Backpatch-through: 17 --- src/backend/commands/trigger.c | 74 ++++++++++++------- src/backend/executor/execReplication.c | 4 +- src/backend/executor/nodeModifyTable.c | 6 +- src/include/commands/trigger.h | 6 +- .../expected/merge-match-recheck.out | 27 ++++--- .../isolation/specs/merge-match-recheck.spec | 22 +++--- 6 files changed, 89 insertions(+), 50 deletions(-) diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 67f8e70f9c166..7dc121f73f17e 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -80,6 +80,7 @@ static bool GetTupleForTrigger(EState *estate, ItemPointer tid, LockTupleMode lockmode, TupleTableSlot *oldslot, + bool do_epq_recheck, TupleTableSlot **epqslot, TM_Result *tmresultp, TM_FailureData *tmfdp); @@ -2693,7 +2694,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, - TM_FailureData *tmfd) + TM_FailureData *tmfd, + bool is_merge_delete) { TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo); TriggerDesc *trigdesc = relinfo->ri_TrigDesc; @@ -2708,9 +2710,17 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, { TupleTableSlot *epqslot_candidate = NULL; + /* + * Get a copy of the on-disk tuple we are planning to delete. In + * general, if the tuple has been concurrently updated, we should + * recheck it using EPQ. However, if this is a MERGE DELETE action, + * we skip this EPQ recheck and leave it to the caller (it must do + * additional rechecking, and might end up executing a different + * action entirely). + */ if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, - LockTupleExclusive, slot, &epqslot_candidate, - tmresult, tmfd)) + LockTupleExclusive, slot, !is_merge_delete, + &epqslot_candidate, tmresult, tmfd)) return false; /* @@ -2800,6 +2810,7 @@ ExecARDeleteTriggers(EState *estate, tupleid, LockTupleExclusive, slot, + false, NULL, NULL, NULL); @@ -2944,7 +2955,8 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, - TM_FailureData *tmfd) + TM_FailureData *tmfd, + bool is_merge_update) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo); @@ -2965,10 +2977,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, { TupleTableSlot *epqslot_candidate = NULL; - /* get a copy of the on-disk tuple we are planning to update */ + /* + * Get a copy of the on-disk tuple we are planning to update. In + * general, if the tuple has been concurrently updated, we should + * recheck it using EPQ. However, if this is a MERGE UPDATE action, + * we skip this EPQ recheck and leave it to the caller (it must do + * additional rechecking, and might end up executing a different + * action entirely). 
+ */ if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, - lockmode, oldslot, &epqslot_candidate, - tmresult, tmfd)) + lockmode, oldslot, !is_merge_update, + &epqslot_candidate, tmresult, tmfd)) return false; /* cancel the update action */ /* @@ -3142,6 +3161,7 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, tupleid, LockTupleExclusive, oldslot, + false, NULL, NULL, NULL); @@ -3298,6 +3318,7 @@ GetTupleForTrigger(EState *estate, ItemPointer tid, LockTupleMode lockmode, TupleTableSlot *oldslot, + bool do_epq_recheck, TupleTableSlot **epqslot, TM_Result *tmresultp, TM_FailureData *tmfdp) @@ -3357,29 +3378,30 @@ GetTupleForTrigger(EState *estate, if (tmfd.traversed) { /* - * Recheck the tuple using EPQ. For MERGE, we leave this - * to the caller (it must do additional rechecking, and - * might end up executing a different action entirely). + * Recheck the tuple using EPQ, if requested. Otherwise, + * just return that it was concurrently updated. */ - if (estate->es_plannedstmt->commandType == CMD_MERGE) + if (do_epq_recheck) { - if (tmresultp) - *tmresultp = TM_Updated; - return false; + *epqslot = EvalPlanQual(epqstate, + relation, + relinfo->ri_RangeTableIndex, + oldslot); + + /* + * If PlanQual failed for updated tuple - we must not + * process this tuple! + */ + if (TupIsNull(*epqslot)) + { + *epqslot = NULL; + return false; + } } - - *epqslot = EvalPlanQual(epqstate, - relation, - relinfo->ri_RangeTableIndex, - oldslot); - - /* - * If PlanQual failed for updated tuple - we must not - * process this tuple! - */ - if (TupIsNull(*epqslot)) + else { - *epqslot = NULL; + if (tmresultp) + *tmresultp = TM_Updated; return false; } } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 53ddd25c42db9..f262e7a66f771 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -670,7 +670,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_update_before_row) { if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, - tid, NULL, slot, NULL, NULL)) + tid, NULL, slot, NULL, NULL, false)) skip_tuple = true; /* "do nothing" */ } @@ -746,7 +746,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_delete_before_row) { skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - tid, NULL, NULL, NULL, NULL); + tid, NULL, NULL, NULL, NULL, false); } if (!skip_tuple) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 54da8e7995bd3..7c6c2c1f6e42a 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1474,7 +1474,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, return ExecBRDeleteTriggers(context->estate, context->epqstate, resultRelInfo, tupleid, oldtuple, - epqreturnslot, result, &context->tmfd); + epqreturnslot, result, &context->tmfd, + context->mtstate->operation == CMD_MERGE); } return true; @@ -2117,7 +2118,8 @@ ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, return ExecBRUpdateTriggers(context->estate, context->epqstate, resultRelInfo, tupleid, oldtuple, slot, - result, &context->tmfd); + result, &context->tmfd, + context->mtstate->operation == CMD_MERGE); } return true; diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 2ed2c4bb3784b..cfd7daa20edac 100644 --- a/src/include/commands/trigger.h +++ 
b/src/include/commands/trigger.h @@ -213,7 +213,8 @@ extern bool ExecBRDeleteTriggers(EState *estate, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, - TM_FailureData *tmfd); + TM_FailureData *tmfd, + bool is_merge_delete); extern void ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, ItemPointer tupleid, @@ -235,7 +236,8 @@ extern bool ExecBRUpdateTriggers(EState *estate, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, - TM_FailureData *tmfd); + TM_FailureData *tmfd, + bool is_merge_update); extern void ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, ResultRelInfo *src_partinfo, diff --git a/src/test/isolation/expected/merge-match-recheck.out b/src/test/isolation/expected/merge-match-recheck.out index 9a44a5959270b..90300f1db5ab3 100644 --- a/src/test/isolation/expected/merge-match-recheck.out +++ b/src/test/isolation/expected/merge-match-recheck.out @@ -241,19 +241,28 @@ starting permutation: update_bal1_tg merge_bal_tg c2 select1_tg c1 s2: NOTICE: Update: (1,160,s1,setup) -> (1,50,s1,"setup updated by update_bal1_tg") step update_bal1_tg: UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1; step merge_bal_tg: - MERGE INTO target_tg t - USING (SELECT 1 as key) s - ON s.key = t.key - WHEN MATCHED AND balance < 100 THEN - UPDATE SET balance = balance * 2, val = t.val || ' when1' - WHEN MATCHED AND balance < 200 THEN - UPDATE SET balance = balance * 4, val = t.val || ' when2' - WHEN MATCHED AND balance < 300 THEN - UPDATE SET balance = balance * 8, val = t.val || ' when3'; + WITH t AS ( + MERGE INTO target_tg t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3' + RETURNING t.* + ) + SELECT * FROM t; step c2: COMMIT; s1: NOTICE: Update: (1,50,s1,"setup updated by update_bal1_tg") -> (1,100,s1,"setup updated by update_bal1_tg when1") step merge_bal_tg: <... 
completed> +key|balance|status|val +---+-------+------+------------------------------------- + 1| 100|s1 |setup updated by update_bal1_tg when1 +(1 row) + step select1_tg: SELECT * FROM target_tg; key|balance|status|val ---+-------+------+------------------------------------- diff --git a/src/test/isolation/specs/merge-match-recheck.spec b/src/test/isolation/specs/merge-match-recheck.spec index 26266b8c2978e..15226e40c9efc 100644 --- a/src/test/isolation/specs/merge-match-recheck.spec +++ b/src/test/isolation/specs/merge-match-recheck.spec @@ -99,15 +99,19 @@ step "merge_bal_pa" } step "merge_bal_tg" { - MERGE INTO target_tg t - USING (SELECT 1 as key) s - ON s.key = t.key - WHEN MATCHED AND balance < 100 THEN - UPDATE SET balance = balance * 2, val = t.val || ' when1' - WHEN MATCHED AND balance < 200 THEN - UPDATE SET balance = balance * 4, val = t.val || ' when2' - WHEN MATCHED AND balance < 300 THEN - UPDATE SET balance = balance * 8, val = t.val || ' when3'; + WITH t AS ( + MERGE INTO target_tg t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3' + RETURNING t.* + ) + SELECT * FROM t; } step "merge_delete" From 4c5159a2d8c01e6f08ce20a51bb2dcaa9c8be526 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Fri, 18 Jul 2025 18:40:07 +0300 Subject: [PATCH 333/602] Fix a typo in the deparseArrayCoerceExpr() header comment Discussion: https://postgr.es/m/CAHewXNn%3D_ykCtcTw5SCfZ-eVr4m%2BCuc804rGeMsKuj%3DD4xpL4w%40mail.gmail.com Author: Tender Wang --- contrib/postgres_fdw/deparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index d761d076dc8be..e5b5e1a5f51a5 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -3540,7 +3540,7 @@ deparseRelabelType(RelabelType *node, deparse_expr_cxt *context) } /* - * Deparse a ArrayCoerceExpr (array-type conversion) node. + * Deparse an ArrayCoerceExpr (array-type conversion) node. */ static void deparseArrayCoerceExpr(ArrayCoerceExpr *node, deparse_expr_cxt *context) From 161a3e8b682ebb98ea0b9d5015d22990696b99ec Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 18 Jul 2025 10:59:46 -0500 Subject: [PATCH 334/602] pg_upgrade: Use COPY for large object metadata. Presently, pg_dump generates commands like SELECT pg_catalog.lo_create('5432'); ALTER LARGE OBJECT 5432 OWNER TO alice; GRANT SELECT ON LARGE OBJECT 5432 TO bob; for each large object. This is particularly slow at restore time, especially when there are tens or hundreds of millions of large objects. From reports and personal experience, such slow restores seem to be most painful when encountered during pg_upgrade. This commit teaches pg_dump to instead dump pg_largeobject_metadata and the corresponding pg_shdepend rows when in binary upgrade mode, i.e., pg_dump now generates commands like COPY pg_catalog.pg_largeobject_metadata (oid, lomowner, lomacl) FROM stdin; 5432 16384 {alice=rw/alice,bob=r/alice} \. COPY pg_catalog.pg_shdepend (dbid, classid, objid, objsubid, refclassid, refobjid, deptype) FROM stdin; 5 2613 5432 0 1260 16384 o 5 2613 5432 0 1260 16385 a \. Testing indicates the COPY approach can be significantly faster. 
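The old approach emitted a few commands per large object (lo_create(),
ALTER ... OWNER, plus any GRANTs), so the row count of the metadata
catalog is a rough proxy for how much a given cluster stands to gain:

    SELECT count(*) FROM pg_catalog.pg_largeobject_metadata;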
To do any better, we'd probably need to find a way to copy/link pg_largeobject_metadata's files during pg_upgrade, which would be limited to upgrades from >= v16 (since commit 7b378237aa changed the storage format for aclitem, which is used for pg_largeobject_metadata.lomacl). Note that this change only applies to binary upgrade mode (i.e., dumps initiated by pg_upgrade) since it inserts rows directly into catalogs. Also, this optimization can only be used for upgrades from >= v12 because pg_largeobject_metadata was created WITH OIDS in older versions, which prevents pg_dump from handling pg_largeobject_metadata.oid properly. With some extra effort, it might be possible to support upgrades from older versions, but the added complexity didn't seem worth it to support versions that will have been out-of-support for nearly 3 years by the time this change is released. Experienced hackers may remember that prior to v12, pg_upgrade copied/linked pg_largeobject_metadata's files (see commit 12a53c732c). Besides the aforementioned storage format issues, this approach failed to transfer the relevant pg_shdepend rows, and pg_dump still had to generate an lo_create() command per large object so that creating the dependent comments and security labels worked. We could perhaps adopt a hybrid approach for upgrades from v16 and newer (i.e., generate lo_create() commands for each large object, copy/link pg_largeobject_metadata's files, and COPY the relevant pg_shdepend rows), but further testing is needed. Reported-by: Hannu Krosing Suggested-by: Tom Lane Reviewed-by: Hannu Krosing Reviewed-by: Nitin Motiani Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAMT0RQSS-6qLH%2BzYsOeUbAYhop3wmQTkNmQpo5--QRDUR%2BqYmQ%40mail.gmail.com --- src/bin/pg_dump/pg_backup_archiver.c | 15 +++++ src/bin/pg_dump/pg_dump.c | 90 ++++++++++++++++++++++++++-- src/bin/pg_dump/t/002_pg_dump.pl | 4 +- 3 files changed, 103 insertions(+), 6 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 197c1295d93fd..30e0da31aa340 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -31,6 +31,8 @@ #endif #include "catalog/pg_class_d.h" +#include "catalog/pg_largeobject_metadata_d.h" +#include "catalog/pg_shdepend_d.h" #include "common/string.h" #include "compress_io.h" #include "dumputils.h" @@ -2974,6 +2976,19 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) int res = REQ_SCHEMA | REQ_DATA; RestoreOptions *ropt = AH->public.ropt; + /* + * For binary upgrade mode, dump pg_largeobject_metadata and the + * associated pg_shdepend rows. This is faster to restore than the + * equivalent set of large object commands. We can only do this for + * upgrades from v12 and newer; in older versions, pg_largeobject_metadata + * was created WITH OIDS, so the OID column is hidden and won't be dumped. 
+ */ + if (ropt->binary_upgrade && AH->public.remoteVersion >= 120000 && + strcmp(te->desc, "TABLE DATA") == 0 && + (te->catalogId.oid == LargeObjectMetadataRelationId || + te->catalogId.oid == SharedDependRelationId)) + return REQ_DATA; + /* These items are treated specially */ if (strcmp(te->desc, "ENCODING") == 0 || strcmp(te->desc, "STDSTRINGS") == 0 || diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index c6226175528bb..604fc109416c9 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -49,8 +49,10 @@ #include "catalog/pg_class_d.h" #include "catalog/pg_default_acl_d.h" #include "catalog/pg_largeobject_d.h" +#include "catalog/pg_largeobject_metadata_d.h" #include "catalog/pg_proc_d.h" #include "catalog/pg_publication_d.h" +#include "catalog/pg_shdepend_d.h" #include "catalog/pg_subscription_d.h" #include "catalog/pg_type_d.h" #include "common/connect.h" @@ -209,6 +211,12 @@ static int nbinaryUpgradeClassOids = 0; static SequenceItem *sequences = NULL; static int nsequences = 0; +/* + * For binary upgrade, the dump ID of pg_largeobject_metadata is saved for use + * as a dependency for pg_shdepend and any large object comments/seclabels. + */ +static DumpId lo_metadata_dumpId; + /* Maximum number of relations to fetch in a fetchAttributeStats() call. */ #define MAX_ATTR_STATS_RELS 64 @@ -1085,6 +1093,36 @@ main(int argc, char **argv) if (!dopt.dumpData && dopt.sequence_data) getTableData(&dopt, tblinfo, numTables, RELKIND_SEQUENCE); + /* + * For binary upgrade mode, dump pg_largeobject_metadata and the + * associated pg_shdepend rows. This is faster to restore than the + * equivalent set of large object commands. We can only do this for + * upgrades from v12 and newer; in older versions, pg_largeobject_metadata + * was created WITH OIDS, so the OID column is hidden and won't be dumped. + */ + if (dopt.binary_upgrade && fout->remoteVersion >= 120000) + { + TableInfo *lo_metadata = findTableByOid(LargeObjectMetadataRelationId); + TableInfo *shdepend = findTableByOid(SharedDependRelationId); + + makeTableDataInfo(&dopt, lo_metadata); + makeTableDataInfo(&dopt, shdepend); + + /* + * Save pg_largeobject_metadata's dump ID for use as a dependency for + * pg_shdepend and any large object comments/seclabels. + */ + lo_metadata_dumpId = lo_metadata->dataObj->dobj.dumpId; + addObjectDependency(&shdepend->dataObj->dobj, lo_metadata_dumpId); + + /* + * Only dump large object shdepend rows for this database. + */ + shdepend->dataObj->filtercond = "WHERE classid = 'pg_largeobject'::regclass " + "AND dbid = (SELECT oid FROM pg_database " + " WHERE datname = current_database())"; + } + /* * In binary-upgrade mode, we do not have to worry about the actual LO * data or the associated metadata that resides in the pg_largeobject and @@ -3924,10 +3962,37 @@ getLOs(Archive *fout) * as it will be copied by pg_upgrade, which simply copies the * pg_largeobject table. We *do* however dump out anything but the * data, as pg_upgrade copies just pg_largeobject, but not - * pg_largeobject_metadata, after the dump is restored. + * pg_largeobject_metadata, after the dump is restored. In versions + * before v12, this is done via proper large object commands. In + * newer versions, we dump the content of pg_largeobject_metadata and + * any associated pg_shdepend rows, which is faster to restore. 
(On + * From da71717f0a7cf905a6a31ffd34552554922a0374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Mon, 21 Jul 2025 11:34:10 +0200 Subject: [PATCH 344/602] pg_dump: include comments on not-null constraints on domains, too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e5da0fe3c22b introduced catalog entries for not-null constraints on domains; but because commit b0e96f311985 (the original work for catalogued not-null constraints on tables) forgot to teach pg_dump to process the comments for them, this one also forgot. Add that now. We also need to teach repairDependencyLoop() about the new type of constraints being possible for domains. Backpatch-through: 17 Co-authored-by: jian he Co-authored-by: Álvaro Herrera Reported-by: jian he Discussion: https://postgr.es/m/CACJufxF-0bqVR=j4jonS6N2Ka6hHUpFyu3_3TWKNhOW_4yFSSg@mail.gmail.com --- src/bin/pg_dump/pg_dump.c | 160 +++++++++++++++++++++++-------- src/bin/pg_dump/pg_dump.h | 4 +- src/bin/pg_dump/pg_dump_sort.c | 15 +-- src/bin/pg_dump/t/002_pg_dump.pl | 30 +++++- 4 files changed, 160 insertions(+), 49 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 604fc109416c9..ede10e5291efc 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -47,6 +47,7 @@ #include "catalog/pg_authid_d.h" #include "catalog/pg_cast_d.h" #include "catalog/pg_class_d.h" +#include "catalog/pg_constraint_d.h" #include "catalog/pg_default_acl_d.h" #include "catalog/pg_largeobject_d.h" #include "catalog/pg_largeobject_metadata_d.h" @@ -6187,6 +6188,7 @@ getTypes(Archive *fout) */ tyinfo[i].nDomChecks = 0; tyinfo[i].domChecks = NULL; + tyinfo[i].notnull = NULL; if ((tyinfo[i].dobj.dump & DUMP_COMPONENT_DEFINITION) && tyinfo[i].typtype == TYPTYPE_DOMAIN) getDomainConstraints(fout, &(tyinfo[i])); @@ -8312,27 +8314,33 @@ addConstrChildIdxDeps(DumpableObject *dobj, const IndxInfo *refidx) static void getDomainConstraints(Archive *fout, TypeInfo *tyinfo) { - int i; ConstraintInfo *constrinfo; PQExpBuffer query = createPQExpBuffer(); PGresult *res; int i_tableoid, i_oid, i_conname, - i_consrc; + i_consrc, + i_convalidated, + i_contype; int ntups; if (!fout->is_prepared[PREPQUERY_GETDOMAINCONSTRAINTS]) { - /* Set up query for constraint-specific details */ - appendPQExpBufferStr(query, - "PREPARE getDomainConstraints(pg_catalog.oid) AS\n" - "SELECT tableoid, oid, conname, " - "pg_catalog.pg_get_constraintdef(oid) AS consrc, " - "convalidated " - "FROM pg_catalog.pg_constraint " - "WHERE contypid = $1 AND contype = 'c' " - "ORDER BY conname"); + /* + * Set up query for constraint-specific details. For servers 17 and + * up, domains have constraints of type 'n' as well as 'c', otherwise + * just the latter. + */ + appendPQExpBuffer(query, + "PREPARE getDomainConstraints(pg_catalog.oid) AS\n" + "SELECT tableoid, oid, conname, " + "pg_catalog.pg_get_constraintdef(oid) AS consrc, " + "convalidated, contype " + "FROM pg_catalog.pg_constraint " + "WHERE contypid = $1 AND contype IN (%s) " + "ORDER BY conname", + fout->remoteVersion < 170000 ? 
"'c'" : "'c', 'n'"); ExecuteSqlStatement(fout, query->data); @@ -8351,33 +8359,50 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo) i_oid = PQfnumber(res, "oid"); i_conname = PQfnumber(res, "conname"); i_consrc = PQfnumber(res, "consrc"); + i_convalidated = PQfnumber(res, "convalidated"); + i_contype = PQfnumber(res, "contype"); constrinfo = (ConstraintInfo *) pg_malloc(ntups * sizeof(ConstraintInfo)); - - tyinfo->nDomChecks = ntups; tyinfo->domChecks = constrinfo; - for (i = 0; i < ntups; i++) + /* 'i' tracks result rows; 'j' counts CHECK constraints */ + for (int i = 0, j = 0; i < ntups; i++) { - bool validated = PQgetvalue(res, i, 4)[0] == 't'; - - constrinfo[i].dobj.objType = DO_CONSTRAINT; - constrinfo[i].dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_tableoid)); - constrinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid)); - AssignDumpId(&constrinfo[i].dobj); - constrinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_conname)); - constrinfo[i].dobj.namespace = tyinfo->dobj.namespace; - constrinfo[i].contable = NULL; - constrinfo[i].condomain = tyinfo; - constrinfo[i].contype = 'c'; - constrinfo[i].condef = pg_strdup(PQgetvalue(res, i, i_consrc)); - constrinfo[i].confrelid = InvalidOid; - constrinfo[i].conindex = 0; - constrinfo[i].condeferrable = false; - constrinfo[i].condeferred = false; - constrinfo[i].conislocal = true; - - constrinfo[i].separate = !validated; + bool validated = PQgetvalue(res, i, i_convalidated)[0] == 't'; + char contype = (PQgetvalue(res, i, i_contype))[0]; + ConstraintInfo *constraint; + + if (contype == CONSTRAINT_CHECK) + { + constraint = &constrinfo[j++]; + tyinfo->nDomChecks++; + } + else + { + Assert(contype == CONSTRAINT_NOTNULL); + Assert(tyinfo->notnull == NULL); + /* use last item in array for the not-null constraint */ + tyinfo->notnull = &(constrinfo[ntups - 1]); + constraint = tyinfo->notnull; + } + + constraint->dobj.objType = DO_CONSTRAINT; + constraint->dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_tableoid)); + constraint->dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid)); + AssignDumpId(&(constraint->dobj)); + constraint->dobj.name = pg_strdup(PQgetvalue(res, i, i_conname)); + constraint->dobj.namespace = tyinfo->dobj.namespace; + constraint->contable = NULL; + constraint->condomain = tyinfo; + constraint->contype = contype; + constraint->condef = pg_strdup(PQgetvalue(res, i, i_consrc)); + constraint->confrelid = InvalidOid; + constraint->conindex = 0; + constraint->condeferrable = false; + constraint->condeferred = false; + constraint->conislocal = true; + + constraint->separate = !validated; /* * Make the domain depend on the constraint, ensuring it won't be @@ -8386,8 +8411,7 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo) * anyway, so this doesn't matter. */ if (validated) - addObjectDependency(&tyinfo->dobj, - constrinfo[i].dobj.dumpId); + addObjectDependency(&tyinfo->dobj, constraint->dobj.dumpId); } PQclear(res); @@ -12597,8 +12621,36 @@ dumpDomain(Archive *fout, const TypeInfo *tyinfo) appendPQExpBuffer(q, " COLLATE %s", fmtQualifiedDumpable(coll)); } + /* + * Print a not-null constraint if there's one. In servers older than 17 + * these don't have names, so just print it unadorned; in newer ones they + * do, but most of the time it's going to be the standard generated one, + * so omit the name in that case also. 
+ */ if (typnotnull[0] == 't') - appendPQExpBufferStr(q, " NOT NULL"); + { + if (fout->remoteVersion < 170000 || tyinfo->notnull == NULL) + appendPQExpBufferStr(q, " NOT NULL"); + else + { + ConstraintInfo *notnull = tyinfo->notnull; + + if (!notnull->separate) + { + char *default_name; + + /* XXX should match ChooseConstraintName better */ + default_name = psprintf("%s_not_null", tyinfo->dobj.name); + + if (strcmp(default_name, notnull->dobj.name) == 0) + appendPQExpBufferStr(q, " NOT NULL"); + else + appendPQExpBuffer(q, " CONSTRAINT %s %s", + fmtId(notnull->dobj.name), notnull->condef); + free(default_name); + } + } + } if (typdefault != NULL) { @@ -12618,7 +12670,7 @@ dumpDomain(Archive *fout, const TypeInfo *tyinfo) { ConstraintInfo *domcheck = &(tyinfo->domChecks[i]); - if (!domcheck->separate) + if (!domcheck->separate && domcheck->contype == 'c') appendPQExpBuffer(q, "\n\tCONSTRAINT %s %s", fmtId(domcheck->dobj.name), domcheck->condef); } @@ -12682,6 +12734,25 @@ dumpDomain(Archive *fout, const TypeInfo *tyinfo) destroyPQExpBuffer(conprefix); } + /* + * And a comment on the not-null constraint, if there's one -- but only if + * the constraint itself was dumped here + */ + if (tyinfo->notnull != NULL && !tyinfo->notnull->separate) + { + PQExpBuffer conprefix = createPQExpBuffer(); + + appendPQExpBuffer(conprefix, "CONSTRAINT %s ON DOMAIN", + fmtId(tyinfo->notnull->dobj.name)); + + if (tyinfo->notnull->dobj.dump & DUMP_COMPONENT_COMMENT) + dumpComment(fout, conprefix->data, qtypname, + tyinfo->dobj.namespace->dobj.name, + tyinfo->rolname, + tyinfo->notnull->dobj.catId, 0, tyinfo->dobj.dumpId); + destroyPQExpBuffer(conprefix); + } + destroyPQExpBuffer(q); destroyPQExpBuffer(delq); destroyPQExpBuffer(query); @@ -18543,14 +18614,23 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo) .dropStmt = delq->data)); } } - else if (coninfo->contype == 'c' && tbinfo == NULL) + else if (tbinfo == NULL) { - /* CHECK constraint on a domain */ + /* CHECK, NOT NULL constraint on a domain */ TypeInfo *tyinfo = coninfo->condomain; + Assert(coninfo->contype == 'c' || coninfo->contype == 'n'); + /* Ignore if not to be dumped separately */ if (coninfo->separate) { + const char *keyword; + + if (coninfo->contype == 'c') + keyword = "CHECK CONSTRAINT"; + else + keyword = "CONSTRAINT"; + appendPQExpBuffer(q, "ALTER DOMAIN %s\n", fmtQualifiedDumpable(tyinfo)); appendPQExpBuffer(q, " ADD CONSTRAINT %s %s;\n", @@ -18569,7 +18649,7 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo) ARCHIVE_OPTS(.tag = tag, .namespace = tyinfo->dobj.namespace->dobj.name, .owner = tyinfo->rolname, - .description = "CHECK CONSTRAINT", + .description = keyword, .section = SECTION_POST_DATA, .createStmt = q->data, .dropStmt = delq->data)); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 39eef1d6617f4..2370c98d192a6 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -222,7 +222,9 @@ typedef struct _typeInfo bool isDefined; /* true if typisdefined */ /* If needed, we'll create a "shell type" entry for it; link that here: */ struct _shellTypeInfo *shellType; /* shell-type entry, or NULL */ - /* If it's a domain, we store links to its constraints here: */ + /* If it's a domain, its not-null constraint is here: */ + struct _constraintInfo *notnull; + /* If it's a domain, we store links to its CHECK constraints here: */ int nDomChecks; struct _constraintInfo *domChecks; } TypeInfo; diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index 
538e7dcb49357..f99a0797ea7fb 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -907,7 +907,7 @@ repairTableAttrDefMultiLoop(DumpableObject *tableobj, } /* - * CHECK constraints on domains work just like those on tables ... + * CHECK, NOT NULL constraints on domains work just like those on tables ... */ static void repairDomainConstraintLoop(DumpableObject *domainobj, @@ -1173,11 +1173,12 @@ repairDependencyLoop(DumpableObject **loop, } } - /* Domain and CHECK constraint */ + /* Domain and CHECK or NOT NULL constraint */ if (nLoop == 2 && loop[0]->objType == DO_TYPE && loop[1]->objType == DO_CONSTRAINT && - ((ConstraintInfo *) loop[1])->contype == 'c' && + (((ConstraintInfo *) loop[1])->contype == 'c' || + ((ConstraintInfo *) loop[1])->contype == 'n') && ((ConstraintInfo *) loop[1])->condomain == (TypeInfo *) loop[0]) { repairDomainConstraintLoop(loop[0], loop[1]); @@ -1186,14 +1187,15 @@ repairDependencyLoop(DumpableObject **loop, if (nLoop == 2 && loop[1]->objType == DO_TYPE && loop[0]->objType == DO_CONSTRAINT && - ((ConstraintInfo *) loop[0])->contype == 'c' && + (((ConstraintInfo *) loop[0])->contype == 'c' || + ((ConstraintInfo *) loop[0])->contype == 'n') && ((ConstraintInfo *) loop[0])->condomain == (TypeInfo *) loop[1]) { repairDomainConstraintLoop(loop[1], loop[0]); return; } - /* Indirect loop involving domain and CHECK constraint */ + /* Indirect loop involving domain and CHECK or NOT NULL constraint */ if (nLoop > 2) { for (i = 0; i < nLoop; i++) @@ -1203,7 +1205,8 @@ repairDependencyLoop(DumpableObject **loop, for (j = 0; j < nLoop; j++) { if (loop[j]->objType == DO_CONSTRAINT && - ((ConstraintInfo *) loop[j])->contype == 'c' && + (((ConstraintInfo *) loop[j])->contype == 'c' || + ((ConstraintInfo *) loop[j])->contype == 'n') && ((ConstraintInfo *) loop[j])->condomain == (TypeInfo *) loop[i]) { repairDomainConstraintMultiLoop(loop[i], loop[j]); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index d8330e2bd17d3..6c7ec80e271ce 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2379,17 +2379,19 @@ create_sql => 'CREATE DOMAIN dump_test.us_postal_code AS TEXT COLLATE "C" DEFAULT \'10014\' + CONSTRAINT nn NOT NULL CHECK(VALUE ~ \'^\d{5}$\' OR VALUE ~ \'^\d{5}-\d{4}$\'); + COMMENT ON CONSTRAINT nn + ON DOMAIN dump_test.us_postal_code IS \'not null\'; COMMENT ON CONSTRAINT us_postal_code_check ON DOMAIN dump_test.us_postal_code IS \'check it\';', regexp => qr/^ - \QCREATE DOMAIN dump_test.us_postal_code AS text COLLATE pg_catalog."C" DEFAULT '10014'::text\E\n\s+ + \QCREATE DOMAIN dump_test.us_postal_code AS text COLLATE pg_catalog."C" CONSTRAINT nn NOT NULL DEFAULT '10014'::text\E\n\s+ \QCONSTRAINT us_postal_code_check CHECK \E \Q(((VALUE ~ '^\d{5}\E \$\Q'::text) OR (VALUE ~ '^\d{5}-\d{4}\E\$ \Q'::text)));\E(.|\n)* - \QCOMMENT ON CONSTRAINT us_postal_code_check ON DOMAIN dump_test.us_postal_code IS 'check it';\E /xm, like => { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, @@ -2399,6 +2401,30 @@ }, }, + 'COMMENT ON CONSTRAINT ON DOMAIN (1)' => { + regexp => qr/^ + \QCOMMENT ON CONSTRAINT nn ON DOMAIN dump_test.us_postal_code IS 'not null';\E + /xm, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + + 'COMMENT ON CONSTRAINT ON DOMAIN (2)' => { + regexp => qr/^ + \QCOMMENT ON CONSTRAINT us_postal_code_check ON DOMAIN dump_test.us_postal_code IS 'check 
it';\E + /xm, + like => + { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + 'CREATE FUNCTION dump_test.pltestlang_call_handler' => { create_order => 17, create_sql => 'CREATE FUNCTION dump_test.pltestlang_call_handler()
From 0810fbb02dbe70b8a7a7bcc51580827b8bbddbdc Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 21 Jul 2025 15:07:34 +0300 Subject: [PATCH 345/602] Update comment for ReplicationSlot.last_saved_restart_lsn
Document that restart_lsn can go backwards and explain why this could happen.
Discussion: https://postgr.es/m/1d12d2-67235980-35-19a406a0%4063439497 Discussion: https://postgr.es/m/CAPpHfdvuyMrUg0Vs5jPfwLOo1M9B-GP5j_My9URnBX0B%3DnrHKw%40mail.gmail.com Author: Hayato Kuroda Co-authored-by: Amit Kapila Reviewed-by: Vignesh C Reviewed-by: Amit Kapila Reviewed-by: Alexander Korotkov --- src/include/replication/slot.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index 76aeeb92242e9..19b4e8b6a030e 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -220,6 +220,25 @@ typedef struct ReplicationSlot * Latest restart_lsn that has been flushed to disk. For persistent slots * the flushed LSN should be taken into account when calculating the * oldest LSN for WAL segments removal. + * + * Do not assume that restart_lsn will always move forward, i.e., that the + * previously flushed restart_lsn is always behind data.restart_lsn. In + * streaming replication using a physical slot, the restart_lsn is updated + * based on the flushed WAL position reported by the walreceiver. + * + * This replication mode allows duplicate WAL records to be received and + * overwritten. If the walreceiver receives older WAL records and then + * reports them as flushed to the walsender, the restart_lsn may appear to + * move backward. + * + * This typically occurs at the beginning of replication. One reason is + * that streaming replication starts at the beginning of a segment, so, if + * restart_lsn is in the middle of a segment, it will be updated to an + * earlier LSN; see RequestXLogStreaming. Another reason is that the + * walreceiver chooses its startpoint based on the replayed LSN, so, if + * some records have been received but not yet applied, they will be + * received again, which leads to updating the restart_lsn to an earlier + * position. */ XLogRecPtr last_saved_restart_lsn;
From e0d05295268e3811e6743403cb779f21d1662426 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 22 Jul 2025 11:19:17 +0900 Subject: [PATCH 346/602] Expand virtual generated columns before sublink pull-up
Currently, we expand virtual generated columns after we have pulled up any SubLinks within the query's quals. This ensures that the virtual generated column references within SubLinks that should be transformed into joins are correctly expanded. This approach works well and has posed no issues.
In an upcoming patch, we plan to centralize the collection of catalog information needed early in the planner. This will help avoid repeated table_open/table_close calls for relations in the rangetable. Since this information is required during sublink pull-up, we are moving the expansion of virtual generated columns to occur beforehand.
To achieve this, if any EXISTS SubLinks can be pulled up, their rangetables are processed just before pulling them up.
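For illustration, a sketch of the behavior this enables, using a
hypothetical table whose generated column mirrors the gtest32
regression test added by this patch (b is defined as a * 2): the
reference to t2.b inside the EXISTS sublink is expanded to its
generation expression before the sublink is pulled up into a semijoin.

    -- hypothetical table, mirroring the gtest32 regression test
    CREATE TABLE vt (a int,
                     b int GENERATED ALWAYS AS (a * 2) VIRTUAL);

    -- t2.b is replaced by (t2.a * 2) before sublink pull-up, so the
    -- expanded expression shows up in the semijoin's inner scan filter
    EXPLAIN (COSTS OFF)
    SELECT 1 FROM vt t1
    WHERE EXISTS (SELECT 1 FROM vt t2
                  WHERE t1.a > t2.a AND t2.b = 2);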
Author: Richard Guo Reviewed-by: Robert Haas Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAMbWs4-bFJ1At4btk5wqbezdu8PLtQ3zv-aiaY3ry9Ymm=jgFQ@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 17 ++++++------ src/backend/optimizer/plan/subselect.c | 27 +++++++++++++++++++ src/backend/optimizer/prep/prepjointree.c | 20 ++++++-------- src/include/optimizer/prep.h | 2 +- .../regress/expected/generated_virtual.out | 22 +++++++++++++++ src/test/regress/sql/generated_virtual.sql | 9 +++++++ 6 files changed, 76 insertions(+), 21 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 549aedcfa991a..fbbc42f160081 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -720,6 +720,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, */ transform_MERGE_to_join(parse); + /* + * Scan the rangetable for relations with virtual generated columns, and + * replace all Var nodes in the query that reference these columns with + * the generation expressions. Note that this step does not descend into + * sublinks and subqueries; if we pull up any sublinks or subqueries + * below, their rangetables are processed just before pulling them up. + */ + parse = root->parse = expand_virtual_generated_columns(root); + /* * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so * that we don't need so many special cases to deal with that situation. @@ -743,14 +752,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, */ preprocess_function_rtes(root); - /* - * Scan the rangetable for relations with virtual generated columns, and - * replace all Var nodes in the query that reference these columns with - * the generation expressions. Recursion issues here are handled in the - * same way as for SubLinks. - */ - parse = root->parse = expand_virtual_generated_columns(root); - /* * Check to see if any subqueries in the jointree can be merged into this * query. diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index e7cb3fede6658..575303b294a99 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1454,6 +1454,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, Query *parse = root->parse; Query *subselect = (Query *) sublink->subselect; Node *whereClause; + PlannerInfo subroot; int rtoffset; int varno; Relids clause_varnos; @@ -1515,6 +1516,32 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, if (contain_volatile_functions(whereClause)) return NULL; + /* + * Scan the rangetable for relations with virtual generated columns, and + * replace all Var nodes in the subquery that reference these columns with + * the generation expressions. + * + * Note: we construct up an entirely dummy PlannerInfo for use here. This + * is fine because only the "glob" and "parse" links will be used in this + * case. + * + * Note: we temporarily assign back the WHERE clause so that any virtual + * generated column references within it can be expanded. It should be + * separated out again afterward. + */ + MemSet(&subroot, 0, sizeof(subroot)); + subroot.type = T_PlannerInfo; + subroot.glob = root->glob; + subroot.parse = subselect; + subselect->jointree->quals = whereClause; + subselect = expand_virtual_generated_columns(&subroot); + + /* + * Now separate out the WHERE clause again. 
+ */ + whereClause = subselect->jointree->quals; + subselect->jointree->quals = NULL; + /* * The subquery must have a nonempty jointree, but we can make it so. */ diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 87dc6f56b576f..8140d22de703c 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -4,10 +4,10 @@ * Planner preprocessing for subqueries and join tree manipulation. * * NOTE: the intended sequence for invoking these operations is + * expand_virtual_generated_columns * replace_empty_jointree * pull_up_sublinks * preprocess_function_rtes - * expand_virtual_generated_columns * pull_up_subqueries * flatten_simple_union_all * do expression preprocessing (including flattening JOIN alias vars) @@ -958,10 +958,6 @@ preprocess_function_rtes(PlannerInfo *root) * generation expressions. Note that we do not descend into subqueries; that * is taken care of when the subqueries are planned. * - * This has to be done after we have pulled up any SubLinks within the query's - * quals; otherwise any virtual generated column references within the SubLinks - * that should be transformed into joins wouldn't get expanded. - * * Returns a modified copy of the query tree, if any relations with virtual * generated columns are present. */ @@ -1333,6 +1329,13 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, /* No CTEs to worry about */ Assert(subquery->cteList == NIL); + /* + * Scan the rangetable for relations with virtual generated columns, and + * replace all Var nodes in the subquery that reference these columns with + * the generation expressions. + */ + subquery = subroot->parse = expand_virtual_generated_columns(subroot); + /* * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so * that we don't need so many special cases to deal with that situation. @@ -1352,13 +1355,6 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, */ preprocess_function_rtes(subroot); - /* - * Scan the rangetable for relations with virtual generated columns, and - * replace all Var nodes in the query that reference these columns with - * the generation expressions. 
- */ - subquery = subroot->parse = expand_virtual_generated_columns(subroot); - /* * Recursively pull up the subquery's subqueries, so that * pull_up_subqueries' processing is complete for its jointree and diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index df56202777c7f..ceb731bcf5ef6 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -22,10 +22,10 @@ * prototypes for prepjointree.c */ extern void transform_MERGE_to_join(Query *parse); +extern Query *expand_virtual_generated_columns(PlannerInfo *root); extern void replace_empty_jointree(Query *parse); extern void pull_up_sublinks(PlannerInfo *root); extern void preprocess_function_rtes(PlannerInfo *root); -extern Query *expand_virtual_generated_columns(PlannerInfo *root); extern void pull_up_subqueries(PlannerInfo *root); extern void flatten_simple_union_all(PlannerInfo *root); extern void reduce_outer_joins(PlannerInfo *root); diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index 3b40e15a95ad0..a635cb1e7760b 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -1613,4 +1613,26 @@ select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded alter table gtest32 alter column e type bigint using b; +-- Ensure that virtual generated column references within SubLinks that should +-- be transformed into joins can get expanded +explain (costs off) +select 1 from gtest32 t1 where exists + (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2); + QUERY PLAN +------------------------------------- + Nested Loop Semi Join + Join Filter: (t1.a > t2.a) + -> Seq Scan on gtest32 t1 + -> Materialize + -> Seq Scan on gtest32 t2 + Filter: ((a * 2) = 2) +(6 rows) + +select 1 from gtest32 t1 where exists + (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2); + ?column? +---------- + 1 +(1 row) + drop table gtest32; diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql index e2b31853e0132..ba19bc4c701e0 100644 --- a/src/test/regress/sql/generated_virtual.sql +++ b/src/test/regress/sql/generated_virtual.sql @@ -858,4 +858,13 @@ select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; -- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded alter table gtest32 alter column e type bigint using b; +-- Ensure that virtual generated column references within SubLinks that should +-- be transformed into joins can get expanded +explain (costs off) +select 1 from gtest32 t1 where exists + (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2); + +select 1 from gtest32 t1 where exists + (select 1 from gtest32 t2 where t1.a > t2.a and t2.b = 2); + drop table gtest32; From 904f6a593a06649f77597ab9a72ef97c21e39a93 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 22 Jul 2025 11:20:40 +0900 Subject: [PATCH 347/602] Centralize collection of catalog info needed early in the planner There are several pieces of catalog information that need to be retrieved for a relation during the early stage of planning. These include relhassubclass, which is used to clear the inh flag if the relation has no children, as well as a column's attgenerated and default value, which are needed to expand virtual generated columns. More such information may be required in the future. 
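For reference, the per-relation catalog fields in question can be
viewed with a query like the following (the relation name is
hypothetical; this only illustrates which catalog data is needed, not
how the planner fetches it):

    -- 'some_table' is a placeholder name
    SELECT c.relhassubclass, a.attname, a.attgenerated, a.atthasdef
    FROM pg_class c
    JOIN pg_attribute a ON a.attrelid = c.oid
    WHERE c.relname = 'some_table' AND a.attnum > 0;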
Currently, these pieces of catalog data are collected in multiple places, resulting in repeated table_open/table_close calls for each relation in the rangetable. This patch centralizes the collection of all required early-stage catalog information into a single loop over the rangetable, allowing each relation to be opened and closed only once. Author: Richard Guo Reviewed-by: Robert Haas Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAMbWs4-bFJ1At4btk5wqbezdu8PLtQ3zv-aiaY3ry9Ymm=jgFQ@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 31 +-- src/backend/optimizer/plan/subselect.c | 9 +- src/backend/optimizer/prep/prepjointree.c | 299 +++++++++++++--------- src/include/optimizer/prep.h | 2 +- 4 files changed, 190 insertions(+), 151 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index fbbc42f160081..fc13d921d0c68 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -721,13 +721,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, transform_MERGE_to_join(parse); /* - * Scan the rangetable for relations with virtual generated columns, and - * replace all Var nodes in the query that reference these columns with - * the generation expressions. Note that this step does not descend into - * sublinks and subqueries; if we pull up any sublinks or subqueries - * below, their rangetables are processed just before pulling them up. + * Scan the rangetable for relation RTEs and retrieve the necessary + * catalog information for each relation. Using this information, clear + * the inh flag for any relation that has no children, and expand virtual + * generated columns for any relation that contains them. Note that this + * step does not descend into sublinks and subqueries; if we pull up any + * sublinks or subqueries below, their relation RTEs are processed just + * before pulling them up. */ - parse = root->parse = expand_virtual_generated_columns(root); + parse = root->parse = preprocess_relation_rtes(root); /* * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so @@ -788,23 +790,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, switch (rte->rtekind) { - case RTE_RELATION: - if (rte->inh) - { - /* - * Check to see if the relation actually has any children; - * if not, clear the inh flag so we can treat it as a - * plain base relation. - * - * Note: this could give a false-positive result, if the - * rel once had children but no longer does. We used to - * be able to clear rte->inh later on when we discovered - * that, but no more; we have to handle such cases as - * full-fledged inheritance. - */ - rte->inh = has_subclass(rte->relid); - } - break; case RTE_JOIN: root->hasJoinRTEs = true; if (IS_OUTER_JOIN(rte->jointype)) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 575303b294a99..4bdca59df64bf 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1517,9 +1517,10 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, return NULL; /* - * Scan the rangetable for relations with virtual generated columns, and - * replace all Var nodes in the subquery that reference these columns with - * the generation expressions. + * Scan the rangetable for relation RTEs and retrieve the necessary + * catalog information for each relation. 
Using this information, clear + * the inh flag for any relation that has no children, and expand virtual + * generated columns for any relation that contains them. * * Note: we construct up an entirely dummy PlannerInfo for use here. This * is fine because only the "glob" and "parse" links will be used in this @@ -1534,7 +1535,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, subroot.glob = root->glob; subroot.parse = subselect; subselect->jointree->quals = whereClause; - subselect = expand_virtual_generated_columns(&subroot); + subselect = preprocess_relation_rtes(&subroot); /* * Now separate out the WHERE clause again. diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 8140d22de703c..4b38851bd42b1 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -4,7 +4,7 @@ * Planner preprocessing for subqueries and join tree manipulation. * * NOTE: the intended sequence for invoking these operations is - * expand_virtual_generated_columns + * preprocess_relation_rtes * replace_empty_jointree * pull_up_sublinks * preprocess_function_rtes @@ -102,6 +102,9 @@ typedef struct reduce_outer_joins_partial_state Relids unreduced_side; /* relids in its still-nullable side */ } reduce_outer_joins_partial_state; +static Query *expand_virtual_generated_columns(PlannerInfo *root, Query *parse, + RangeTblEntry *rte, int rt_index, + Relation relation); static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, Relids *relids); static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, @@ -392,6 +395,173 @@ transform_MERGE_to_join(Query *parse) parse->mergeJoinCondition = NULL; /* join condition not needed */ } +/* + * preprocess_relation_rtes + * Do the preprocessing work for any relation RTEs in the FROM clause. + * + * This scans the rangetable for relation RTEs and retrieves the necessary + * catalog information for each relation. Using this information, it clears + * the inh flag for any relation that has no children, and expands virtual + * generated columns for any relation that contains them. + * + * Note that expanding virtual generated columns may cause the query tree to + * have new copies of rangetable entries. Therefore, we have to use list_nth + * instead of foreach when iterating over the query's rangetable. + * + * Returns a modified copy of the query tree, if any relations with virtual + * generated columns are present. + */ +Query * +preprocess_relation_rtes(PlannerInfo *root) +{ + Query *parse = root->parse; + int rtable_size; + int rt_index; + + rtable_size = list_length(parse->rtable); + + for (rt_index = 0; rt_index < rtable_size; rt_index++) + { + RangeTblEntry *rte = rt_fetch(rt_index + 1, parse->rtable); + Relation relation; + + /* We only care about relation RTEs. */ + if (rte->rtekind != RTE_RELATION) + continue; + + /* + * We need not lock the relation since it was already locked by the + * rewriter. + */ + relation = table_open(rte->relid, NoLock); + + /* + * Check to see if the relation actually has any children; if not, + * clear the inh flag so we can treat it as a plain base relation. + * + * Note: this could give a false-positive result, if the rel once had + * children but no longer does. We used to be able to clear rte->inh + * later on when we discovered that, but no more; we have to handle + * such cases as full-fledged inheritance. 
+ */ + if (rte->inh) + rte->inh = relation->rd_rel->relhassubclass; + + /* + * Check to see if the relation has any virtual generated columns; if + * so, replace all Var nodes in the query that reference these columns + * with the generation expressions. + */ + parse = expand_virtual_generated_columns(root, parse, + rte, rt_index + 1, + relation); + + table_close(relation, NoLock); + } + + return parse; +} + +/* + * expand_virtual_generated_columns + * Expand virtual generated columns for the given relation. + * + * This checks whether the given relation has any virtual generated columns, + * and if so, replaces all Var nodes in the query that reference those columns + * with their generation expressions. + * + * Returns a modified copy of the query tree if the relation contains virtual + * generated columns. + */ +static Query * +expand_virtual_generated_columns(PlannerInfo *root, Query *parse, + RangeTblEntry *rte, int rt_index, + Relation relation) +{ + TupleDesc tupdesc; + + /* Only normal relations can have virtual generated columns */ + Assert(rte->rtekind == RTE_RELATION); + + tupdesc = RelationGetDescr(relation); + if (tupdesc->constr && tupdesc->constr->has_generated_virtual) + { + List *tlist = NIL; + pullup_replace_vars_context rvcontext; + + for (int i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + TargetEntry *tle; + + if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) + { + Node *defexpr; + + defexpr = build_generation_expression(relation, i + 1); + ChangeVarNodes(defexpr, 1, rt_index, 0); + + tle = makeTargetEntry((Expr *) defexpr, i + 1, 0, false); + tlist = lappend(tlist, tle); + } + else + { + Var *var; + + var = makeVar(rt_index, + i + 1, + attr->atttypid, + attr->atttypmod, + attr->attcollation, + 0); + + tle = makeTargetEntry((Expr *) var, i + 1, 0, false); + tlist = lappend(tlist, tle); + } + } + + Assert(list_length(tlist) > 0); + Assert(!rte->lateral); + + /* + * The relation's targetlist items are now in the appropriate form to + * insert into the query, except that we may need to wrap them in + * PlaceHolderVars. Set up required context data for + * pullup_replace_vars. + */ + rvcontext.root = root; + rvcontext.targetlist = tlist; + rvcontext.target_rte = rte; + rvcontext.result_relation = parse->resultRelation; + /* won't need these values */ + rvcontext.relids = NULL; + rvcontext.nullinfo = NULL; + /* pass NULL for outer_hasSubLinks */ + rvcontext.outer_hasSubLinks = NULL; + rvcontext.varno = rt_index; + /* this flag will be set below, if needed */ + rvcontext.wrap_option = REPLACE_WRAP_NONE; + /* initialize cache array with indexes 0 .. length(tlist) */ + rvcontext.rv_cache = palloc0((list_length(tlist) + 1) * + sizeof(Node *)); + + /* + * If the query uses grouping sets, we need a PlaceHolderVar for each + * expression of the relation's targetlist items. (See comments in + * pull_up_simple_subquery().) + */ + if (parse->groupingSets) + rvcontext.wrap_option = REPLACE_WRAP_ALL; + + /* + * Apply pullup variable replacement throughout the query tree. + */ + parse = (Query *) pullup_replace_vars((Node *) parse, &rvcontext); + } + + return parse; +} + /* * replace_empty_jointree * If the Query's jointree is empty, replace it with a dummy RTE_RESULT @@ -949,124 +1119,6 @@ preprocess_function_rtes(PlannerInfo *root) } } -/* - * expand_virtual_generated_columns - * Expand all virtual generated column references in a query. 
- * - * This scans the rangetable for relations with virtual generated columns, and - * replaces all Var nodes in the query that reference these columns with the - * generation expressions. Note that we do not descend into subqueries; that - * is taken care of when the subqueries are planned. - * - * Returns a modified copy of the query tree, if any relations with virtual - * generated columns are present. - */ -Query * -expand_virtual_generated_columns(PlannerInfo *root) -{ - Query *parse = root->parse; - int rt_index; - ListCell *lc; - - rt_index = 0; - foreach(lc, parse->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); - Relation rel; - TupleDesc tupdesc; - - ++rt_index; - - /* - * Only normal relations can have virtual generated columns. - */ - if (rte->rtekind != RTE_RELATION) - continue; - - rel = table_open(rte->relid, NoLock); - - tupdesc = RelationGetDescr(rel); - if (tupdesc->constr && tupdesc->constr->has_generated_virtual) - { - List *tlist = NIL; - pullup_replace_vars_context rvcontext; - - for (int i = 0; i < tupdesc->natts; i++) - { - Form_pg_attribute attr = TupleDescAttr(tupdesc, i); - TargetEntry *tle; - - if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) - { - Node *defexpr; - - defexpr = build_generation_expression(rel, i + 1); - ChangeVarNodes(defexpr, 1, rt_index, 0); - - tle = makeTargetEntry((Expr *) defexpr, i + 1, 0, false); - tlist = lappend(tlist, tle); - } - else - { - Var *var; - - var = makeVar(rt_index, - i + 1, - attr->atttypid, - attr->atttypmod, - attr->attcollation, - 0); - - tle = makeTargetEntry((Expr *) var, i + 1, 0, false); - tlist = lappend(tlist, tle); - } - } - - Assert(list_length(tlist) > 0); - Assert(!rte->lateral); - - /* - * The relation's targetlist items are now in the appropriate form - * to insert into the query, except that we may need to wrap them - * in PlaceHolderVars. Set up required context data for - * pullup_replace_vars. - */ - rvcontext.root = root; - rvcontext.targetlist = tlist; - rvcontext.target_rte = rte; - rvcontext.result_relation = parse->resultRelation; - /* won't need these values */ - rvcontext.relids = NULL; - rvcontext.nullinfo = NULL; - /* pass NULL for outer_hasSubLinks */ - rvcontext.outer_hasSubLinks = NULL; - rvcontext.varno = rt_index; - /* this flag will be set below, if needed */ - rvcontext.wrap_option = REPLACE_WRAP_NONE; - /* initialize cache array with indexes 0 .. length(tlist) */ - rvcontext.rv_cache = palloc0((list_length(tlist) + 1) * - sizeof(Node *)); - - /* - * If the query uses grouping sets, we need a PlaceHolderVar for - * each expression of the relation's targetlist items. (See - * comments in pull_up_simple_subquery().) - */ - if (parse->groupingSets) - rvcontext.wrap_option = REPLACE_WRAP_ALL; - - /* - * Apply pullup variable replacement throughout the query tree. - */ - parse = (Query *) pullup_replace_vars((Node *) parse, &rvcontext); - } - - table_close(rel, NoLock); - } - - return parse; -} - /* * pull_up_subqueries * Look for subqueries in the rangetable that can be pulled up into @@ -1330,11 +1382,12 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, Assert(subquery->cteList == NIL); /* - * Scan the rangetable for relations with virtual generated columns, and - * replace all Var nodes in the subquery that reference these columns with - * the generation expressions. + * Scan the rangetable for relation RTEs and retrieve the necessary + * catalog information for each relation. 
Using this information, clear + * the inh flag for any relation that has no children, and expand virtual + * generated columns for any relation that contains them. */ - subquery = subroot->parse = expand_virtual_generated_columns(subroot); + subquery = subroot->parse = preprocess_relation_rtes(subroot); /* * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index ceb731bcf5ef6..4fbecdb446272 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -22,7 +22,7 @@ * prototypes for prepjointree.c */ extern void transform_MERGE_to_join(Query *parse); -extern Query *expand_virtual_generated_columns(PlannerInfo *root); +extern Query *preprocess_relation_rtes(PlannerInfo *root); extern void replace_empty_jointree(Query *parse); extern void pull_up_sublinks(PlannerInfo *root); extern void preprocess_function_rtes(PlannerInfo *root); From e2debb64380ebcf0979708a0fa88d9c8d924005b Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 22 Jul 2025 11:21:36 +0900 Subject: [PATCH 348/602] Reduce "Var IS [NOT] NULL" quals during constant folding In commit b262ad440, we introduced an optimization that reduces an IS [NOT] NULL qual on a NOT NULL column to constant true or constant false, provided we can prove that the input expression of the NullTest is not nullable by any outer joins or grouping sets. This deduction happens quite late in the planner, during the distribution of quals to rels in query_planner. However, this approach has some drawbacks: we can't perform any further folding with the constant, and it turns out to be prone to bugs. Ideally, this deduction should happen during constant folding. However, the per-relation information about which columns are defined as NOT NULL is not available at that point. This information is currently collected from catalogs when building RelOptInfos for base or "other" relations. This patch moves the collection of NOT NULL attribute information for relations before pull_up_sublinks, storing it in a hash table keyed by relation OID. It then uses this information to perform the NullTest deduction for Vars during constant folding. This also makes it possible to leverage this information to pull up NOT IN subqueries. Note that this patch does not get rid of restriction_is_always_true and restriction_is_always_false. Removing them would prevent us from reducing some IS [NOT] NULL quals that we were previously able to reduce, because (a) the self-join elimination may introduce new IS NOT NULL quals after constant folding, and (b) if some outer joins are converted to inner joins, previously irreducible NullTest quals may become reducible. 
Author: Richard Guo Reviewed-by: Robert Haas Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAMbWs4-bFJ1At4btk5wqbezdu8PLtQ3zv-aiaY3ry9Ymm=jgFQ@mail.gmail.com --- .../postgres_fdw/expected/postgres_fdw.out | 8 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 2 +- src/backend/optimizer/plan/initsplan.c | 24 +--- src/backend/optimizer/plan/planner.c | 12 +- src/backend/optimizer/plan/subselect.c | 20 ++- src/backend/optimizer/prep/prepjointree.c | 19 ++- src/backend/optimizer/util/clauses.c | 92 ++++++++++++- src/backend/optimizer/util/inherit.c | 10 +- src/backend/optimizer/util/plancat.c | 127 +++++++++++++++--- src/include/nodes/pathnodes.h | 12 +- src/include/optimizer/optimizer.h | 2 + src/include/optimizer/plancat.h | 4 + .../regress/expected/generated_virtual.out | 6 +- src/test/regress/expected/join.out | 6 +- src/test/regress/expected/predicate.out | 54 +++++++- src/test/regress/sql/predicate.sql | 18 +++ src/tools/pgindent/typedefs.list | 1 + 17 files changed, 336 insertions(+), 81 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index ff2b30cc91221..4b6e49a5d950d 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -710,12 +710,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- Op Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = (- "C 1"))) (3 rows) -EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c1 IS NOT NULL) IS DISTINCT FROM (c1 IS NOT NULL); -- DistinctExpr - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------- +EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan on public.ft1 t1 Output: c1, c2, c3, c4, c5, c6, c7, c8 - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" IS NOT NULL) IS DISTINCT FROM ("C 1" IS NOT NULL))) + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (((c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL))) (3 rows) EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 7267732f569e5..31b6c685b551b 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -352,7 +352,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NULL; -- Nu EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NOT NULL; -- NullTest EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE round(abs(c1), 0) = 1; -- FuncExpr EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- OpExpr(l) -EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c1 IS NOT NULL) IS DISTINCT FROM (c1 IS NOT NULL); -- DistinctExpr +EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- SubscriptingRef EXPLAIN 
(VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 01804b085b3ba..3e3fec8925291 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -3048,36 +3048,16 @@ add_base_clause_to_rel(PlannerInfo *root, Index relid, * expr_is_nonnullable * Check to see if the Expr cannot be NULL * - * If the Expr is a simple Var that is defined NOT NULL and meanwhile is not - * nulled by any outer joins, then we can know that it cannot be NULL. + * Currently we only support simple Vars. */ static bool expr_is_nonnullable(PlannerInfo *root, Expr *expr) { - RelOptInfo *rel; - Var *var; - /* For now only check simple Vars */ if (!IsA(expr, Var)) return false; - var = (Var *) expr; - - /* could the Var be nulled by any outer joins? */ - if (!bms_is_empty(var->varnullingrels)) - return false; - - /* system columns cannot be NULL */ - if (var->varattno < 0) - return true; - - /* is the column defined NOT NULL? */ - rel = find_base_rel(root, var->varno); - if (var->varattno > 0 && - bms_is_member(var->varattno, rel->notnullattnums)) - return true; - - return false; + return var_is_nonnullable(root, (Var *) expr, true); } /* diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index fc13d921d0c68..c989e72cac5cf 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -342,6 +342,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, glob->transientPlan = false; glob->dependsOnRole = false; glob->partition_directory = NULL; + glob->rel_notnullatts_hash = NULL; /* * Assess whether it's feasible to use parallel mode for this query. We @@ -723,11 +724,12 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, /* * Scan the rangetable for relation RTEs and retrieve the necessary * catalog information for each relation. Using this information, clear - * the inh flag for any relation that has no children, and expand virtual - * generated columns for any relation that contains them. Note that this - * step does not descend into sublinks and subqueries; if we pull up any - * sublinks or subqueries below, their relation RTEs are processed just - * before pulling them up. + * the inh flag for any relation that has no children, collect not-null + * attribute numbers for any relation that has column not-null + * constraints, and expand virtual generated columns for any relation that + * contains them. Note that this step does not descend into sublinks and + * subqueries; if we pull up any sublinks or subqueries below, their + * relation RTEs are processed just before pulling them up. */ parse = root->parse = preprocess_relation_rtes(root); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 4bdca59df64bf..d71ed958e31b3 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1519,8 +1519,10 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, /* * Scan the rangetable for relation RTEs and retrieve the necessary * catalog information for each relation. Using this information, clear - * the inh flag for any relation that has no children, and expand virtual - * generated columns for any relation that contains them. 
+ * the inh flag for any relation that has no children, collect not-null + * attribute numbers for any relation that has column not-null + * constraints, and expand virtual generated columns for any relation that + * contains them. * * Note: we construct up an entirely dummy PlannerInfo for use here. This * is fine because only the "glob" and "parse" links will be used in this @@ -1760,6 +1762,7 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds) { Node *whereClause; + PlannerInfo subroot; List *leftargs, *rightargs, *opids, @@ -1819,12 +1822,15 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, * parent aliases were flattened already, and we're not going to pull any * child Vars (of any description) into the parent. * - * Note: passing the parent's root to eval_const_expressions is - * technically wrong, but we can get away with it since only the - * boundParams (if any) are used, and those would be the same in a - * subroot. + * Note: we construct up an entirely dummy PlannerInfo to pass to + * eval_const_expressions. This is fine because only the "glob" and + * "parse" links are used by eval_const_expressions. */ - whereClause = eval_const_expressions(root, whereClause); + MemSet(&subroot, 0, sizeof(subroot)); + subroot.type = T_PlannerInfo; + subroot.glob = root->glob; + subroot.parse = subselect; + whereClause = eval_const_expressions(&subroot, whereClause); whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); whereClause = (Node *) make_ands_implicit((Expr *) whereClause); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 4b38851bd42b1..35e8d3c183b47 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -36,6 +36,7 @@ #include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include "optimizer/placeholder.h" +#include "optimizer/plancat.h" #include "optimizer/prep.h" #include "optimizer/subselect.h" #include "optimizer/tlist.h" @@ -401,8 +402,9 @@ transform_MERGE_to_join(Query *parse) * * This scans the rangetable for relation RTEs and retrieves the necessary * catalog information for each relation. Using this information, it clears - * the inh flag for any relation that has no children, and expands virtual - * generated columns for any relation that contains them. + * the inh flag for any relation that has no children, collects not-null + * attribute numbers for any relation that has column not-null constraints, and + * expands virtual generated columns for any relation that contains them. * * Note that expanding virtual generated columns may cause the query tree to * have new copies of rangetable entries. Therefore, we have to use list_nth @@ -447,6 +449,13 @@ preprocess_relation_rtes(PlannerInfo *root) if (rte->inh) rte->inh = relation->rd_rel->relhassubclass; + /* + * Check to see if the relation has any column not-null constraints; + * if so, retrieve the constraint information and store it in a + * relation OID based hash table. + */ + get_relation_notnullatts(root, relation); + /* * Check to see if the relation has any virtual generated columns; if * so, replace all Var nodes in the query that reference these columns @@ -1384,8 +1393,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, /* * Scan the rangetable for relation RTEs and retrieve the necessary * catalog information for each relation. 
Using this information, clear - * the inh flag for any relation that has no children, and expand virtual - * generated columns for any relation that contains them. + * the inh flag for any relation that has no children, collect not-null + * attribute numbers for any relation that has column not-null + * constraints, and expand virtual generated columns for any relation that + * contains them. */ subquery = subroot->parse = preprocess_relation_rtes(subroot); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index f45131c34c5a3..6f0b338d2cdf1 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "catalog/pg_class.h" #include "catalog/pg_language.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" @@ -36,6 +37,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" #include "optimizer/plancat.h" #include "optimizer/planmain.h" #include "parser/analyze.h" @@ -43,6 +45,7 @@ #include "parser/parse_collate.h" #include "parser/parse_func.h" #include "parser/parse_oper.h" +#include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" #include "rewrite/rewriteManip.h" #include "tcop/tcopprot.h" @@ -2242,7 +2245,8 @@ rowtype_field_matches(Oid rowtypeid, int fieldnum, * only operators and functions that are reasonable to try to execute. * * NOTE: "root" can be passed as NULL if the caller never wants to do any - * Param substitutions nor receive info about inlined functions. + * Param substitutions nor receive info about inlined functions nor reduce + * NullTest for Vars to constant true or constant false. * * NOTE: the planner assumes that this will always flatten nested AND and * OR clauses into N-argument form. See comments in prepqual.c. @@ -3544,6 +3548,31 @@ eval_const_expressions_mutator(Node *node, return makeBoolConst(result, false); } + if (!ntest->argisrow && arg && IsA(arg, Var) && context->root) + { + Var *varg = (Var *) arg; + bool result; + + if (var_is_nonnullable(context->root, varg, false)) + { + switch (ntest->nulltesttype) + { + case IS_NULL: + result = false; + break; + case IS_NOT_NULL: + result = true; + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) ntest->nulltesttype); + result = false; /* keep compiler quiet */ + break; + } + + return makeBoolConst(result, false); + } + } newntest = makeNode(NullTest); newntest->arg = (Expr *) arg; @@ -4162,6 +4191,67 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod, return newexpr; } +/* + * var_is_nonnullable: check to see if the Var cannot be NULL + * + * If the Var is defined NOT NULL and meanwhile is not nulled by any outer + * joins or grouping sets, then we can know that it cannot be NULL. + * + * use_rel_info indicates whether the corresponding RelOptInfo is available for + * use. + */ +bool +var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info) +{ + Relids notnullattnums = NULL; + + Assert(IsA(var, Var)); + + /* skip upper-level Vars */ + if (var->varlevelsup != 0) + return false; + + /* could the Var be nulled by any outer joins or grouping sets? */ + if (!bms_is_empty(var->varnullingrels)) + return false; + + /* system columns cannot be NULL */ + if (var->varattno < 0) + return true; + + /* + * Check if the Var is defined as NOT NULL. 
We retrieve the column NOT + * NULL constraint information from the corresponding RelOptInfo if it is + * available; otherwise, we search the hash table for this information. + */ + if (use_rel_info) + { + RelOptInfo *rel = find_base_rel(root, var->varno); + + notnullattnums = rel->notnullattnums; + } + else + { + RangeTblEntry *rte = planner_rt_fetch(var->varno, root); + + /* + * We must skip inheritance parent tables, as some child tables may + * have a NOT NULL constraint for a column while others may not. This + * cannot happen with partitioned tables, though. + */ + if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) + return false; + + notnullattnums = find_relation_notnullatts(root, rte->relid); + } + + if (var->varattno > 0 && + bms_is_member(var->varattno, notnullattnums)) + return true; + + return false; +} + /* * expand_function_arguments: convert named-notation args to positional args * and/or insert default args, as needed diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 17e51cd75d744..30d158069e332 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -466,8 +466,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, Index *childRTindex_p) { Query *parse = root->parse; - Oid parentOID PG_USED_FOR_ASSERTS_ONLY = - RelationGetRelid(parentrel); + Oid parentOID = RelationGetRelid(parentrel); Oid childOID = RelationGetRelid(childrel); RangeTblEntry *childrte; Index childRTindex; @@ -513,6 +512,13 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, *childrte_p = childrte; *childRTindex_p = childRTindex; + /* + * Retrieve column not-null constraint information for the child relation + * if its relation OID is different from the parent's. + */ + if (childOID != parentOID) + get_relation_notnullatts(root, childrel); + /* * Build an AppendRelInfo struct for each parent/child pair. */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 59233b647302d..c6a58afc5e506 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -59,6 +59,12 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION; /* Hook for plugins to get control in get_relation_info() */ get_relation_info_hook_type get_relation_info_hook = NULL; +typedef struct NotnullHashEntry +{ + Oid relid; /* OID of the relation */ + Relids notnullattnums; /* attnums of NOT NULL columns */ +} NotnullHashEntry; + static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, Relation relation, bool inhparent); @@ -172,27 +178,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * RangeTblEntry does get populated. */ if (!inhparent || relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - { - for (int i = 0; i < relation->rd_att->natts; i++) - { - CompactAttribute *attr = TupleDescCompactAttr(relation->rd_att, i); - - Assert(attr->attnullability != ATTNULLABLE_UNKNOWN); - - if (attr->attnullability == ATTNULLABLE_VALID) - { - rel->notnullattnums = bms_add_member(rel->notnullattnums, - i + 1); - - /* - * Per RemoveAttributeById(), dropped columns will have their - * attnotnull unset, so we needn't check for dropped columns - * in the above condition. 
- */ - Assert(!attr->attisdropped); - } - } - } + rel->notnullattnums = find_relation_notnullatts(root, relationObjectId); /* * Estimate relation size --- unless it's an inheritance parent, in which @@ -683,6 +669,105 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, } } +/* + * get_relation_notnullatts - + * Retrieves column not-null constraint information for a given relation. + * + * We do this while we have the relcache entry open, and store the column + * not-null constraint information in a hash table based on the relation OID. + */ +void +get_relation_notnullatts(PlannerInfo *root, Relation relation) +{ + Oid relid = RelationGetRelid(relation); + NotnullHashEntry *hentry; + bool found; + Relids notnullattnums = NULL; + + /* bail out if the relation has no not-null constraints */ + if (relation->rd_att->constr == NULL || + !relation->rd_att->constr->has_not_null) + return; + + /* create the hash table if it hasn't been created yet */ + if (root->glob->rel_notnullatts_hash == NULL) + { + HTAB *hashtab; + HASHCTL hash_ctl; + + hash_ctl.keysize = sizeof(Oid); + hash_ctl.entrysize = sizeof(NotnullHashEntry); + hash_ctl.hcxt = CurrentMemoryContext; + + hashtab = hash_create("Relation NOT NULL attnums", + 64L, /* arbitrary initial size */ + &hash_ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + root->glob->rel_notnullatts_hash = hashtab; + } + + /* + * Create a hash entry for this relation OID, if we don't have one + * already. + */ + hentry = (NotnullHashEntry *) hash_search(root->glob->rel_notnullatts_hash, + &relid, + HASH_ENTER, + &found); + + /* bail out if a hash entry already exists for this relation OID */ + if (found) + return; + + /* collect the column not-null constraint information for this relation */ + for (int i = 0; i < relation->rd_att->natts; i++) + { + CompactAttribute *attr = TupleDescCompactAttr(relation->rd_att, i); + + Assert(attr->attnullability != ATTNULLABLE_UNKNOWN); + + if (attr->attnullability == ATTNULLABLE_VALID) + { + notnullattnums = bms_add_member(notnullattnums, i + 1); + + /* + * Per RemoveAttributeById(), dropped columns will have their + * attnotnull unset, so we needn't check for dropped columns in + * the above condition. + */ + Assert(!attr->attisdropped); + } + } + + /* ... and initialize the new hash entry */ + hentry->notnullattnums = notnullattnums; +} + +/* + * find_relation_notnullatts - + * Searches the hash table and returns the column not-null constraint + * information for a given relation. + */ +Relids +find_relation_notnullatts(PlannerInfo *root, Oid relid) +{ + NotnullHashEntry *hentry; + bool found; + + if (root->glob->rel_notnullatts_hash == NULL) + return NULL; + + hentry = (NotnullHashEntry *) hash_search(root->glob->rel_notnullatts_hash, + &relid, + HASH_FIND, + &found); + if (!found) + return NULL; + + return hentry->notnullattnums; +} + /* * infer_arbiter_indexes - * Determine the unique indexes used to arbitrate speculative insertion. 
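To see how these pieces are meant to be consulted together, here is a minimal
sketch (not part of the patch; column_is_known_nonnull is a hypothetical
wrapper, while makeVar() and var_is_nonnullable() are the real APIs added or
touched by this commit):

    #include "nodes/makefuncs.h"
    #include "optimizer/optimizer.h"

    /*
     * Sketch: report whether column "attno" of base relation "varno" is
     * provably non-NULL at this point in planning.  With use_rel_info =
     * true the check goes through the RelOptInfo's notnullattnums; passing
     * false instead falls back to the relation-OID-based hash table
     * populated by get_relation_notnullatts().
     */
    static bool
    column_is_known_nonnull(PlannerInfo *root, int varno,
                            AttrNumber attno, Oid atttype)
    {
        Var        *var = makeVar(varno, attno, atttype, -1, InvalidOid, 0);

        return var_is_nonnullable(root, var, true);
    }

A freshly built Var has varlevelsup = 0 and empty varnullingrels, so the
checks at the top of var_is_nonnullable() pass and the answer depends only on
the stored NOT NULL attribute numbers.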
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 6567759595daa..e5dd15098f635 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -179,6 +179,9 @@ typedef struct PlannerGlobal /* partition descriptors */ PartitionDirectory partition_directory pg_node_attr(read_write_ignore); + + /* hash table for NOT NULL attnums of relations */ + struct HTAB *rel_notnullatts_hash pg_node_attr(read_write_ignore); } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ @@ -719,6 +722,9 @@ typedef struct PartitionSchemeData *PartitionScheme; * the attribute is needed as part of final targetlist * attr_widths - cache space for per-attribute width estimates; * zero means not computed yet + * notnullattnums - zero-based set containing attnums of NOT NULL + * columns (not populated for rels corresponding to + * non-partitioned inh==true RTEs) * nulling_relids - relids of outer joins that can null this rel * lateral_vars - lateral cross-references of rel, if any (list of * Vars and PlaceHolderVars) @@ -952,11 +958,7 @@ typedef struct RelOptInfo Relids *attr_needed pg_node_attr(read_write_ignore); /* array indexed [min_attr .. max_attr] */ int32 *attr_widths pg_node_attr(read_write_ignore); - - /* - * Zero-based set containing attnums of NOT NULL columns. Not populated - * for rels corresponding to non-partitioned inh==true RTEs. - */ + /* zero-based set containing attnums of NOT NULL columns */ Bitmapset *notnullattnums; /* relids of outer joins that can null this baserel */ Relids nulling_relids; diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 546828b54bd27..37bc13c2cbde6 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -154,6 +154,8 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, Oid result_collation); +extern bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info); + extern List *expand_function_arguments(List *args, bool include_out_arguments, Oid result_type, struct HeapTupleData *func_tuple); diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index cd74e4b1e8b36..d6f6f4ad2d788 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -28,6 +28,10 @@ extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook; extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); +extern void get_relation_notnullatts(PlannerInfo *root, Relation relation); + +extern Relids find_relation_notnullatts(PlannerInfo *root, Oid relid); + extern List *infer_arbiter_indexes(PlannerInfo *root); extern void estimate_rel_size(Relation rel, int32 *attr_widths, diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index a635cb1e7760b..aca6347babe96 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -1550,11 +1550,11 @@ where coalesce(t2.b, 1) = 2; explain (costs off) select t1.a from gtest32 t1 left join gtest32 t2 on t1.a = t2.a where coalesce(t2.b, 1) = 2 or t1.a is null; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +----------------------------------------- Hash Left Join Hash Cond: (t1.a = t2.a) - Filter: ((COALESCE((t2.a * 2), 1) = 2) OR (t1.a IS NULL)) + Filter: 
(COALESCE((t2.a * 2), 1) = 2) -> Seq Scan on gtest32 t1 -> Hash -> Seq Scan on gtest32 t2 diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 46ddfa844c595..4d5d35d07270d 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3639,8 +3639,8 @@ from nt3 as nt3 ) as ss2 on ss2.id = nt3.nt2_id where nt3.id = 1 and ss2.b3; - QUERY PLAN ------------------------------------------------ + QUERY PLAN +---------------------------------------------- Nested Loop -> Nested Loop -> Index Scan using nt3_pkey on nt3 @@ -3649,7 +3649,7 @@ where nt3.id = 1 and ss2.b3; Index Cond: (id = nt3.nt2_id) -> Index Only Scan using nt1_pkey on nt1 Index Cond: (id = nt2.nt1_id) - Filter: (nt2.b1 AND (id IS NOT NULL)) + Filter: (nt2.b1 AND true) (9 rows) select nt3.id diff --git a/src/test/regress/expected/predicate.out b/src/test/regress/expected/predicate.out index b79037748b7e6..59bfe33bb1ce1 100644 --- a/src/test/regress/expected/predicate.out +++ b/src/test/regress/expected/predicate.out @@ -84,10 +84,10 @@ SELECT * FROM pred_tab t WHERE t.a IS NULL OR t.c IS NULL; -- are provably false EXPLAIN (COSTS OFF) SELECT * FROM pred_tab t WHERE t.b IS NULL OR t.c IS NULL; - QUERY PLAN ----------------------------------------- + QUERY PLAN +------------------------ Seq Scan on pred_tab t - Filter: ((b IS NULL) OR (c IS NULL)) + Filter: (b IS NULL) (2 rows) -- @@ -231,6 +231,54 @@ SELECT * FROM pred_tab t1 -> Seq Scan on pred_tab t3 (9 rows) +-- +-- Tests for NullTest reduction in EXISTS sublink +-- +-- Ensure the IS_NOT_NULL qual is ignored +EXPLAIN (COSTS OFF) +SELECT * FROM pred_tab t1 + LEFT JOIN pred_tab t2 ON EXISTS + (SELECT 1 FROM pred_tab t3, pred_tab t4, pred_tab t5, pred_tab t6 + WHERE t1.a = t3.a AND t6.a IS NOT NULL); + QUERY PLAN +--------------------------------------------------------- + Nested Loop Left Join + Join Filter: EXISTS(SubPlan 1) + -> Seq Scan on pred_tab t1 + -> Materialize + -> Seq Scan on pred_tab t2 + SubPlan 1 + -> Nested Loop + -> Nested Loop + -> Nested Loop + -> Seq Scan on pred_tab t4 + -> Materialize + -> Seq Scan on pred_tab t3 + Filter: (t1.a = a) + -> Materialize + -> Seq Scan on pred_tab t5 + -> Materialize + -> Seq Scan on pred_tab t6 +(17 rows) + +-- Ensure the IS_NULL qual is reduced to constant-FALSE +EXPLAIN (COSTS OFF) +SELECT * FROM pred_tab t1 + LEFT JOIN pred_tab t2 ON EXISTS + (SELECT 1 FROM pred_tab t3, pred_tab t4, pred_tab t5, pred_tab t6 + WHERE t1.a = t3.a AND t6.a IS NULL); + QUERY PLAN +------------------------------------- + Nested Loop Left Join + Join Filter: (InitPlan 1).col1 + InitPlan 1 + -> Result + One-Time Filter: false + -> Seq Scan on pred_tab t1 + -> Materialize + -> Seq Scan on pred_tab t2 +(8 rows) + DROP TABLE pred_tab; -- Validate we handle IS NULL and IS NOT NULL quals correctly with inheritance -- parents. 
diff --git a/src/test/regress/sql/predicate.sql b/src/test/regress/sql/predicate.sql index 9dcb81b1bc52f..d92277353a019 100644 --- a/src/test/regress/sql/predicate.sql +++ b/src/test/regress/sql/predicate.sql @@ -115,6 +115,24 @@ SELECT * FROM pred_tab t1 LEFT JOIN pred_tab t2 ON t1.a = 1 LEFT JOIN pred_tab t3 ON t2.a IS NULL OR t2.c IS NULL; +-- +-- Tests for NullTest reduction in EXISTS sublink +-- + +-- Ensure the IS_NOT_NULL qual is ignored +EXPLAIN (COSTS OFF) +SELECT * FROM pred_tab t1 + LEFT JOIN pred_tab t2 ON EXISTS + (SELECT 1 FROM pred_tab t3, pred_tab t4, pred_tab t5, pred_tab t6 + WHERE t1.a = t3.a AND t6.a IS NOT NULL); + +-- Ensure the IS_NULL qual is reduced to constant-FALSE +EXPLAIN (COSTS OFF) +SELECT * FROM pred_tab t1 + LEFT JOIN pred_tab t2 ON EXISTS + (SELECT 1 FROM pred_tab t3, pred_tab t4, pred_tab t5, pred_tab t6 + WHERE t1.a = t3.a AND t6.a IS NULL); + DROP TABLE pred_tab; -- Validate we handle IS NULL and IS NOT NULL quals correctly with inheritance diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ff050e93a5077..cd89746708889 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1759,6 +1759,7 @@ NonEmptyRange Notification NotificationList NotifyStmt +NotnullHashEntry Nsrt NtDllRoutine NtFlushBuffersFileEx_t From 1b8bbee05d70deae34d0f7484afde03518c07e42 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 22 Jul 2025 14:00:00 +0900 Subject: [PATCH 349/602] ecpg: Fix NULL pointer dereference during connection lookup ECPGconnect() caches established connections to the server, supporting the case of a NULL connection name when a database name is not specified by its caller. A follow-up call to ECPGget_PGconn() to get an established connection from the cached set with a non-NULL name could cause a NULL pointer dereference if a NULL connection was listed in the cache and checked for a match. At least two connections are necessary to reproduce the issue: one with a NULL name and one with a non-NULL name. Author: Aleksander Alekseev Discussion: https://postgr.es/m/CAJ7c6TNvFTPUTZQuNAoqgzaSGz-iM4XR61D7vEj5PsQXwg2RyA@mail.gmail.com Backpatch-through: 13 --- src/interfaces/ecpg/ecpglib/connect.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 2bbb70333dcb4..713cbbf6360be 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -58,7 +58,12 @@ ecpg_get_connection_nr(const char *connection_name) for (con = all_connections; con != NULL; con = con->next) { - if (strcmp(connection_name, con->name) == 0) + /* + * Check for the case of a NULL connection name, stored as such in + * the connection information by ECPGconnect() when the database + * name is not specified by its caller. + */ + if (con->name != NULL && strcmp(connection_name, con->name) == 0) break; } ret = con; From 112faf1378ee62db75cd9e3223c86bf53b8c2736 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Tue, 22 Jul 2025 14:16:45 +0900 Subject: [PATCH 350/602] Log remote NOTICE, WARNING, and similar messages using ereport(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, NOTICE, WARNING, and similar messages received from remote servers over replication, postgres_fdw, or dblink connections were printed directly to stderr on the local server (e.g., the subscriber). 
As a result, these messages lacked log prefixes (e.g., timestamp), making them harder to trace and correlate with other log entries. This commit addresses the issue by introducing a custom notice receiver for replication, postgres_fdw, and dblink connections. These messages are now logged via ereport(), ensuring they appear in the logs with proper formatting and context, which improves clarity and aids in debugging. Author: Vignesh C Reviewed-by: Álvaro Herrera Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/CALDaNm2xsHpWRtLm-VL_HJCsaE3+1Y_n-jDEAr3-suxVqc3xoQ@mail.gmail.com --- contrib/dblink/dblink.c | 7 +++++ contrib/postgres_fdw/connection.c | 3 ++ .../libpqwalreceiver/libpqwalreceiver.c | 3 ++ src/include/libpq/libpq-be-fe-helpers.h | 30 +++++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index 8a0b112a7ff29..c459a842fa95e 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -240,6 +240,10 @@ dblink_get_conn(char *conname_or_str, errmsg("could not establish connection"), errdetail_internal("%s", msg))); } + + PQsetNoticeReceiver(conn, libpqsrv_notice_receiver, + gettext_noop("received message via remote connection")); + dblink_security_check(conn, NULL, connstr); if (PQclientEncoding(conn) != GetDatabaseEncoding()) PQsetClientEncoding(conn, GetDatabaseEncodingName()); @@ -338,6 +342,9 @@ dblink_connect(PG_FUNCTION_ARGS) errdetail_internal("%s", msg))); } + PQsetNoticeReceiver(conn, libpqsrv_notice_receiver, + gettext_noop("received message via remote connection")); + /* check password actually used if not superuser */ dblink_security_check(conn, connname, connstr); diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index 304f3c20f8356..e41d47c3bbd1a 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -625,6 +625,9 @@ connect_pg_server(ForeignServer *server, UserMapping *user) server->servername), errdetail_internal("%s", pchomp(PQerrorMessage(conn))))); + PQsetNoticeReceiver(conn, libpqsrv_notice_receiver, + gettext_noop("received message via remote connection")); + /* Perform post-connection security checks. */ pgfdw_security_check(keywords, values, user, conn); diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index f7b5d093681a2..0c75fe064d5ec 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -232,6 +232,9 @@ libpqrcv_connect(const char *conninfo, bool replication, bool logical, errhint("Target server's authentication method must be changed, or set password_required=false in the subscription parameters."))); } + PQsetNoticeReceiver(conn->streamConn, libpqsrv_notice_receiver, + gettext_noop("received message via replication")); + /* * Set always-secure search path for the cases where the connection is * used to run SQL queries, so malicious users can't get control. diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index 16205b824fa55..49137a0a5706a 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -454,4 +454,34 @@ exit: ; return error; } +/* + * libpqsrv_notice_receiver + * + * Custom notice receiver for libpq connections. 
+ *
+ * This function is intended to be set via PQsetNoticeReceiver() so that
+ * NOTICE, WARNING, and similar messages from the connection are reported via
+ * ereport(), instead of being printed to stderr.
+ */
+static inline void
+libpqsrv_notice_receiver(void *arg, const PGresult *res)
+{
+	char	   *message;
+	int			len;
+	char	   *prefix = (char *) arg;
+
+	/*
+	 * Trim the trailing newline from the message text returned from
+	 * PQresultErrorMessage(), as it always includes one, to produce cleaner
+	 * log output.
+	 */
+	message = PQresultErrorMessage(res);
+	len = strlen(message);
+	if (len > 0 && message[len - 1] == '\n')
+		len--;
+
+	ereport(LOG,
+			errmsg_internal("%s: %.*s", _(prefix), len, message));
+}
+
 #endif							/* LIBPQ_BE_FE_HELPERS_H */

From 19179dbffc8fb9ff12b73fa157a340cff6867ca0 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 22 Jul 2025 14:34:15 +0900
Subject: [PATCH 351/602] doc: Inform about aminsertcleanup optional NULLness

This index AM callback was introduced in c1ec02be1d79 and is optional,
currently used only by BRIN. Optional callbacks are documented with NULL
as a possible value in amapi.h and indexam.sgml, but this callback was
missing that part of the description.

Reported-by: Peter Smith
Reviewed-by: Japin Li
Discussion: https://postgr.es/m/CAHut+PvgYcPmPDi1YdHMJY5upnyGRpc0N8pk1xNB11xDSBwNog@mail.gmail.com
Backpatch-through: 17
---
 doc/src/sgml/indexam.sgml  | 2 +-
 src/include/access/amapi.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 1aa4741a8eaee..63d7e376f195e 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -147,7 +147,7 @@ typedef struct IndexAmRoutine
     ambuild_function ambuild;
     ambuildempty_function ambuildempty;
     aminsert_function aminsert;
-    aminsertcleanup_function aminsertcleanup;
+    aminsertcleanup_function aminsertcleanup;   /* can be NULL */
     ambulkdelete_function ambulkdelete;
     amvacuumcleanup_function amvacuumcleanup;
     amcanreturn_function amcanreturn;   /* can be NULL */
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 52916bab7a31f..70949de56ac70 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -293,7 +293,7 @@ typedef struct IndexAmRoutine
     ambuild_function ambuild;
     ambuildempty_function ambuildempty;
     aminsert_function aminsert;
-    aminsertcleanup_function aminsertcleanup;
+    aminsertcleanup_function aminsertcleanup;   /* can be NULL */
     ambulkdelete_function ambulkdelete;
     amvacuumcleanup_function amvacuumcleanup;
     amcanreturn_function amcanreturn;   /* can be NULL */

From ce6513e96a170510e2c54e82e3ad39fa46babb40 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Tue, 22 Jul 2025 06:00:21 +0000
Subject: [PATCH 352/602] Doc: Fix logical replication examples.

The definition of \dRp+ was modified in commit 7054186c4e. This patch
updates the column list and row filter examples to align with the
revised definition.
Author: Shlok Kyal Reviewed by: Peter Smith Backpatch-through: 18, where it was introduced Discussion: https://postgr.es/m/CANhcyEUvqkSO6b9zi_fs_BBPEge5acj4mf8QKmq2TX-7axa7EQ@mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 42 +++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index e26f7f59d4a5a..48edb35a26070 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -1048,28 +1048,28 @@ HINT: To initiate replication, you must manually create the replication slot, e defined) for each publication. 5) AND (c = 'NSW'::text)) + "public.t1" WHERE ((a > 5) AND (c = 'NSW'::text)) - Publication p2 - Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root -----------+------------+---------+---------+---------+-----------+---------- - postgres | f | t | t | t | t | f + Publication p2 + Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root +----------+------------+---------+---------+---------+-----------+-------------------+---------- + postgres | f | t | t | t | t | none | f Tables: - "public.t1" - "public.t2" WHERE (e = 99) + "public.t1" + "public.t2" WHERE (e = 99) - Publication p3 - Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root -----------+------------+---------+---------+---------+-----------+---------- - postgres | f | t | t | t | t | f + Publication p3 + Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root +----------+------------+---------+---------+---------+-----------+-------------------+---------- + postgres | f | t | t | t | t | none | f Tables: - "public.t2" WHERE (d = 10) - "public.t3" WHERE (g = 10) + "public.t2" WHERE (d = 10) + "public.t3" WHERE (g = 10) ]]> @@ -1491,10 +1491,10 @@ Publications: for each publication. /* pub # */ \dRp+ - Publication p1 - Owner | All tables | Inserts | Updates | Deletes | Truncates | Via root -----------+------------+---------+---------+---------+-----------+---------- - postgres | f | t | t | t | t | f + Publication p1 + Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root +----------+------------+---------+---------+---------+-----------+-------------------+---------- + postgres | f | t | t | t | t | none | f Tables: "public.t1" (id, a, b, d) From d3f97fd1dda31d7bc925b226c2d9bec31bb7a6eb Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 22 Jul 2025 08:30:52 -0400 Subject: [PATCH 353/602] aio: Fix assertion, clarify README The assertion wouldn't have triggered for a long while yet, but this won't accidentally fail to detect the issue if/when it occurs. Author: Matthias van de Meent Discussion: https://postgr.es/m/CAEze2Wj-43JV4YufW23gm=Uwr7Lkj+p0yKctKHxNm1rwFC+_DQ@mail.gmail.com Backpatch-through: 18 --- src/backend/storage/aio/README.md | 5 +++-- src/include/storage/aio.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/aio/README.md b/src/backend/storage/aio/README.md index f10b5c7e31ec7..72ae3b3737d51 100644 --- a/src/backend/storage/aio/README.md +++ b/src/backend/storage/aio/README.md @@ -94,7 +94,7 @@ pgaio_io_register_callbacks(ioh, PGAIO_HCB_SHARED_BUFFER_READV, 0); * * In this example we're reading only a single buffer, hence the 1. 
 */
-pgaio_io_set_handle_data_32(ioh, (uint32 *) buffer, 1);
+pgaio_io_set_handle_data_32(ioh, (uint32 *) &buffer, 1);
 
 /*
  * Pass the AIO handle to lower-level function. When operating on the level of
@@ -119,8 +119,9 @@ pgaio_io_set_handle_data_32(ioh, (uint32 *) buffer, 1);
  * e.g. due to reaching a limit on the number of unsubmitted IOs, and even
  * complete before smgrstartreadv() returns.
  */
+void *page = BufferGetBlock(buffer);
 smgrstartreadv(ioh, operation->smgr, forknum, blkno,
-               BufferGetBlock(buffer), 1);
+               &page, 1);
 
 /*
  * To benefit from AIO, it is beneficial to perform other work, including
diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h
index e7a0a234b6cf2..2933eea064910 100644
--- a/src/include/storage/aio.h
+++ b/src/include/storage/aio.h
@@ -201,7 +201,7 @@ typedef enum PgAioHandleCallbackID
 } PgAioHandleCallbackID;
 
 #define PGAIO_HCB_MAX	PGAIO_HCB_LOCAL_BUFFER_READV
-StaticAssertDecl(PGAIO_HCB_MAX <= (1 << PGAIO_RESULT_ID_BITS),
+StaticAssertDecl(PGAIO_HCB_MAX < (1 << PGAIO_RESULT_ID_BITS),
 			   "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS");

From a7ca73af662bc95e14058ac3f8fcf5d257f8bf79 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Tue, 22 Jul 2025 22:08:36 +0900
Subject: [PATCH 354/602] Remove translation marker from libpq-be-fe-helpers.h.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 112faf1378e introduced a translation marker in libpq-be-fe-helpers.h,
but this caused build failures on some platforms, such as the one reported
by buildfarm member indri, due to linker issues with dblink. This is the
same problem previously addressed in commit 213c959a294.

To fix the issue, this commit removes the translation marker from
libpq-be-fe-helpers.h, following the approach used in 213c959a294. It also
removes the associated gettext_noop() calls added in commit 112faf1378e,
as they are no longer needed.

While reviewing this, we also found a gettext_noop() call in
contrib/basic_archive. Since contrib modules don't support translation,
this call has been removed as well.

Per buildfarm member indri.
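For a backend extension that manages its own libpq connection, the resulting
call pattern looks roughly like this (a sketch: the function name, connection
string handling, and error handling are illustrative, while libpqsrv_connect(),
libpqsrv_notice_receiver(), and PG_WAIT_EXTENSION are existing interfaces):

    #include "postgres.h"
    #include "libpq-fe.h"
    #include "libpq/libpq-be-fe-helpers.h"
    #include "utils/wait_event.h"

    static PGconn *
    my_ext_connect(const char *conninfo)
    {
        /* interruptible connect helper from libpq-be-fe-helpers.h */
        PGconn     *conn = libpqsrv_connect(conninfo, PG_WAIT_EXTENSION);

        if (PQstatus(conn) != CONNECTION_OK)
            ereport(ERROR,
                    errmsg("could not connect: %s",
                           pchomp(PQerrorMessage(conn))));

        /* route remote NOTICE/WARNING messages into the local server log */
        PQsetNoticeReceiver(conn, libpqsrv_notice_receiver,
                            "received message via remote connection");

        return conn;
    }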
Author: Fujii Masao Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/0e6299d9-608a-4ffa-aeb1-40cb8a99000b@oss.nttdata.com --- contrib/basic_archive/basic_archive.c | 2 +- contrib/dblink/dblink.c | 4 ++-- contrib/postgres_fdw/connection.c | 2 +- src/backend/replication/libpqwalreceiver/libpqwalreceiver.c | 2 +- src/include/libpq/libpq-be-fe-helpers.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c index 4a8b8c7ac29c1..8fc633d2cbf62 100644 --- a/contrib/basic_archive/basic_archive.c +++ b/contrib/basic_archive/basic_archive.c @@ -65,7 +65,7 @@ void _PG_init(void) { DefineCustomStringVariable("basic_archive.archive_directory", - gettext_noop("Archive file destination directory."), + "Archive file destination directory.", NULL, &archive_directory, "", diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index c459a842fa95e..de5bed282f3f0 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -242,7 +242,7 @@ dblink_get_conn(char *conname_or_str, } PQsetNoticeReceiver(conn, libpqsrv_notice_receiver, - gettext_noop("received message via remote connection")); + "received message via remote connection"); dblink_security_check(conn, NULL, connstr); if (PQclientEncoding(conn) != GetDatabaseEncoding()) @@ -343,7 +343,7 @@ dblink_connect(PG_FUNCTION_ARGS) } PQsetNoticeReceiver(conn, libpqsrv_notice_receiver, - gettext_noop("received message via remote connection")); + "received message via remote connection"); /* check password actually used if not superuser */ dblink_security_check(conn, connname, connstr); diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index e41d47c3bbd1a..c1ce6f3343665 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -626,7 +626,7 @@ connect_pg_server(ForeignServer *server, UserMapping *user) errdetail_internal("%s", pchomp(PQerrorMessage(conn))))); PQsetNoticeReceiver(conn, libpqsrv_notice_receiver, - gettext_noop("received message via remote connection")); + "received message via remote connection"); /* Perform post-connection security checks. 
 */
 	pgfdw_security_check(keywords, values, user, conn);
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 0c75fe064d5ec..886d99951dddf 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -233,7 +233,7 @@ libpqrcv_connect(const char *conninfo, bool replication, bool logical,
 	}
 
 	PQsetNoticeReceiver(conn->streamConn, libpqsrv_notice_receiver,
-						gettext_noop("received message via replication"));
+						"received message via replication");
 
 	/*
 	 * Set always-secure search path for the cases where the connection is
diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h
index 49137a0a5706a..af13bd6bf3da3 100644
--- a/src/include/libpq/libpq-be-fe-helpers.h
+++ b/src/include/libpq/libpq-be-fe-helpers.h
@@ -481,7 +481,7 @@ libpqsrv_notice_receiver(void *arg, const PGresult *res)
 		len--;
 
 	ereport(LOG,
-			errmsg_internal("%s: %.*s", _(prefix), len, message));
+			errmsg_internal("%s: %.*s", prefix, len, message));
 }
 
 #endif							/* LIBPQ_BE_FE_HELPERS_H */

From a6eabec6808cb1b8f20974ad57275b14fc079e3b Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Wed, 23 Jul 2025 08:18:36 +0900
Subject: [PATCH 355/602] ecpg: Improve error detection around ecpg_strdup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Various code paths in ECPG did not check for memory allocation failures,
including the specific case where ecpg_strdup() treats a NULL input value
as valid. Since strdup() itself returns NULL on failure, there was no way
to tell the difference between a valid NULL input and an allocation
failure.

With these different cases in mind, ecpg_strdup() is redesigned and gains
a new optional argument, giving its callers the ability to distinguish
allocation failures from valid cases where a NULL value is passed in.
Most of the ECPG code does not expect a NULL value, with the exceptions
of ECPGget_desc() (setlocale) and ECPGconnect(), where for example dbname
may be left unspecified, with repeated strdup() calls. The code is
adapted to work with this new routine. Note the case of
ecpg_auto_prepare(), where the code order is switched so that failures
from ecpg_strdup() are handled before any cached data is manipulated,
avoiding inconsistencies.

This class of failure is unlikely to be a problem in practice, so no
backpatch is done. Random OOM failures in ECPGconnect() could cause the
driver to connect to a different server than the one wanted by the
caller, because it could fall back to default values instead of the
specified parameters, depending on the combination of allocation failures
and successes.
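The resulting calling convention can be sketched as follows (hypothetical
caller; "input" may legitimately be NULL here, mirroring the ECPGconnect()
case):

    bool		alloc_failed = false;
    char	   *copy;

    /*
     * The caller initializes alloc_failed once and may reuse it across
     * several ecpg_strdup() calls, checking it afterwards: a NULL result
     * with alloc_failed still false means the input itself was NULL,
     * which is a valid case rather than an out-of-memory condition.
     */
    copy = ecpg_strdup(input, lineno, &alloc_failed);
    if (alloc_failed)
    {
        /* OOM; ecpg_raise() has already been invoked by ecpg_strdup() */
        return false;
    }
    /* here, copy == NULL simply means input was NULL */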
Author: Evgeniy Gorbanev Co-authored-by: Aleksander Alekseev Reviewed-by: Álvaro Herrera Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/a6b193c1-6994-4d9c-9059-aca4aaf41ddd@basealt.ru --- src/interfaces/ecpg/ecpglib/connect.c | 46 ++++++++++++------- src/interfaces/ecpg/ecpglib/descriptor.c | 12 ++++- src/interfaces/ecpg/ecpglib/ecpglib_extern.h | 2 +- src/interfaces/ecpg/ecpglib/execute.c | 42 ++++++++++++----- src/interfaces/ecpg/ecpglib/memory.c | 11 ++++- src/interfaces/ecpg/ecpglib/prepare.c | 47 ++++++++++++++++---- 6 files changed, 119 insertions(+), 41 deletions(-) diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c index 713cbbf6360be..78de9f298ba62 100644 --- a/src/interfaces/ecpg/ecpglib/connect.c +++ b/src/interfaces/ecpg/ecpglib/connect.c @@ -264,7 +264,8 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p struct connection *this; int i, connect_params = 0; - char *dbname = name ? ecpg_strdup(name, lineno) : NULL, + bool alloc_failed = (sqlca == NULL); + char *dbname = name ? ecpg_strdup(name, lineno, &alloc_failed) : NULL, *host = NULL, *tmp, *port = NULL, @@ -273,11 +274,12 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p const char **conn_keywords; const char **conn_values; - if (sqlca == NULL) + if (alloc_failed) { ecpg_raise(lineno, ECPG_OUT_OF_MEMORY, ECPG_SQLSTATE_ECPG_OUT_OF_MEMORY, NULL); - ecpg_free(dbname); + if (dbname) + ecpg_free(dbname); return false; } @@ -302,7 +304,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p if (envname) { ecpg_free(dbname); - dbname = ecpg_strdup(envname, lineno); + dbname = ecpg_strdup(envname, lineno, &alloc_failed); } } @@ -354,7 +356,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p tmp = strrchr(dbname + offset, '?'); if (tmp != NULL) /* options given */ { - options = ecpg_strdup(tmp + 1, lineno); + options = ecpg_strdup(tmp + 1, lineno, &alloc_failed); *tmp = '\0'; } @@ -363,7 +365,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p { if (tmp[1] != '\0') /* non-empty database name */ { - realname = ecpg_strdup(tmp + 1, lineno); + realname = ecpg_strdup(tmp + 1, lineno, &alloc_failed); connect_params++; } *tmp = '\0'; @@ -373,7 +375,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p if (tmp != NULL) /* port number given */ { *tmp = '\0'; - port = ecpg_strdup(tmp + 1, lineno); + port = ecpg_strdup(tmp + 1, lineno, &alloc_failed); connect_params++; } @@ -407,7 +409,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p { if (*(dbname + offset) != '\0') { - host = ecpg_strdup(dbname + offset, lineno); + host = ecpg_strdup(dbname + offset, lineno, &alloc_failed); connect_params++; } } @@ -419,7 +421,7 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p tmp = strrchr(dbname, ':'); if (tmp != NULL) /* port number given */ { - port = ecpg_strdup(tmp + 1, lineno); + port = ecpg_strdup(tmp + 1, lineno, &alloc_failed); connect_params++; *tmp = '\0'; } @@ -427,14 +429,14 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p tmp = strrchr(dbname, '@'); if (tmp != NULL) /* host name given */ { - host = ecpg_strdup(tmp + 1, lineno); + host = ecpg_strdup(tmp + 1, lineno, &alloc_failed); connect_params++; *tmp = '\0'; } if (strlen(dbname) > 0) { - realname = ecpg_strdup(dbname, lineno); + realname = 
ecpg_strdup(dbname, lineno, &alloc_failed); connect_params++; } else @@ -465,7 +467,18 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p */ conn_keywords = (const char **) ecpg_alloc((connect_params + 1) * sizeof(char *), lineno); conn_values = (const char **) ecpg_alloc(connect_params * sizeof(char *), lineno); - if (conn_keywords == NULL || conn_values == NULL) + + /* Decide on a connection name */ + if (connection_name != NULL || realname != NULL) + { + this->name = ecpg_strdup(connection_name ? connection_name : realname, + lineno, &alloc_failed); + } + else + this->name = NULL; + + /* Deal with any failed allocations above */ + if (conn_keywords == NULL || conn_values == NULL || alloc_failed) { if (host) ecpg_free(host); @@ -481,6 +494,8 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p ecpg_free(conn_keywords); if (conn_values) ecpg_free(conn_values); + if (this->name) + ecpg_free(this->name); free(this); return false; } @@ -515,17 +530,14 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p ecpg_free(conn_keywords); if (conn_values) ecpg_free(conn_values); + if (this->name) + ecpg_free(this->name); free(this); return false; } } #endif - if (connection_name != NULL) - this->name = ecpg_strdup(connection_name, lineno); - else - this->name = ecpg_strdup(realname, lineno); - this->cache_head = NULL; this->prep_stmts = NULL; diff --git a/src/interfaces/ecpg/ecpglib/descriptor.c b/src/interfaces/ecpg/ecpglib/descriptor.c index 651d5c8b2ed3c..466428edfebee 100644 --- a/src/interfaces/ecpg/ecpglib/descriptor.c +++ b/src/interfaces/ecpg/ecpglib/descriptor.c @@ -240,8 +240,9 @@ ECPGget_desc(int lineno, const char *desc_name, int index,...) act_tuple; struct variable data_var; struct sqlca_t *sqlca = ECPGget_sqlca(); + bool alloc_failed = (sqlca == NULL); - if (sqlca == NULL) + if (alloc_failed) { ecpg_raise(lineno, ECPG_OUT_OF_MEMORY, ECPG_SQLSTATE_ECPG_OUT_OF_MEMORY, NULL); @@ -493,7 +494,14 @@ ECPGget_desc(int lineno, const char *desc_name, int index,...) 
#ifdef WIN32 stmt.oldthreadlocale = _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); #endif - stmt.oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno); + stmt.oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), + lineno, &alloc_failed); + if (alloc_failed) + { + va_end(args); + return false; + } + setlocale(LC_NUMERIC, "C"); #endif diff --git a/src/interfaces/ecpg/ecpglib/ecpglib_extern.h b/src/interfaces/ecpg/ecpglib/ecpglib_extern.h index 75cc68275bdac..949ff66cefc92 100644 --- a/src/interfaces/ecpg/ecpglib/ecpglib_extern.h +++ b/src/interfaces/ecpg/ecpglib/ecpglib_extern.h @@ -175,7 +175,7 @@ void ecpg_free(void *ptr); bool ecpg_init(const struct connection *con, const char *connection_name, const int lineno); -char *ecpg_strdup(const char *string, int lineno); +char *ecpg_strdup(const char *string, int lineno, bool *alloc_failed); const char *ecpg_type_name(enum ECPGttype typ); int ecpg_dynamic_type(Oid type); int sqlda_dynamic_type(Oid type, enum COMPAT_MODE compat); diff --git a/src/interfaces/ecpg/ecpglib/execute.c b/src/interfaces/ecpg/ecpglib/execute.c index f52da06de9a1d..84a4a9fc5781f 100644 --- a/src/interfaces/ecpg/ecpglib/execute.c +++ b/src/interfaces/ecpg/ecpglib/execute.c @@ -860,9 +860,9 @@ ecpg_store_input(const int lineno, const bool force_indicator, const struct vari numeric *nval; if (var->arrsize > 1) - mallocedval = ecpg_strdup("{", lineno); + mallocedval = ecpg_strdup("{", lineno, NULL); else - mallocedval = ecpg_strdup("", lineno); + mallocedval = ecpg_strdup("", lineno, NULL); if (!mallocedval) return false; @@ -923,9 +923,9 @@ ecpg_store_input(const int lineno, const bool force_indicator, const struct vari int slen; if (var->arrsize > 1) - mallocedval = ecpg_strdup("{", lineno); + mallocedval = ecpg_strdup("{", lineno, NULL); else - mallocedval = ecpg_strdup("", lineno); + mallocedval = ecpg_strdup("", lineno, NULL); if (!mallocedval) return false; @@ -970,9 +970,9 @@ ecpg_store_input(const int lineno, const bool force_indicator, const struct vari int slen; if (var->arrsize > 1) - mallocedval = ecpg_strdup("{", lineno); + mallocedval = ecpg_strdup("{", lineno, NULL); else - mallocedval = ecpg_strdup("", lineno); + mallocedval = ecpg_strdup("", lineno, NULL); if (!mallocedval) return false; @@ -1017,9 +1017,9 @@ ecpg_store_input(const int lineno, const bool force_indicator, const struct vari int slen; if (var->arrsize > 1) - mallocedval = ecpg_strdup("{", lineno); + mallocedval = ecpg_strdup("{", lineno, NULL); else - mallocedval = ecpg_strdup("", lineno); + mallocedval = ecpg_strdup("", lineno, NULL); if (!mallocedval) return false; @@ -2001,7 +2001,8 @@ ecpg_do_prologue(int lineno, const int compat, const int force_indicator, return false; } #endif - stmt->oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno); + stmt->oldlocale = ecpg_strdup(setlocale(LC_NUMERIC, NULL), lineno, + NULL); if (stmt->oldlocale == NULL) { ecpg_do_epilogue(stmt); @@ -2030,7 +2031,14 @@ ecpg_do_prologue(int lineno, const int compat, const int force_indicator, statement_type = ECPGst_execute; } else - stmt->command = ecpg_strdup(query, lineno); + { + stmt->command = ecpg_strdup(query, lineno, NULL); + if (!stmt->command) + { + ecpg_do_epilogue(stmt); + return false; + } + } stmt->name = NULL; @@ -2042,7 +2050,12 @@ ecpg_do_prologue(int lineno, const int compat, const int force_indicator, if (command) { stmt->name = stmt->command; - stmt->command = ecpg_strdup(command, lineno); + stmt->command = ecpg_strdup(command, lineno, NULL); + if (!stmt->command) + { + 
ecpg_do_epilogue(stmt); + return false; + } } else { @@ -2175,7 +2188,12 @@ ecpg_do_prologue(int lineno, const int compat, const int force_indicator, if (!is_prepared_name_set && stmt->statement_type == ECPGst_prepare) { - stmt->name = ecpg_strdup(var->value, lineno); + stmt->name = ecpg_strdup(var->value, lineno, NULL); + if (!stmt->name) + { + ecpg_do_epilogue(stmt); + return false; + } is_prepared_name_set = true; } } diff --git a/src/interfaces/ecpg/ecpglib/memory.c b/src/interfaces/ecpg/ecpglib/memory.c index 6979be2c988ac..2112e55b6e42c 100644 --- a/src/interfaces/ecpg/ecpglib/memory.c +++ b/src/interfaces/ecpg/ecpglib/memory.c @@ -43,8 +43,15 @@ ecpg_realloc(void *ptr, long size, int lineno) return new; } +/* + * Wrapper for strdup(), with NULL in input treated as a correct case. + * + * "alloc_failed" can be optionally specified by the caller to check for + * allocation failures. The caller is responsible for its initialization, + * as ecpg_strdup() may be called repeatedly across multiple allocations. + */ char * -ecpg_strdup(const char *string, int lineno) +ecpg_strdup(const char *string, int lineno, bool *alloc_failed) { char *new; @@ -54,6 +61,8 @@ ecpg_strdup(const char *string, int lineno) new = strdup(string); if (!new) { + if (alloc_failed) + *alloc_failed = true; ecpg_raise(lineno, ECPG_OUT_OF_MEMORY, ECPG_SQLSTATE_ECPG_OUT_OF_MEMORY, NULL); return NULL; } diff --git a/src/interfaces/ecpg/ecpglib/prepare.c b/src/interfaces/ecpg/ecpglib/prepare.c index ea1146f520f34..dd6fd1fe7f407 100644 --- a/src/interfaces/ecpg/ecpglib/prepare.c +++ b/src/interfaces/ecpg/ecpglib/prepare.c @@ -85,9 +85,22 @@ ecpg_register_prepared_stmt(struct statement *stmt) /* create statement */ prep_stmt->lineno = lineno; prep_stmt->connection = con; - prep_stmt->command = ecpg_strdup(stmt->command, lineno); + prep_stmt->command = ecpg_strdup(stmt->command, lineno, NULL); + if (!prep_stmt->command) + { + ecpg_free(prep_stmt); + ecpg_free(this); + return false; + } prep_stmt->inlist = prep_stmt->outlist = NULL; - this->name = ecpg_strdup(stmt->name, lineno); + this->name = ecpg_strdup(stmt->name, lineno, NULL); + if (!this->name) + { + ecpg_free(prep_stmt->command); + ecpg_free(prep_stmt); + ecpg_free(this); + return false; + } this->stmt = prep_stmt; this->prepared = true; @@ -177,14 +190,27 @@ prepare_common(int lineno, struct connection *con, const char *name, const char /* create statement */ stmt->lineno = lineno; stmt->connection = con; - stmt->command = ecpg_strdup(variable, lineno); + stmt->command = ecpg_strdup(variable, lineno, NULL); + if (!stmt->command) + { + ecpg_free(stmt); + ecpg_free(this); + return false; + } stmt->inlist = stmt->outlist = NULL; /* if we have C variables in our statement replace them with '?' 
 */
 	replace_variables(&(stmt->command), lineno);
 
 	/* add prepared statement to our list */
-	this->name = ecpg_strdup(name, lineno);
+	this->name = ecpg_strdup(name, lineno, NULL);
+	if (!this->name)
+	{
+		ecpg_free(stmt->command);
+		ecpg_free(stmt);
+		ecpg_free(this);
+		return false;
+	}
 	this->stmt = stmt;
 
 	/* and finally really prepare the statement */
@@ -540,7 +566,9 @@ AddStmtToCache(int lineno,	/* line # of statement */
 	/* add the query to the entry */
 	entry = &stmtCacheEntries[entNo];
 	entry->lineno = lineno;
-	entry->ecpgQuery = ecpg_strdup(ecpgQuery, lineno);
+	entry->ecpgQuery = ecpg_strdup(ecpgQuery, lineno, NULL);
+	if (!entry->ecpgQuery)
+		return -1;
 	entry->connection = connection;
 	entry->execs = 0;
 	memcpy(entry->stmtID, stmtID, sizeof(entry->stmtID));
@@ -567,6 +595,9 @@ ecpg_auto_prepare(int lineno, const char *connection_name, const int compat, cha
 		ecpg_log("ecpg_auto_prepare on line %d: statement found in cache; entry %d\n", lineno, entNo);
 
 		stmtID = stmtCacheEntries[entNo].stmtID;
+		*name = ecpg_strdup(stmtID, lineno, NULL);
+		if (*name == NULL)
+			return false;
 
 		con = ecpg_get_connection(connection_name);
 		prep = ecpg_find_prepared_statement(stmtID, con, NULL);
@@ -574,7 +605,6 @@
 		if (!prep && !prepare_common(lineno, con, stmtID, query))
 			return false;
 
-		*name = ecpg_strdup(stmtID, lineno);
 	}
 	else
 	{
@@ -584,6 +614,9 @@
 		/* generate a statement ID */
 		sprintf(stmtID, "ecpg%d", nextStmtID++);
+		*name = ecpg_strdup(stmtID, lineno, NULL);
+		if (*name == NULL)
+			return false;
 
 		if (!ECPGprepare(lineno, connection_name, 0, stmtID, query))
 			return false;
@@ -591,8 +624,6 @@
 		entNo = AddStmtToCache(lineno, stmtID, connection_name, compat, query);
 		if (entNo < 0)
 			return false;
-
-		*name = ecpg_strdup(stmtID, lineno);
 	}
 
 	/* increase usage counter */

From 039f7ee0fe9a25cf26d915d4b68091e080c6c6ec Mon Sep 17 00:00:00 2001
From: David Rowley
Date: Wed, 23 Jul 2025 12:02:55 +1200
Subject: [PATCH 356/602] Use strchr instead of strstr for single-char lookups

Compilers such as gcc and clang seem to perform this rewrite
automatically when the lookup string is known at compile-time to contain
a single character. The MSVC compiler does not seem to apply the same
optimization, and the code being adjusted here is within an #ifdef WIN32,
so it seems worth adjusting this with the assumption that strchr() will
be slightly more performant.

There are a couple more instances in contrib/fuzzystrmatch that this
commit could also have adjusted. After some discussion, we deemed those
not important enough to bother with.
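Shown in isolation, the rewrite is simply this (a standalone sketch, not the
patched pgmkdirp.c code):

    #include <assert.h>
    #include <string.h>

    int
    main(void)
    {
        const char *p = "//host/share/dir";

        /* single-character needle spelled as a string, via strstr() ... */
        const char *a = strstr(p + 2, "/");

        /* ... and the equivalent strchr() form used after this commit */
        const char *b = strchr(p + 2, '/');

        /* both yield the first '/' after the host part, or NULL if absent */
        assert(a == b);
        return 0;
    }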
Author: Dmitry Mityugov
Reviewed-by: Corey Huinker
Reviewed-by: David Rowley
Discussion: https://postgr.es/m/9c1beea6c7a5e9fb6677f26620f1f257%40postgrespro.ru
---
 src/port/pgmkdirp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/port/pgmkdirp.c b/src/port/pgmkdirp.c
index d943559760d89..7d7cea4dd0ea1 100644
--- a/src/port/pgmkdirp.c
+++ b/src/port/pgmkdirp.c
@@ -73,7 +73,7 @@ pg_mkdir_p(char *path, int omode)
 	if (p[0] == '/' && p[1] == '/')
 	{
 		/* network drive */
-		p = strstr(p + 2, "/");
+		p = strchr(p + 2, '/');
 		if (p == NULL)
 		{
 			errno = EINVAL;

From 228c3708685542d34e6f02c74240656327a5c622 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Wed, 23 Jul 2025 02:56:00 +0000
Subject: [PATCH 357/602] Preserve conflict-relevant data during logical
 replication.

Logical replication requires reliable conflict detection to maintain data
consistency across nodes. To achieve this, we must prevent premature
removal of tuples deleted by other origins and their associated commit_ts
data by VACUUM, which could otherwise lead to incorrect conflict reporting
and resolution.

This patch introduces a mechanism to retain deleted tuples on the
subscriber during the application of concurrent transactions from remote
nodes. Retaining these tuples allows us to correctly ignore concurrent
updates to the same tuple. Without this, an UPDATE might be misinterpreted
as an INSERT during resolutions due to the absence of the original tuple.

Additionally, we ensure that origin metadata is not prematurely removed by
vacuum freeze, which is essential for detecting update_origin_differs and
delete_origin_differs conflicts.

To support this, a new replication slot named pg_conflict_detection is
created and maintained by the launcher on the subscriber. Each apply
worker tracks its own non-removable transaction ID, which the launcher
aggregates to determine the appropriate xmin for the slot, thereby
retaining necessary tuples.

Conflict information retention (deleted tuples and commit_ts) can be
enabled per subscription via the retain_dead_tuples option. This is
disabled by default to avoid unnecessary overhead for configurations that
do not require conflict resolution or logging.

During upgrades, if any subscription on the old cluster has
retain_dead_tuples enabled, a conflict detection slot will be created to
protect relevant tuples from deletion when the new cluster starts.

This is foundational work to correctly detect the update_deleted conflict,
which will be done in a follow-up patch.
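The ordering this depends on in the commit path can be condensed as follows
(a simplified sketch of the twophase.c change below, not the literal patch):

    /*
     * A committing backend first advertises that it is inside the commit
     * critical section, and only then acquires its commit timestamp.  The
     * write barrier prevents the flag store from being reordered past the
     * timestamp acquisition, so GetOldestActiveTransactionId(true, false)
     * cannot miss a transaction whose commit timestamp is already being
     * recorded.
     */
    Assert((MyProc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0);
    MyProc->delayChkptFlags |= DELAY_CHKPT_IN_COMMIT;

    pg_write_barrier();

    committs = GetCurrentTimestamp();

    /* ... emit the commit WAL record, then clear DELAY_CHKPT_IN_COMMIT ... */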
Author: Zhijie Hou Reviewed-by: shveta malik Reviewed-by: Masahiko Sawada Reviewed-by: Dilip Kumar Reviewed-by: Nisha Moond Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/OS0PR01MB5716BE80DAEB0EE2A6A5D1F5949D2@OS0PR01MB5716.jpnprd01.prod.outlook.com --- doc/src/sgml/catalogs.sgml | 11 + doc/src/sgml/config.sgml | 2 + doc/src/sgml/func.sgml | 16 +- doc/src/sgml/logical-replication.sgml | 32 + doc/src/sgml/protocol.sgml | 88 +++ doc/src/sgml/ref/alter_subscription.sgml | 18 +- doc/src/sgml/ref/create_subscription.sgml | 87 ++- src/backend/access/transam/twophase.c | 32 +- src/backend/access/transam/xact.c | 18 +- src/backend/access/transam/xlog.c | 2 +- src/backend/access/transam/xlogrecovery.c | 2 +- src/backend/catalog/pg_subscription.c | 1 + src/backend/catalog/system_views.sql | 3 +- src/backend/commands/subscriptioncmds.c | 400 +++++++++-- .../replication/logical/applyparallelworker.c | 3 +- src/backend/replication/logical/launcher.c | 228 ++++++- .../replication/logical/reorderbuffer.c | 2 +- src/backend/replication/logical/tablesync.c | 3 +- src/backend/replication/logical/worker.c | 623 +++++++++++++++++- src/backend/replication/slot.c | 48 +- src/backend/replication/walsender.c | 60 ++ src/backend/storage/ipc/procarray.c | 20 +- src/backend/utils/adt/pg_upgrade_support.c | 19 + src/bin/pg_dump/pg_dump.c | 18 +- src/bin/pg_dump/pg_dump.h | 1 + src/bin/pg_upgrade/check.c | 96 ++- src/bin/pg_upgrade/info.c | 25 +- src/bin/pg_upgrade/pg_upgrade.c | 60 +- src/bin/pg_upgrade/pg_upgrade.h | 4 +- src/bin/pg_upgrade/t/004_subscription.pl | 85 ++- src/bin/psql/describe.c | 6 +- src/bin/psql/tab-complete.in.c | 10 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 4 + src/include/catalog/pg_subscription.h | 5 + src/include/commands/subscriptioncmds.h | 5 + src/include/replication/logicallauncher.h | 3 + src/include/replication/slot.h | 11 +- src/include/replication/worker_internal.h | 13 +- src/include/storage/proc.h | 8 + src/include/storage/procarray.h | 3 +- src/test/regress/expected/subscription.out | 168 ++--- src/test/regress/sql/subscription.sql | 11 + src/test/subscription/t/035_conflicts.pl | 195 +++++- src/tools/pgindent/typedefs.list | 2 + 45 files changed, 2233 insertions(+), 220 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0d23bc1b122b6..97f547b3cc4b2 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8082,6 +8082,17 @@ SCRAM-SHA-256$<iteration count>:&l + + + subretaindeadtuples bool + + + If true, the information (e.g., dead tuples, commit timestamps, and + origins) on the subscriber that is useful for conflict detection is + retained. + + + subconninfo text diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index c7acc0f182f3e..20ccb2d6b5447 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4965,6 +4965,8 @@ ANY num_sync ( + + max_replication_slots + must be set to at least 1 when retain_dead_tuples + is enabled for any subscription. + + max_logical_replication_workers must be set to at least the number of subscriptions (for leader apply @@ -2532,6 +2538,22 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER dependencies on clusters before version 17.0 will silently be ignored. + + + Commit timestamps and origin data are not preserved during the upgrade. 
+ As a result, even if + retain_dead_tuples + is enabled, the upgraded subscriber may be unable to detect conflicts or + log relevant commit timestamps and origins when applying changes from the + publisher occurred before the upgrade. Additionally, immediately after the + upgrade, the vacuum may remove the deleted rows that are required for + conflict detection. This can affect the changes that were not replicated + before the upgrade. To ensure consistent conflict tracking, users should + ensure that all potentially conflicting changes are replicated to the + subscriber before initiating the upgrade. + + + There are some prerequisites for pg_upgrade to be able to upgrade the subscriptions. If these are not met an error @@ -2563,6 +2585,16 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER subscriptions present in the old cluster. + + + If there are subscriptions with retain_dead_tuples enabled, the reserved + replication slot pg_conflict_detection + must not exist on the new cluster. Additionally, the + wal_level on the + new cluster must be set to replica or + logical. + + diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index e74b5be1effdd..b115884acb346 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2235,6 +2235,8 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" The name of the slot to create. Must be a valid replication slot name (see ). + The name cannot be pg_conflict_detection as it + is reserved for the conflict detection. @@ -2653,6 +2655,65 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" + + + Primary status update (B) + + + + Byte1('s') + + + Identifies the message as a primary status update. + + + + + + Int64 + + + The latest WAL write position on the server. + + + + + + Int64 + + + The oldest transaction ID that is currently in the commit phase on + the server, along with its epoch. The most significant 32 bits are + the epoch. The least significant 32 bits are the transaction ID. + If no transactions are active on the server, this number will be + the next transaction ID to be assigned. + + + + + + Int64 + + + The next transaction ID to be assigned on the server, along with + its epoch. The most significant 32 bits are the epoch. The least + significant 32 bits are the transaction ID. + + + + + + Int64 + + + The server's system clock at the time of transmission, as + microseconds since midnight on 2000-01-01. + + + + + + @@ -2797,6 +2858,33 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" + + + Request primary status update (F) + + + + Byte1('p') + + + Identifies the message as a request for a primary status update. + + + + + + Int64 + + + The client's system clock at the time of transmission, as + microseconds since midnight on 2000-01-01. + + + + + + + diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml index fdc648d007f1c..d48cdc76bd34d 100644 --- a/doc/src/sgml/ref/alter_subscription.sgml +++ b/doc/src/sgml/ref/alter_subscription.sgml @@ -235,8 +235,9 @@ ALTER SUBSCRIPTION name RENAME TO < password_required, run_as_owner, origin, - failover, and - two_phase. + failover, + two_phase, and + retain_dead_tuples. Only a superuser can set password_required = false. @@ -261,8 +262,9 @@ ALTER SUBSCRIPTION name RENAME TO < - The failover - and two_phase + The failover, + two_phase, and + retain_dead_tuples parameters can only be altered when the subscription is disabled. 
@@ -285,6 +287,14 @@ ALTER SUBSCRIPTION name RENAME TO < option is changed from true to false, the publisher will replicate the transactions again when they are committed. + + + If the retain_dead_tuples + option is altered to false and no other subscription + has this option enabled, the replication slot named + pg_conflict_detection, created to retain + dead tuples for conflict detection, will be dropped. + diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index 57dec28a5df64..b8cd15f32806b 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -169,7 +169,9 @@ CREATE SUBSCRIPTION subscription_name Name of the publisher's replication slot to use. The default is - to use the name of the subscription for the slot name. + to use the name of the subscription for the slot name. The name cannot + be pg_conflict_detection as it is reserved for the + conflict detection. @@ -435,6 +437,89 @@ CREATE SUBSCRIPTION subscription_name + + + retain_dead_tuples (boolean) + + + Specifies whether the information (e.g., dead tuples, commit + timestamps, and origins) required for conflict detection on the + subscriber is retained. The default is false. + If set to true, a physical replication slot named + pg_conflict_detection will be + created on the subscriber to prevent the conflict information from + being removed. + + + + Note that the information useful for conflict detection is retained + only after the creation of the slot. You can verify the existence of + this slot by querying pg_replication_slots. + And even if multiple subscriptions on one node enable this option, + only one replication slot will be created. Also, + wal_level must be set to replica + or higher to allow the replication slot to be used. + + + + + Note that the information for conflict detection cannot be purged if + the subscription is disabled; thus, the information will accumulate + until the subscription is enabled. To prevent excessive accumulation, + it is recommended to disable retain_dead_tuples + if the subscription will be inactive for an extended period. + + + + Additionally when enabling retain_dead_tuples for + conflict detection in logical replication, it is important to design the + replication topology to balance data retention requirements with + overall system performance. This option provides minimal performance + overhead when applied appropriately. The following scenarios illustrate + effective usage patterns when enabling this option. + + + + a. Large Tables with Bidirectional Writes: + For large tables subject to concurrent writes on both publisher and + subscriber nodes, publishers can define row filters when creating + publications to segment data. This allows multiple subscriptions + to replicate exclusive subsets of the table in parallel, optimizing + the throughput. + + + + b. Write-Enabled Subscribers: + If a subscriber node is expected to perform write operations, replication + can be structured using multiple publications and subscriptions. By + distributing tables across these publications, the workload is spread among + several apply workers, improving concurrency and reducing contention. + + + + c. Read-Only Subscribers: + In configurations involving single or multiple publisher nodes + performing concurrent write operations, read-only subscriber nodes may + replicate changes without seeing a performance impact if it does index + scan. 
However, if the subscriber is impacted due to replication lag or + scan performance (say due to sequential scans), it needs to follow one + of the two previous strategies to distribute the workload on the + subscriber. + + + + + This option cannot be enabled if the publisher is a physical standby. + + + + Enabling this option ensures retention of information useful for + conflict detection solely for changes occurring locally on the + publisher. For the changes originating from different origins, + reliable conflict detection cannot be guaranteed. + + + diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 85cbe397cb21a..7918176fc588e 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1183,7 +1183,11 @@ EndPrepare(GlobalTransaction gxact) * starting immediately after the WAL record is inserted could complete * without fsync'ing our state file. (This is essentially the same kind * of race condition as the COMMIT-to-clog-write case that - * RecordTransactionCommit uses DELAY_CHKPT_START for; see notes there.) + * RecordTransactionCommit uses DELAY_CHKPT_IN_COMMIT for; see notes + * there.) Note that DELAY_CHKPT_IN_COMMIT is used to find transactions in + * the critical commit section. We need to know about such transactions + * for conflict detection in logical replication. See + * GetOldestActiveTransactionId(true, false) and its use. * * We save the PREPARE record's location in the gxact for later use by * CheckPointTwoPhase. @@ -2298,7 +2302,7 @@ ProcessTwoPhaseBuffer(FullTransactionId fxid, * RecordTransactionCommitPrepared * * This is basically the same as RecordTransactionCommit (q.v. if you change - * this function): in particular, we must set DELAY_CHKPT_START to avoid a + * this function): in particular, we must set DELAY_CHKPT_IN_COMMIT to avoid a * race condition. * * We know the transaction made at least one XLOG entry (its PREPARE), @@ -2318,7 +2322,7 @@ RecordTransactionCommitPrepared(TransactionId xid, const char *gid) { XLogRecPtr recptr; - TimestampTz committs = GetCurrentTimestamp(); + TimestampTz committs; bool replorigin; /* @@ -2331,8 +2335,24 @@ RecordTransactionCommitPrepared(TransactionId xid, START_CRIT_SECTION(); /* See notes in RecordTransactionCommit */ - Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0); - MyProc->delayChkptFlags |= DELAY_CHKPT_START; + Assert((MyProc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0); + MyProc->delayChkptFlags |= DELAY_CHKPT_IN_COMMIT; + + /* + * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before + * commit time is written. + */ + pg_write_barrier(); + + /* + * Note it is important to set committs value after marking ourselves as + * in the commit critical section (DELAY_CHKPT_IN_COMMIT). This is because + * we want to ensure all transactions that have acquired commit timestamp + * are finished before we allow the logical replication client to advance + * its xid which is used to hold back dead rows for conflict detection. + * See comments atop worker.c. + */ + committs = GetCurrentTimestamp(); /* * Emit the XLOG commit record. 
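Before continuing into the commit-path changes, the new option documented above can be exercised end to end with a short sketch (subscription and publication names are placeholders; per the restrictions above, the publisher must not be a physical standby):

/* sub # */ CREATE SUBSCRIPTION sub_rdt
/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=sub_rdt'
/* sub - */ PUBLICATION pub1 WITH (retain_dead_tuples = true);
/* sub # */ SELECT subname, subretaindeadtuples FROM pg_subscription;
/* sub # */ SELECT slot_name, slot_type, xmin FROM pg_replication_slots
/* sub - */ WHERE slot_name = 'pg_conflict_detection';

Only one pg_conflict_detection slot exists no matter how many subscriptions enable the option.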
Note that we mark 2PC commits as @@ -2381,7 +2401,7 @@ RecordTransactionCommitPrepared(TransactionId xid, TransactionIdCommitTree(xid, nchildren, children); /* Checkpoint can proceed now */ - MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + MyProc->delayChkptFlags &= ~DELAY_CHKPT_IN_COMMIT; END_CRIT_SECTION(); diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 41601fcb2803e..b46e7e9c2a6b0 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1431,10 +1431,22 @@ RecordTransactionCommit(void) * without holding the ProcArrayLock, since we're the only one * modifying it. This makes checkpoint's determination of which xacts * are delaying the checkpoint a bit fuzzy, but it doesn't matter. + * + * Note, it is important to get the commit timestamp after marking the + * transaction in the commit critical section. See + * RecordTransactionCommitPrepared. */ - Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0); + Assert((MyProc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0); START_CRIT_SECTION(); - MyProc->delayChkptFlags |= DELAY_CHKPT_START; + MyProc->delayChkptFlags |= DELAY_CHKPT_IN_COMMIT; + + Assert(xactStopTimestamp == 0); + + /* + * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible + * before commit time is written. + */ + pg_write_barrier(); /* * Insert the commit XLOG record. @@ -1537,7 +1549,7 @@ RecordTransactionCommit(void) */ if (markXidCommitted) { - MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + MyProc->delayChkptFlags &= ~DELAY_CHKPT_IN_COMMIT; END_CRIT_SECTION(); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8e7827c6ed92c..eefffc4277a1a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7121,7 +7121,7 @@ CreateCheckPoint(int flags) * starting snapshot of locks and transactions. 
*/ if (!shutdown && XLogStandbyInfoActive()) - checkPoint.oldestActiveXid = GetOldestActiveTransactionId(); + checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true); else checkPoint.oldestActiveXid = InvalidTransactionId; diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 23878b2dd9199..e8f3ba00caae7 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -4760,7 +4760,7 @@ bool check_primary_slot_name(char **newval, void **extra, GucSource source) { if (*newval && strcmp(*newval, "") != 0 && - !ReplicationSlotValidateName(*newval, WARNING)) + !ReplicationSlotValidateName(*newval, false, WARNING)) return false; return true; diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index 1395032413e3d..63c2992d19f75 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -103,6 +103,7 @@ GetSubscription(Oid subid, bool missing_ok) sub->passwordrequired = subform->subpasswordrequired; sub->runasowner = subform->subrunasowner; sub->failover = subform->subfailover; + sub->retaindeadtuples = subform->subretaindeadtuples; /* Get conninfo */ datum = SysCacheGetAttrNotNull(SUBSCRIPTIONOID, diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b2d5332effc1b..f6eca09ee153a 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1386,7 +1386,8 @@ REVOKE ALL ON pg_subscription FROM public; GRANT SELECT (oid, subdbid, subskiplsn, subname, subowner, subenabled, subbinary, substream, subtwophasestate, subdisableonerr, subpasswordrequired, subrunasowner, subfailover, - subslotname, subsynccommit, subpublications, suborigin) + subretaindeadtuples, subslotname, subsynccommit, + subpublications, suborigin) ON pg_subscription TO public; CREATE VIEW pg_stat_subscription_stats AS diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index e23b0de724299..cd6c3684482f9 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -14,6 +14,7 @@ #include "postgres.h" +#include "access/commit_ts.h" #include "access/htup_details.h" #include "access/table.h" #include "access/twophase.h" @@ -71,8 +72,9 @@ #define SUBOPT_PASSWORD_REQUIRED 0x00000800 #define SUBOPT_RUN_AS_OWNER 0x00001000 #define SUBOPT_FAILOVER 0x00002000 -#define SUBOPT_LSN 0x00004000 -#define SUBOPT_ORIGIN 0x00008000 +#define SUBOPT_RETAIN_DEAD_TUPLES 0x00004000 +#define SUBOPT_LSN 0x00008000 +#define SUBOPT_ORIGIN 0x00010000 /* check if the 'val' has 'bits' set */ #define IsSet(val, bits) (((val) & (bits)) == (bits)) @@ -98,6 +100,7 @@ typedef struct SubOpts bool passwordrequired; bool runasowner; bool failover; + bool retaindeadtuples; char *origin; XLogRecPtr lsn; } SubOpts; @@ -105,8 +108,10 @@ typedef struct SubOpts static List *fetch_table_list(WalReceiverConn *wrconn, List *publications); static void check_publications_origin(WalReceiverConn *wrconn, List *publications, bool copydata, - char *origin, Oid *subrel_local_oids, - int subrel_count, char *subname); + bool retain_dead_tuples, char *origin, + Oid *subrel_local_oids, int subrel_count, + char *subname); +static void check_pub_dead_tuple_retention(WalReceiverConn *wrconn); static void check_duplicates_in_publist(List *publist, Datum *datums); static List *merge_publications(List *oldpublist, List *newpublist, bool addpub, const char 
*subname); static void ReportSlotConnectionError(List *rstates, Oid subid, char *slotname, char *err); @@ -162,6 +167,8 @@ parse_subscription_options(ParseState *pstate, List *stmt_options, opts->runasowner = false; if (IsSet(supported_opts, SUBOPT_FAILOVER)) opts->failover = false; + if (IsSet(supported_opts, SUBOPT_RETAIN_DEAD_TUPLES)) + opts->retaindeadtuples = false; if (IsSet(supported_opts, SUBOPT_ORIGIN)) opts->origin = pstrdup(LOGICALREP_ORIGIN_ANY); @@ -210,7 +217,7 @@ parse_subscription_options(ParseState *pstate, List *stmt_options, if (strcmp(opts->slot_name, "none") == 0) opts->slot_name = NULL; else - ReplicationSlotValidateName(opts->slot_name, ERROR); + ReplicationSlotValidateName(opts->slot_name, false, ERROR); } else if (IsSet(supported_opts, SUBOPT_COPY_DATA) && strcmp(defel->defname, "copy_data") == 0) @@ -307,6 +314,15 @@ parse_subscription_options(ParseState *pstate, List *stmt_options, opts->specified_opts |= SUBOPT_FAILOVER; opts->failover = defGetBoolean(defel); } + else if (IsSet(supported_opts, SUBOPT_RETAIN_DEAD_TUPLES) && + strcmp(defel->defname, "retain_dead_tuples") == 0) + { + if (IsSet(opts->specified_opts, SUBOPT_RETAIN_DEAD_TUPLES)) + errorConflictingDefElem(defel, pstate); + + opts->specified_opts |= SUBOPT_RETAIN_DEAD_TUPLES; + opts->retaindeadtuples = defGetBoolean(defel); + } else if (IsSet(supported_opts, SUBOPT_ORIGIN) && strcmp(defel->defname, "origin") == 0) { @@ -563,7 +579,8 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, SUBOPT_SYNCHRONOUS_COMMIT | SUBOPT_BINARY | SUBOPT_STREAMING | SUBOPT_TWOPHASE_COMMIT | SUBOPT_DISABLE_ON_ERR | SUBOPT_PASSWORD_REQUIRED | - SUBOPT_RUN_AS_OWNER | SUBOPT_FAILOVER | SUBOPT_ORIGIN); + SUBOPT_RUN_AS_OWNER | SUBOPT_FAILOVER | + SUBOPT_RETAIN_DEAD_TUPLES | SUBOPT_ORIGIN); parse_subscription_options(pstate, stmt->options, supported_opts, &opts); /* @@ -630,6 +647,10 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, stmt->subname))); } + /* Ensure that we can enable retain_dead_tuples */ + if (opts.retaindeadtuples) + CheckSubDeadTupleRetention(true, !opts.enabled, WARNING); + if (!IsSet(opts.specified_opts, SUBOPT_SLOT_NAME) && opts.slot_name == NULL) opts.slot_name = stmt->subname; @@ -670,6 +691,8 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, values[Anum_pg_subscription_subpasswordrequired - 1] = BoolGetDatum(opts.passwordrequired); values[Anum_pg_subscription_subrunasowner - 1] = BoolGetDatum(opts.runasowner); values[Anum_pg_subscription_subfailover - 1] = BoolGetDatum(opts.failover); + values[Anum_pg_subscription_subretaindeadtuples - 1] = + BoolGetDatum(opts.retaindeadtuples); values[Anum_pg_subscription_subconninfo - 1] = CStringGetTextDatum(conninfo); if (opts.slot_name) @@ -722,7 +745,11 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, { check_publications(wrconn, publications); check_publications_origin(wrconn, publications, opts.copy_data, - opts.origin, NULL, 0, stmt->subname); + opts.retaindeadtuples, opts.origin, + NULL, 0, stmt->subname); + + if (opts.retaindeadtuples) + check_pub_dead_tuple_retention(wrconn); /* * Set sync state based on if we were asked to do data copy or @@ -881,8 +908,8 @@ AlterSubscription_refresh(Subscription *sub, bool copy_data, sizeof(Oid), oid_cmp); check_publications_origin(wrconn, sub->publications, copy_data, - sub->origin, subrel_local_oids, - subrel_count, sub->name); + sub->retaindeadtuples, sub->origin, + subrel_local_oids, subrel_count, sub->name); /* * Rels that we want to 
remove from subscription and drop any slots @@ -1040,18 +1067,22 @@ AlterSubscription_refresh(Subscription *sub, bool copy_data, } /* - * Common checks for altering failover and two_phase options. + * Common checks for altering failover, two_phase, and retain_dead_tuples + * options. */ static void CheckAlterSubOption(Subscription *sub, const char *option, bool slot_needs_update, bool isTopLevel) { + Assert(strcmp(option, "failover") == 0 || + strcmp(option, "two_phase") == 0 || + strcmp(option, "retain_dead_tuples") == 0); + /* - * The checks in this function are required only for failover and - * two_phase options. + * Altering the retain_dead_tuples option does not update the slot on the + * publisher. */ - Assert(strcmp(option, "failover") == 0 || - strcmp(option, "two_phase") == 0); + Assert(!slot_needs_update || strcmp(option, "retain_dead_tuples") != 0); /* * Do not allow changing the option if the subscription is enabled. This @@ -1063,6 +1094,39 @@ CheckAlterSubOption(Subscription *sub, const char *option, * the publisher by the existing walsender, so we could have allowed that * even when the subscription is enabled. But we kept this restriction for * the sake of consistency and simplicity. + * + * Additionally, do not allow changing the retain_dead_tuples option when + * the subscription is enabled to prevent race conditions arising from the + * new option value being acknowledged asynchronously by the launcher and + * apply workers. + * + * Without the restriction, a race condition may arise when a user + * disables and immediately re-enables the retain_dead_tuples option. In + * this case, the launcher might drop the slot upon noticing the disabled + * action, while the apply worker may keep maintaining + * oldest_nonremovable_xid without noticing the option change. During this + * period, a transaction ID wraparound could falsely make this ID appear + * as if it originates from the future w.r.t. the transaction ID stored in + * the slot maintained by the launcher. + * + * Similarly, if the user enables retain_dead_tuples concurrently with the + * launcher starting the worker, the apply worker may start calculating + * oldest_nonremovable_xid before the launcher notices the enable action. + * Consequently, the launcher may update slot.xmin to a newer value than + * that maintained by the worker. In subsequent cycles, upon integrating + * the worker's oldest_nonremovable_xid, the launcher might detect a + * retreat in the calculated xmin, necessitating additional handling. + * + * XXX To address the above race conditions, we can define + * oldest_nonremovable_xid as FullTransactionId and add a check to + * disallow retreating the conflict slot's xmin. For now, we kept the + * implementation simple by disallowing changes to retain_dead_tuples, + * but in the future we can change this after some more analysis. + * + * Note that we could restrict only the enabling of retain_dead_tuples to + * avoid the race conditions described above, but we maintain the + * restriction for both enable and disable operations for the sake of + * consistency. 
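In practice this means toggling the option may need a retry while workers from the just-disabled subscription are still exiting; a sketch of the expected flow (sub1 as before):

/* sub # */ ALTER SUBSCRIPTION sub1 DISABLE;
/* sub # */ ALTER SUBSCRIPTION sub1 SET (retain_dead_tuples = false);

As the error added below indicates, the second statement can be rejected until the subscription's apply worker has actually exited, in which case it simply needs to be retried.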
*/ if (sub->enabled) ereport(ERROR, @@ -1110,6 +1174,9 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, bool update_tuple = false; bool update_failover = false; bool update_two_phase = false; + bool check_pub_rdt = false; + bool retain_dead_tuples; + char *origin; Subscription *sub; Form_pg_subscription form; bits32 supported_opts; @@ -1137,6 +1204,9 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, sub = GetSubscription(subid, false); + retain_dead_tuples = sub->retaindeadtuples; + origin = sub->origin; + /* * Don't allow non-superuser modification of a subscription with * password_required=false. @@ -1165,7 +1235,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, SUBOPT_DISABLE_ON_ERR | SUBOPT_PASSWORD_REQUIRED | SUBOPT_RUN_AS_OWNER | SUBOPT_FAILOVER | - SUBOPT_ORIGIN); + SUBOPT_RETAIN_DEAD_TUPLES | SUBOPT_ORIGIN); parse_subscription_options(pstate, stmt->options, supported_opts, &opts); @@ -1325,11 +1395,62 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, replaces[Anum_pg_subscription_subfailover - 1] = true; } + if (IsSet(opts.specified_opts, SUBOPT_RETAIN_DEAD_TUPLES)) + { + values[Anum_pg_subscription_subretaindeadtuples - 1] = + BoolGetDatum(opts.retaindeadtuples); + replaces[Anum_pg_subscription_subretaindeadtuples - 1] = true; + + CheckAlterSubOption(sub, "retain_dead_tuples", false, isTopLevel); + + /* + * Workers may continue running even after the + * subscription has been disabled. + * + * To prevent race conditions (as described in + * CheckAlterSubOption()), ensure that all worker + * processes have already exited before proceeding. + */ + if (logicalrep_workers_find(subid, true, true)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot alter retain_dead_tuples when logical replication worker is still running"), + errhint("Try again after some time."))); + + /* + * Remind the user that enabling subscription will prevent + * the accumulation of dead tuples. + */ + if (opts.retaindeadtuples) + CheckSubDeadTupleRetention(true, !sub->enabled, NOTICE); + + /* + * Notify the launcher to manage the replication slot for + * conflict detection. This ensures that replication slot + * is efficiently handled (created, updated, or dropped) + * in response to any configuration changes. + */ + ApplyLauncherWakeupAtCommit(); + + check_pub_rdt = opts.retaindeadtuples; + retain_dead_tuples = opts.retaindeadtuples; + } + if (IsSet(opts.specified_opts, SUBOPT_ORIGIN)) { values[Anum_pg_subscription_suborigin - 1] = CStringGetTextDatum(opts.origin); replaces[Anum_pg_subscription_suborigin - 1] = true; + + /* + * Check if changes from different origins may be received + * from the publisher when the origin is changed to ANY + * and retain_dead_tuples is enabled. + */ + check_pub_rdt = retain_dead_tuples && + pg_strcasecmp(opts.origin, LOGICALREP_ORIGIN_ANY) == 0; + + origin = opts.origin; } update_tuple = true; @@ -1347,6 +1468,15 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot enable subscription that does not have a slot name"))); + /* + * Check track_commit_timestamp only when enabling the + * subscription in case it was disabled after creation. See + * comments atop CheckSubDeadTupleRetention() for details. 
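That check boils down to subscriber configuration that can also be inspected by hand; a minimal sketch (track_commit_timestamp can only change at server restart, and pg_xact_commit_timestamp() returns NULL for transactions committed while it was off; t1 is the table from the earlier examples):

/* sub # */ SHOW track_commit_timestamp;
/* sub # */ ALTER SYSTEM SET track_commit_timestamp = on;  -- needs a restart
/* sub # */ SELECT pg_xact_commit_timestamp(xmin), a, b FROM t1;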
+ */ + if (sub->retaindeadtuples) + CheckSubDeadTupleRetention(opts.enabled, !opts.enabled, + WARNING); + values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(opts.enabled); replaces[Anum_pg_subscription_subenabled - 1] = true; @@ -1355,6 +1485,14 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, ApplyLauncherWakeupAtCommit(); update_tuple = true; + + /* + * The subscription might be initially created with + * connect=false and retain_dead_tuples=true, meaning the + * remote server's status may not be checked. Ensure this + * check is conducted now. + */ + check_pub_rdt = sub->retaindeadtuples && opts.enabled; break; } @@ -1369,6 +1507,13 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, CStringGetTextDatum(stmt->conninfo); replaces[Anum_pg_subscription_subconninfo - 1] = true; update_tuple = true; + + /* + * Since the remote server configuration might have changed, + * perform a check to ensure it permits enabling + * retain_dead_tuples. + */ + check_pub_rdt = sub->retaindeadtuples; break; case ALTER_SUBSCRIPTION_SET_PUBLICATION: @@ -1568,14 +1713,15 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, } /* - * Try to acquire the connection necessary for altering the slot, if - * needed. + * Try to acquire the connection necessary either for modifying the slot + * or for checking if the remote server permits enabling + * retain_dead_tuples. * * This has to be at the end because otherwise if there is an error while * doing the database operations we won't be able to rollback altered * slot. */ - if (update_failover || update_two_phase) + if (update_failover || update_two_phase || check_pub_rdt) { bool must_use_password; char *err; @@ -1584,10 +1730,14 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, /* Load the library providing us libpq calls. */ load_file("libpqwalreceiver", false); - /* Try to connect to the publisher. */ + /* + * Try to connect to the publisher, using the new connection string if + * available. + */ must_use_password = sub->passwordrequired && !sub->ownersuperuser; - wrconn = walrcv_connect(sub->conninfo, true, true, must_use_password, - sub->name, &err); + wrconn = walrcv_connect(stmt->conninfo ? stmt->conninfo : sub->conninfo, + true, true, must_use_password, sub->name, + &err); if (!wrconn) ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), @@ -1596,9 +1746,17 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, PG_TRY(); { - walrcv_alter_slot(wrconn, sub->slotname, - update_failover ? &opts.failover : NULL, - update_two_phase ? &opts.twophase : NULL); + if (retain_dead_tuples) + check_pub_dead_tuple_retention(wrconn); + + check_publications_origin(wrconn, sub->publications, false, + retain_dead_tuples, origin, NULL, 0, + sub->name); + + if (update_failover || update_two_phase) + walrcv_alter_slot(wrconn, sub->slotname, + update_failover ? &opts.failover : NULL, + update_two_phase ? &opts.twophase : NULL); } PG_FINALLY(); { @@ -2086,20 +2244,29 @@ AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId) * Check and log a warning if the publisher has subscribed to the same table, * its partition ancestors (if it's a partition), or its partition children (if * it's a partitioned table), from some other publishers. This check is - * required only if "copy_data = true" and "origin = none" for CREATE - * SUBSCRIPTION and ALTER SUBSCRIPTION ... REFRESH statements to notify the - * user that data having origin might have been copied. 
+ * required in the following scenarios: * - * This check need not be performed on the tables that are already added - * because incremental sync for those tables will happen through WAL and the - * origin of the data can be identified from the WAL records. + * 1) For CREATE SUBSCRIPTION and ALTER SUBSCRIPTION ... REFRESH statements + * with "copy_data = true" and "origin = none": + * - Warn the user that data with an origin might have been copied. + * - This check is skipped for tables already added, as incremental sync via + * WAL allows origin tracking. The list of such tables is in + * subrel_local_oids. * - * subrel_local_oids contains the list of relation oids that are already - * present on the subscriber. + * 2) For CREATE SUBSCRIPTION and ALTER SUBSCRIPTION ... REFRESH statements + * with "retain_dead_tuples = true" and "origin = any", and for ALTER + * SUBSCRIPTION statements that modify retain_dead_tuples or origin, or + * when the publisher's status changes (e.g., due to a connection string + * update): + * - Warn the user that only conflict detection info for local changes on + * the publisher is retained. Data from other origins may lack sufficient + * details for reliable conflict detection. + * - See comments atop worker.c for more details. */ static void check_publications_origin(WalReceiverConn *wrconn, List *publications, - bool copydata, char *origin, Oid *subrel_local_oids, + bool copydata, bool retain_dead_tuples, + char *origin, Oid *subrel_local_oids, int subrel_count, char *subname) { WalRcvExecResult *res; @@ -2108,9 +2275,29 @@ check_publications_origin(WalReceiverConn *wrconn, List *publications, Oid tableRow[1] = {TEXTOID}; List *publist = NIL; int i; + bool check_rdt; + bool check_table_sync; + bool origin_none = origin && + pg_strcasecmp(origin, LOGICALREP_ORIGIN_NONE) == 0; + + /* + * Enable retain_dead_tuples checks only when origin is set to 'any', + * since with origin='none' only local changes are replicated to the + * subscriber. + */ + check_rdt = retain_dead_tuples && !origin_none; + + /* + * Enable table synchronization checks only when origin is 'none', to + * ensure that data from other origins is not inadvertently copied. + */ + check_table_sync = copydata && origin_none; - if (!copydata || !origin || - (pg_strcasecmp(origin, LOGICALREP_ORIGIN_NONE) != 0)) + /* retain_dead_tuples and table sync checks occur separately */ + Assert(!(check_rdt && check_table_sync)); + + /* Return if no checks are required */ + if (!check_rdt && !check_table_sync) return; initStringInfo(&cmd); @@ -2129,16 +2316,23 @@ check_publications_origin(WalReceiverConn *wrconn, List *publications, /* * In case of ALTER SUBSCRIPTION ... REFRESH, subrel_local_oids contains * the list of relation oids that are already present on the subscriber. - * This check should be skipped for these tables. + * This check should be skipped for these tables if checking for table + * sync scenario. However, when handling the retain_dead_tuples scenario, + * ensure all tables are checked, as some existing tables may now include + * changes from other origins due to newly created subscriptions on the + * publisher. 
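As a concrete shape for scenario 1 (names are placeholders), the warning can surface for a statement like:

/* sub # */ CREATE SUBSCRIPTION sub_none
/* sub - */ CONNECTION 'host=localhost dbname=test_pub application_name=sub_none'
/* sub - */ PUBLICATION pub1 WITH (origin = none, copy_data = true);

and, for scenario 2, for the same statement with retain_dead_tuples = true and the default origin = any, whenever pub1's tables are also written to by other subscriptions on the publisher.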
*/ - for (i = 0; i < subrel_count; i++) + if (check_table_sync) { - Oid relid = subrel_local_oids[i]; - char *schemaname = get_namespace_name(get_rel_namespace(relid)); - char *tablename = get_rel_name(relid); + for (i = 0; i < subrel_count; i++) + { + Oid relid = subrel_local_oids[i]; + char *schemaname = get_namespace_name(get_rel_namespace(relid)); + char *tablename = get_rel_name(relid); - appendStringInfo(&cmd, "AND NOT (N.nspname = '%s' AND C.relname = '%s')\n", - schemaname, tablename); + appendStringInfo(&cmd, "AND NOT (N.nspname = '%s' AND C.relname = '%s')\n", + schemaname, tablename); + } } res = walrcv_exec(wrconn, cmd.data, 1, tableRow); @@ -2173,22 +2367,37 @@ check_publications_origin(WalReceiverConn *wrconn, List *publications, * XXX: For simplicity, we don't check whether the table has any data or * not. If the table doesn't have any data then we don't need to * distinguish between data having origin and data not having origin so we - * can avoid logging a warning in that case. + * can avoid logging a warning for table sync scenario. */ if (publist) { StringInfo pubnames = makeStringInfo(); + StringInfo err_msg = makeStringInfo(); + StringInfo err_hint = makeStringInfo(); /* Prepare the list of publication(s) for warning message. */ GetPublicationsStr(publist, pubnames, false); + + if (check_table_sync) + { + appendStringInfo(err_msg, _("subscription \"%s\" requested copy_data with origin = NONE but might copy data that had a different origin"), + subname); + appendStringInfoString(err_hint, _("Verify that initial data copied from the publisher tables did not come from other origins.")); + } + else + { + appendStringInfo(err_msg, _("subscription \"%s\" enabled retain_dead_tuples but might not reliably detect conflicts for changes from different origins"), + subname); + appendStringInfoString(err_hint, _("Consider using origin = NONE or disabling retain_dead_tuples.")); + } + ereport(WARNING, errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("subscription \"%s\" requested copy_data with origin = NONE but might copy data that had a different origin", - subname), - errdetail_plural("The subscription being created subscribes to a publication (%s) that contains tables that are written to by other subscriptions.", - "The subscription being created subscribes to publications (%s) that contain tables that are written to by other subscriptions.", + errmsg_internal("%s", err_msg->data), + errdetail_plural("The subscription subscribes to a publication (%s) that contains tables that are written to by other subscriptions.", + "The subscription subscribes to publications (%s) that contain tables that are written to by other subscriptions.", list_length(publist), pubnames->data), - errhint("Verify that initial data copied from the publisher tables did not come from other origins.")); + errhint_internal("%s", err_hint->data)); } ExecDropSingleTupleTableSlot(slot); @@ -2196,6 +2405,101 @@ check_publications_origin(WalReceiverConn *wrconn, List *publications, walrcv_clear_result(res); } +/* + * Determine whether the retain_dead_tuples can be enabled based on the + * publisher's status. + * + * This option is disallowed if the publisher is running a version earlier + * than the PG19, or if the publisher is in recovery (i.e., it is a standby + * server). + * + * See comments atop worker.c for a detailed explanation. 
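These publisher-side preconditions can also be verified by hand; a sketch of what check_pub_dead_tuple_retention() effectively confirms over the walsender connection:

/* pub # */ SELECT pg_is_in_recovery();   -- must return false
/* pub # */ SHOW server_version;          -- must be PostgreSQL 19 or later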
+ */ +static void +check_pub_dead_tuple_retention(WalReceiverConn *wrconn) +{ + WalRcvExecResult *res; + Oid RecoveryRow[1] = {BOOLOID}; + TupleTableSlot *slot; + bool isnull; + bool remote_in_recovery; + + if (walrcv_server_version(wrconn) < 19000) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot enable retain_dead_tuples if the publisher is running a version earlier than PostgreSQL 19")); + + res = walrcv_exec(wrconn, "SELECT pg_is_in_recovery()", 1, RecoveryRow); + + if (res->status != WALRCV_OK_TUPLES) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not obtain recovery progress from the publisher: %s", + res->err))); + + slot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple); + if (!tuplestore_gettupleslot(res->tuplestore, true, false, slot)) + elog(ERROR, "failed to fetch tuple for the recovery progress"); + + remote_in_recovery = DatumGetBool(slot_getattr(slot, 1, &isnull)); + + if (remote_in_recovery) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot enable retain_dead_tuples if the publisher is in recovery.")); + + ExecDropSingleTupleTableSlot(slot); + + walrcv_clear_result(res); +} + +/* + * Check if the subscriber's configuration is adequate to enable the + * retain_dead_tuples option. + * + * Issue an ERROR if the wal_level does not support the use of replication + * slots when check_guc is set to true. + * + * Issue a WARNING if track_commit_timestamp is not enabled when check_guc is + * set to true. This is only to highlight the importance of enabling + * track_commit_timestamp instead of catching all the misconfigurations, as + * this setting can be adjusted after subscription creation. Without it, the + * apply worker will simply skip conflict detection. + * + * Issue a WARNING or NOTICE if the subscription is disabled. Do not raise an + * ERROR since users can only modify retain_dead_tuples for disabled + * subscriptions. And as long as the subscription is enabled promptly, it will + * not pose issues. + */ +void +CheckSubDeadTupleRetention(bool check_guc, bool sub_disabled, + int elevel_for_sub_disabled) +{ + Assert(elevel_for_sub_disabled == NOTICE || + elevel_for_sub_disabled == WARNING); + + if (check_guc && wal_level < WAL_LEVEL_REPLICA) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("\"wal_level\" is insufficient to create the replication slot required by retain_dead_tuples"), + errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")); + + if (check_guc && !track_commit_timestamp) + ereport(WARNING, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("commit timestamp and origin data required for detecting conflicts won't be retained"), + errhint("Consider setting \"%s\" to true.", + "track_commit_timestamp")); + + if (sub_disabled) + ereport(elevel_for_sub_disabled, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("deleted rows to detect conflicts would not be removed until the subscription is enabled"), + (elevel_for_sub_disabled > NOTICE) + ? errhint("Consider setting %s to false.", + "retain_dead_tuples") : 0); +} + /* * Get the list of tables which belong to specified publications on the * publisher connection. 
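On the subscriber side, the server settings that CheckSubDeadTupleRetention() validates can be previewed with a sketch like:

/* sub # */ SHOW wal_level;               -- needs replica or logical
/* sub # */ SHOW max_replication_slots;   -- needs at least 1, to accommodate
                                          -- the pg_conflict_detection slot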
diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c index d25085d351535..1fa931a74229d 100644 --- a/src/backend/replication/logical/applyparallelworker.c +++ b/src/backend/replication/logical/applyparallelworker.c @@ -441,7 +441,8 @@ pa_launch_parallel_worker(void) MySubscription->name, MyLogicalRepWorker->userid, InvalidOid, - dsm_segment_handle(winfo->dsm_seg)); + dsm_segment_handle(winfo->dsm_seg), + false); if (launched) { diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 4aed0dfcebb24..742d9ba68e900 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -32,6 +32,7 @@ #include "postmaster/interrupt.h" #include "replication/logicallauncher.h" #include "replication/origin.h" +#include "replication/slot.h" #include "replication/walreceiver.h" #include "replication/worker_internal.h" #include "storage/ipc.h" @@ -91,7 +92,6 @@ static dshash_table *last_start_times = NULL; static bool on_commit_launcher_wakeup = false; -static void ApplyLauncherWakeup(void); static void logicalrep_launcher_onexit(int code, Datum arg); static void logicalrep_worker_onexit(int code, Datum arg); static void logicalrep_worker_detach(void); @@ -100,6 +100,9 @@ static int logicalrep_pa_worker_count(Oid subid); static void logicalrep_launcher_attach_dshmem(void); static void ApplyLauncherSetWorkerStartTime(Oid subid, TimestampTz start_time); static TimestampTz ApplyLauncherGetWorkerStartTime(Oid subid); +static void compute_min_nonremovable_xid(LogicalRepWorker *worker, TransactionId *xmin); +static bool acquire_conflict_slot_if_exists(void); +static void advance_conflict_slot_xmin(TransactionId new_xmin); /* @@ -148,6 +151,7 @@ get_subscription_list(void) sub->owner = subform->subowner; sub->enabled = subform->subenabled; sub->name = pstrdup(NameStr(subform->subname)); + sub->retaindeadtuples = subform->subretaindeadtuples; /* We don't fill fields we are not interested in. */ res = lappend(res, sub); @@ -309,7 +313,8 @@ logicalrep_workers_find(Oid subid, bool only_running, bool acquire_lock) bool logicalrep_worker_launch(LogicalRepWorkerType wtype, Oid dbid, Oid subid, const char *subname, Oid userid, - Oid relid, dsm_handle subworker_dsm) + Oid relid, dsm_handle subworker_dsm, + bool retain_dead_tuples) { BackgroundWorker bgw; BackgroundWorkerHandle *bgw_handle; @@ -328,10 +333,13 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype, * - must be valid worker type * - tablesync workers are only ones to have relid * - parallel apply worker is the only kind of subworker + * - The replication slot used in conflict detection is created when + * retain_dead_tuples is enabled */ Assert(wtype != WORKERTYPE_UNKNOWN); Assert(is_tablesync_worker == OidIsValid(relid)); Assert(is_parallel_apply_worker == (subworker_dsm != DSM_HANDLE_INVALID)); + Assert(!retain_dead_tuples || MyReplicationSlot); ereport(DEBUG1, (errmsg_internal("starting logical replication worker for subscription \"%s\"", @@ -454,6 +462,9 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype, worker->stream_fileset = NULL; worker->leader_pid = is_parallel_apply_worker ? MyProcPid : InvalidPid; worker->parallel_apply = is_parallel_apply_worker; + worker->oldest_nonremovable_xid = retain_dead_tuples + ? 
MyReplicationSlot->data.xmin + : InvalidTransactionId; worker->last_lsn = InvalidXLogRecPtr; TIMESTAMP_NOBEGIN(worker->last_send_time); TIMESTAMP_NOBEGIN(worker->last_recv_time); @@ -1118,7 +1129,10 @@ ApplyLauncherWakeupAtCommit(void) on_commit_launcher_wakeup = true; } -static void +/* + * Wakeup the launcher immediately. + */ +void ApplyLauncherWakeup(void) { if (LogicalRepCtx->launcher_pid != 0) @@ -1150,6 +1164,12 @@ ApplyLauncherMain(Datum main_arg) */ BackgroundWorkerInitializeConnection(NULL, NULL, 0); + /* + * Acquire the conflict detection slot at startup to ensure it can be + * dropped if no longer needed after a restart. + */ + acquire_conflict_slot_if_exists(); + /* Enter main loop */ for (;;) { @@ -1159,6 +1179,9 @@ ApplyLauncherMain(Datum main_arg) MemoryContext subctx; MemoryContext oldctx; long wait_time = DEFAULT_NAPTIME_PER_CYCLE; + bool can_advance_xmin = true; + bool retain_dead_tuples = false; + TransactionId xmin = InvalidTransactionId; CHECK_FOR_INTERRUPTS(); @@ -1168,7 +1191,14 @@ ApplyLauncherMain(Datum main_arg) ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(subctx); - /* Start any missing workers for enabled subscriptions. */ + /* + * Start any missing workers for enabled subscriptions. + * + * Also, during the iteration through all subscriptions, we compute + * the minimum XID required to protect deleted tuples for conflict + * detection if one of the subscription enables retain_dead_tuples + * option. + */ sublist = get_subscription_list(); foreach(lc, sublist) { @@ -1178,6 +1208,38 @@ ApplyLauncherMain(Datum main_arg) TimestampTz now; long elapsed; + if (sub->retaindeadtuples) + { + retain_dead_tuples = true; + + /* + * Can't advance xmin of the slot unless all the subscriptions + * with retain_dead_tuples are enabled. This is required to + * ensure that we don't advance the xmin of + * CONFLICT_DETECTION_SLOT if one of the subscriptions is not + * enabled. Otherwise, we won't be able to detect conflicts + * reliably for such a subscription even though it has set the + * retain_dead_tuples option. + */ + can_advance_xmin &= sub->enabled; + + /* + * Create a replication slot to retain information necessary + * for conflict detection such as dead tuples, commit + * timestamps, and origins. + * + * The slot is created before starting the apply worker to + * prevent it from unnecessarily maintaining its + * oldest_nonremovable_xid. + * + * The slot is created even for a disabled subscription to + * ensure that conflict-related information is available when + * applying remote changes that occurred before the + * subscription was enabled. + */ + CreateConflictDetectionSlot(); + } + if (!sub->enabled) continue; @@ -1186,7 +1248,27 @@ ApplyLauncherMain(Datum main_arg) LWLockRelease(LogicalRepWorkerLock); if (w != NULL) - continue; /* worker is running already */ + { + /* + * Compute the minimum xmin required to protect dead tuples + * required for conflict detection among all running apply + * workers that enables retain_dead_tuples. + */ + if (sub->retaindeadtuples && can_advance_xmin) + compute_min_nonremovable_xid(w, &xmin); + + /* worker is running already */ + continue; + } + + /* + * Can't advance xmin of the slot unless all the workers + * corresponding to subscriptions with retain_dead_tuples are + * running, disabling the further computation of the minimum + * nonremovable xid. + */ + if (sub->retaindeadtuples) + can_advance_xmin = false; /* * If the worker is eligible to start now, launch it. 
Otherwise, @@ -1210,7 +1292,8 @@ ApplyLauncherMain(Datum main_arg) if (!logicalrep_worker_launch(WORKERTYPE_APPLY, sub->dbid, sub->oid, sub->name, sub->owner, InvalidOid, - DSM_HANDLE_INVALID)) + DSM_HANDLE_INVALID, + sub->retaindeadtuples)) { /* * We get here either if we failed to launch a worker @@ -1230,6 +1313,20 @@ ApplyLauncherMain(Datum main_arg) } } + /* + * Drop the CONFLICT_DETECTION_SLOT slot if there is no subscription + * that requires us to retain dead tuples. Otherwise, if required, + * advance the slot's xmin to protect dead tuples required for the + * conflict detection. + */ + if (MyReplicationSlot) + { + if (!retain_dead_tuples) + ReplicationSlotDropAcquired(); + else if (can_advance_xmin) + advance_conflict_slot_xmin(xmin); + } + /* Switch back to original memory context. */ MemoryContextSwitchTo(oldctx); /* Clean the temporary memory. */ @@ -1257,6 +1354,125 @@ ApplyLauncherMain(Datum main_arg) /* Not reachable */ } +/* + * Determine the minimum non-removable transaction ID across all apply workers + * for subscriptions that have retain_dead_tuples enabled. Store the result + * in *xmin. + */ +static void +compute_min_nonremovable_xid(LogicalRepWorker *worker, TransactionId *xmin) +{ + TransactionId nonremovable_xid; + + Assert(worker != NULL); + + /* + * The replication slot for conflict detection must be created before the + * worker starts. + */ + Assert(MyReplicationSlot); + + SpinLockAcquire(&worker->relmutex); + nonremovable_xid = worker->oldest_nonremovable_xid; + SpinLockRelease(&worker->relmutex); + + Assert(TransactionIdIsValid(nonremovable_xid)); + + if (!TransactionIdIsValid(*xmin) || + TransactionIdPrecedes(nonremovable_xid, *xmin)) + *xmin = nonremovable_xid; +} + +/* + * Acquire the replication slot used to retain information for conflict + * detection, if it exists. + * + * Return true if successfully acquired, otherwise return false. + */ +static bool +acquire_conflict_slot_if_exists(void) +{ + if (!SearchNamedReplicationSlot(CONFLICT_DETECTION_SLOT, true)) + return false; + + ReplicationSlotAcquire(CONFLICT_DETECTION_SLOT, true, false); + return true; +} + +/* + * Advance the xmin the replication slot used to retain information required + * for conflict detection. + */ +static void +advance_conflict_slot_xmin(TransactionId new_xmin) +{ + Assert(MyReplicationSlot); + Assert(TransactionIdIsValid(new_xmin)); + Assert(TransactionIdPrecedesOrEquals(MyReplicationSlot->data.xmin, new_xmin)); + + /* Return if the xmin value of the slot cannot be advanced */ + if (TransactionIdEquals(MyReplicationSlot->data.xmin, new_xmin)) + return; + + SpinLockAcquire(&MyReplicationSlot->mutex); + MyReplicationSlot->effective_xmin = new_xmin; + MyReplicationSlot->data.xmin = new_xmin; + SpinLockRelease(&MyReplicationSlot->mutex); + + elog(DEBUG1, "updated xmin: %u", MyReplicationSlot->data.xmin); + + ReplicationSlotMarkDirty(); + ReplicationSlotsComputeRequiredXmin(false); + + /* + * Like PhysicalConfirmReceivedLocation(), do not save slot information + * each time. This is acceptable because all concurrent transactions on + * the publisher that require the data preceding the slot's xmin should + * have already been applied and flushed on the subscriber before the xmin + * is advanced. So, even if the slot's xmin regresses after a restart, it + * will be advanced again in the next cycle. Therefore, no data required + * for conflict detection will be prematurely removed. 
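The effect is visible from SQL as the slot's xmin moving forward (a sketch; the value advances only while every retain_dead_tuples subscription is enabled and its apply worker is running):

/* sub # */ SELECT slot_name, active, xmin
/* sub - */ FROM pg_replication_slots
/* sub - */ WHERE slot_name = 'pg_conflict_detection';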
+ */ + return; +} + +/* + * Create and acquire the replication slot used to retain information for + * conflict detection, if not yet. + */ +void +CreateConflictDetectionSlot(void) +{ + TransactionId xmin_horizon; + + /* Exit early, if the replication slot is already created and acquired */ + if (MyReplicationSlot) + return; + + ereport(LOG, + errmsg("creating replication conflict detection slot")); + + ReplicationSlotCreate(CONFLICT_DETECTION_SLOT, false, RS_PERSISTENT, false, + false, false); + + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + + xmin_horizon = GetOldestSafeDecodingTransactionId(false); + + SpinLockAcquire(&MyReplicationSlot->mutex); + MyReplicationSlot->effective_xmin = xmin_horizon; + MyReplicationSlot->data.xmin = xmin_horizon; + SpinLockRelease(&MyReplicationSlot->mutex); + + ReplicationSlotsComputeRequiredXmin(true); + + LWLockRelease(ProcArrayLock); + + /* Write this slot to disk */ + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); +} + /* * Is current process the logical replication launcher? */ diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 7b4e8629553b8..5febd154b6bae 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4917,7 +4917,7 @@ StartupReorderBuffer(void) continue; /* if it cannot be a slot, skip the directory */ - if (!ReplicationSlotValidateName(logical_de->d_name, DEBUG2)) + if (!ReplicationSlotValidateName(logical_de->d_name, true, DEBUG2)) continue; /* diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index e4fd6347fd1be..3fea0a0206ed3 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -615,7 +615,8 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) MySubscription->name, MyLogicalRepWorker->userid, rstate->relid, - DSM_HANDLE_INVALID); + DSM_HANDLE_INVALID, + false); } } } diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index c5fb627aa56ec..b59221c4d0636 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -132,6 +132,96 @@ * failover = true when creating the subscription. Enabling failover allows us * to smoothly transition to the promoted standby, ensuring that we can * subscribe to the new primary without losing any data. + * + * RETAIN DEAD TUPLES + * ---------------------- + * Each apply worker that enabled retain_dead_tuples option maintains a + * non-removable transaction ID (oldest_nonremovable_xid) in shared memory to + * prevent dead rows from being removed prematurely when the apply worker still + * needs them to detect conflicts reliably. This helps to retain the required + * commit_ts module information, which further helps to detect + * update_origin_differs and delete_origin_differs conflicts reliably, as + * otherwise, vacuum freeze could remove the required information. + * + * The logical replication launcher manages an internal replication slot named + * "pg_conflict_detection". It asynchronously aggregates the non-removable + * transaction ID from all apply workers to determine the appropriate xmin for + * the slot, thereby retaining necessary tuples. + * + * The non-removable transaction ID in the apply worker is advanced to the + * oldest running transaction ID once all concurrent transactions on the + * publisher have been applied and flushed locally. 
The process involves: + * + * - RDT_GET_CANDIDATE_XID: + * Call GetOldestActiveTransactionId() to take oldestRunningXid as the + * candidate xid. + * + * - RDT_REQUEST_PUBLISHER_STATUS: + * Send a message to the walsender requesting the publisher status, which + * includes the latest WAL write position and information about transactions + * that are in the commit phase. + * + * - RDT_WAIT_FOR_PUBLISHER_STATUS: + * Wait for the status from the walsender. After receiving the first status, + * do not proceed if there are concurrent remote transactions that are still + * in the commit phase. These transactions might have been assigned an + * earlier commit timestamp but have not yet written the commit WAL record. + * Continue to request the publisher status (RDT_REQUEST_PUBLISHER_STATUS) + * until all these transactions have completed. + * + * - RDT_WAIT_FOR_LOCAL_FLUSH: + * Advance the non-removable transaction ID if the current flush location has + * reached or surpassed the last received WAL position. + * + * The overall state progression is: GET_CANDIDATE_XID -> + * REQUEST_PUBLISHER_STATUS -> WAIT_FOR_PUBLISHER_STATUS -> (loop to + * REQUEST_PUBLISHER_STATUS till concurrent remote transactions end) -> + * WAIT_FOR_LOCAL_FLUSH -> loop back to GET_CANDIDATE_XID. + * + * Retaining the dead tuples for this period is sufficient for ensuring + * eventual consistency using last-update-wins strategy, as dead tuples are + * useful for detecting conflicts only during the application of concurrent + * transactions from remote nodes. After applying and flushing all remote + * transactions that occurred concurrently with the tuple DELETE, any + * subsequent UPDATE from a remote node should have a later timestamp. In such + * cases, it is acceptable to detect an update_missing scenario and convert the + * UPDATE to an INSERT when applying it. But, detecting concurrent remote + * transactions with earlier timestamps than the DELETE is necessary, as the + * UPDATEs in remote transactions should be ignored if their timestamp is + * earlier than that of the dead tuples. + * + * Note that advancing the non-removable transaction ID is not supported if the + * publisher is also a physical standby. This is because the logical walsender + * on the standby can only get the WAL replay position but there may be more + * WALs that are being replicated from the primary and those WALs could have + * earlier commit timestamp. + * + * Similarly, when the publisher has subscribed to another publisher, + * information necessary for conflict detection cannot be retained for + * changes from origins other than the publisher. This is because publisher + * lacks the information on concurrent transactions of other publishers to + * which it subscribes. As the information on concurrent transactions is + * unavailable beyond subscriber's immediate publishers, the non-removable + * transaction ID might be advanced prematurely before changes from other + * origins have been fully applied. + * + * XXX Retaining information for changes from other origins might be possible + * by requesting the subscription on that origin to enable retain_dead_tuples + * and fetching the conflict detection slot.xmin along with the publisher's + * status. 
In the RDT_WAIT_FOR_PUBLISHER_STATUS phase, the apply worker could + * wait for the remote slot's xmin to reach the oldest active transaction ID, + * ensuring that all transactions from other origins have been applied on the + * publisher, thereby getting the latest WAL position that includes all + * concurrent changes. However, this approach may impact performance, so it + * might not be worth the effort. + * + * XXX It seems feasible to get the latest commit's WAL location from the + * publisher and wait till that is applied. However, we can't do that + * because commit timestamps can regress as a commit with a later LSN is not + * guaranteed to have a later timestamp than those with earlier LSNs. Having + * said that, even if that is possible, it won't improve performance much as + * the apply worker always lags and moves slowly compared with the + * transactions on the publisher. *------------------------------------------------------------------------- */ @@ -140,6 +230,7 @@ #include #include +#include "access/commit_ts.h" #include "access/table.h" #include "access/tableam.h" #include "access/twophase.h" @@ -148,6 +239,7 @@ #include "catalog/pg_inherits.h" #include "catalog/pg_subscription.h" #include "catalog/pg_subscription_rel.h" +#include "commands/subscriptioncmds.h" #include "commands/tablecmds.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -166,12 +258,14 @@ #include "replication/logicalrelation.h" #include "replication/logicalworker.h" #include "replication/origin.h" +#include "replication/slot.h" #include "replication/walreceiver.h" #include "replication/worker_internal.h" #include "rewrite/rewriteHandler.h" #include "storage/buffile.h" #include "storage/ipc.h" #include "storage/lmgr.h" +#include "storage/procarray.h" #include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/dynahash.h" @@ -268,6 +362,78 @@ typedef enum TRANS_PARALLEL_APPLY, } TransApplyAction; +/* + * The phases involved in advancing the non-removable transaction ID. + * + * See comments atop worker.c for details of the transition between these + * phases. + */ +typedef enum +{ + RDT_GET_CANDIDATE_XID, + RDT_REQUEST_PUBLISHER_STATUS, + RDT_WAIT_FOR_PUBLISHER_STATUS, + RDT_WAIT_FOR_LOCAL_FLUSH +} RetainDeadTuplesPhase; + +/* + * Critical information for managing phase transitions within the + * RetainDeadTuplesPhase. + */ +typedef struct RetainDeadTuplesData +{ + RetainDeadTuplesPhase phase; /* current phase */ + XLogRecPtr remote_lsn; /* WAL write position on the publisher */ + + /* + * Oldest transaction ID that was in the commit phase on the publisher. + * Use FullTransactionId to prevent issues with transaction ID wraparound, + * where a new remote_oldestxid could falsely appear to originate from the + * past and block advancement. + */ + FullTransactionId remote_oldestxid; + + /* + * Next transaction ID to be assigned on the publisher. Use + * FullTransactionId for consistency and to allow straightforward + * comparisons with remote_oldestxid. + */ + FullTransactionId remote_nextxid; + + TimestampTz reply_time; /* when the publisher responds with status */ + + /* + * Publisher transaction ID that must complete before entering the final + * phase (RDT_WAIT_FOR_LOCAL_FLUSH). Use + * FullTransactionId for the same reason as remote_nextxid. 
+ */ + FullTransactionId remote_wait_for; + + TransactionId candidate_xid; /* candidate for the non-removable + * transaction ID */ + TimestampTz flushpos_update_time; /* when the remote flush position was + * updated in final phase + * (RDT_WAIT_FOR_LOCAL_FLUSH) */ + + /* + * The following fields are used to determine the timing for the next + * round of transaction ID advancement. + */ + TimestampTz last_recv_time; /* when the last message was received */ + TimestampTz candidate_xid_time; /* when the candidate_xid is decided */ + int xid_advance_interval; /* how much time (ms) to wait before + * attempting to advance the + * non-removable transaction ID */ +} RetainDeadTuplesData; + +/* + * The minimum (100ms) and maximum (3 minutes) intervals for advancing + * non-removable transaction IDs. The maximum interval is a bit arbitrary but + * is sufficient to not cause any undue network traffic. + */ +#define MIN_XID_ADVANCE_INTERVAL 100 +#define MAX_XID_ADVANCE_INTERVAL 180000 + /* errcontext tracker */ static ApplyErrorCallbackArg apply_error_callback_arg = { @@ -332,6 +498,13 @@ static XLogRecPtr skip_xact_finish_lsn = InvalidXLogRecPtr; /* BufFile handle of the current streaming file */ static BufFile *stream_fd = NULL; +/* + * The remote WAL position that has been applied and flushed locally. We record + * and use this information both while sending feedback to the server and + * advancing oldest_nonremovable_xid. + */ +static XLogRecPtr last_flushpos = InvalidXLogRecPtr; + typedef struct SubXactInfo { TransactionId xid; /* XID of the subxact */ @@ -372,6 +545,19 @@ static void stream_close_file(void); static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply); +static void maybe_advance_nonremovable_xid(RetainDeadTuplesData *rdt_data, + bool status_received); +static bool can_advance_nonremovable_xid(RetainDeadTuplesData *rdt_data); +static void process_rdt_phase_transition(RetainDeadTuplesData *rdt_data, + bool status_received); +static void get_candidate_xid(RetainDeadTuplesData *rdt_data); +static void request_publisher_status(RetainDeadTuplesData *rdt_data); +static void wait_for_publisher_status(RetainDeadTuplesData *rdt_data, + bool status_received); +static void wait_for_local_flush(RetainDeadTuplesData *rdt_data); +static void adjust_xid_advance_interval(RetainDeadTuplesData *rdt_data, + bool new_xid_found); + static void apply_handle_commit_internal(LogicalRepCommitData *commit_data); static void apply_handle_insert_internal(ApplyExecutionData *edata, ResultRelInfo *relinfo, @@ -3577,6 +3763,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received) bool ping_sent = false; TimeLineID tli; ErrorContextCallback errcallback; + RetainDeadTuplesData rdt_data = {0}; /* * Init the ApplyMessageContext which we clean up after each replication @@ -3655,6 +3842,8 @@ LogicalRepApplyLoop(XLogRecPtr last_received) last_recv_timestamp = GetCurrentTimestamp(); ping_sent = false; + rdt_data.last_recv_time = last_recv_timestamp; + /* Ensure we are reading the data into our memory context. 
*/ MemoryContextSwitchTo(ApplyMessageContext); @@ -3681,6 +3870,8 @@ LogicalRepApplyLoop(XLogRecPtr last_received) UpdateWorkerStats(last_received, send_time, false); apply_dispatch(&s); + + maybe_advance_nonremovable_xid(&rdt_data, false); } else if (c == 'k') { @@ -3696,8 +3887,31 @@ LogicalRepApplyLoop(XLogRecPtr last_received) last_received = end_lsn; send_feedback(last_received, reply_requested, false); + + maybe_advance_nonremovable_xid(&rdt_data, false); + UpdateWorkerStats(last_received, timestamp, true); } + else if (c == 's') /* Primary status update */ + { + rdt_data.remote_lsn = pq_getmsgint64(&s); + rdt_data.remote_oldestxid = FullTransactionIdFromU64((uint64) pq_getmsgint64(&s)); + rdt_data.remote_nextxid = FullTransactionIdFromU64((uint64) pq_getmsgint64(&s)); + rdt_data.reply_time = pq_getmsgint64(&s); + + /* + * This should never happen, see + * ProcessStandbyPSRequestMessage. But if it happens + * due to a bug, we don't want to proceed as it can + * incorrectly advance oldest_nonremovable_xid. + */ + if (XLogRecPtrIsInvalid(rdt_data.remote_lsn)) + elog(ERROR, "cannot get the latest WAL position from the publisher"); + + maybe_advance_nonremovable_xid(&rdt_data, true); + + UpdateWorkerStats(last_received, rdt_data.reply_time, false); + } /* other message types are purposefully ignored */ MemoryContextReset(ApplyMessageContext); @@ -3710,6 +3924,11 @@ LogicalRepApplyLoop(XLogRecPtr last_received) /* confirm all writes so far */ send_feedback(last_received, false, false); + /* Reset the timestamp if no message was received */ + rdt_data.last_recv_time = 0; + + maybe_advance_nonremovable_xid(&rdt_data, false); + if (!in_remote_transaction && !in_streamed_transaction) { /* @@ -3744,6 +3963,14 @@ LogicalRepApplyLoop(XLogRecPtr last_received) else wait_time = NAPTIME_PER_CYCLE; + /* + * Ensure to wake up when it's possible to advance the non-removable + * transaction ID. + */ + if (rdt_data.phase == RDT_GET_CANDIDATE_XID && + rdt_data.xid_advance_interval) + wait_time = Min(wait_time, rdt_data.xid_advance_interval); + rc = WaitLatchOrSocket(MyLatch, WL_SOCKET_READABLE | WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, @@ -3807,6 +4034,8 @@ LogicalRepApplyLoop(XLogRecPtr last_received) send_feedback(last_received, requestReply, requestReply); + maybe_advance_nonremovable_xid(&rdt_data, false); + /* * Force reporting to ensure long idle periods don't lead to * arbitrarily delayed stats. Stats can only be reported outside @@ -3842,7 +4071,6 @@ send_feedback(XLogRecPtr recvpos, bool force, bool requestReply) static XLogRecPtr last_recvpos = InvalidXLogRecPtr; static XLogRecPtr last_writepos = InvalidXLogRecPtr; - static XLogRecPtr last_flushpos = InvalidXLogRecPtr; XLogRecPtr writepos; XLogRecPtr flushpos; @@ -3920,6 +4148,367 @@ send_feedback(XLogRecPtr recvpos, bool force, bool requestReply) last_flushpos = flushpos; } +/* + * Attempt to advance the non-removable transaction ID. + * + * See comments atop worker.c for details. + */ +static void +maybe_advance_nonremovable_xid(RetainDeadTuplesData *rdt_data, + bool status_received) +{ + if (!can_advance_nonremovable_xid(rdt_data)) + return; + + process_rdt_phase_transition(rdt_data, status_received); +} + +/* + * Preliminary check to determine if advancing the non-removable transaction ID + * is allowed. 
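+ *
+ * This is called for every received message, keepalive, and wait-loop
+ * timeout, so it must stay cheap; only leader apply workers of
+ * subscriptions with retain_dead_tuples enabled proceed to the phase
+ * machine.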
+ */ +static bool +can_advance_nonremovable_xid(RetainDeadTuplesData *rdt_data) +{ + /* + * It is sufficient to manage non-removable transaction ID for a + * subscription by the main apply worker to detect conflicts reliably even + * for table sync or parallel apply workers. + */ + if (!am_leader_apply_worker()) + return false; + + /* No need to advance if retaining dead tuples is not required */ + if (!MySubscription->retaindeadtuples) + return false; + + return true; +} + +/* + * Process phase transitions during the non-removable transaction ID + * advancement. See comments atop worker.c for details of the transition. + */ +static void +process_rdt_phase_transition(RetainDeadTuplesData *rdt_data, + bool status_received) +{ + switch (rdt_data->phase) + { + case RDT_GET_CANDIDATE_XID: + get_candidate_xid(rdt_data); + break; + case RDT_REQUEST_PUBLISHER_STATUS: + request_publisher_status(rdt_data); + break; + case RDT_WAIT_FOR_PUBLISHER_STATUS: + wait_for_publisher_status(rdt_data, status_received); + break; + case RDT_WAIT_FOR_LOCAL_FLUSH: + wait_for_local_flush(rdt_data); + break; + } +} + +/* + * Workhorse for the RDT_GET_CANDIDATE_XID phase. + */ +static void +get_candidate_xid(RetainDeadTuplesData *rdt_data) +{ + TransactionId oldest_running_xid; + TimestampTz now; + + /* + * Use last_recv_time when applying changes in the loop to avoid + * unnecessary system time retrieval. If last_recv_time is not available, + * obtain the current timestamp. + */ + now = rdt_data->last_recv_time ? rdt_data->last_recv_time : GetCurrentTimestamp(); + + /* + * Compute the candidate_xid and request the publisher status at most once + * per xid_advance_interval. Refer to adjust_xid_advance_interval() for + * details on how this value is dynamically adjusted. This is to avoid + * using CPU and network resources without making much progress. + */ + if (!TimestampDifferenceExceeds(rdt_data->candidate_xid_time, now, + rdt_data->xid_advance_interval)) + return; + + /* + * Immediately update the timer, even if the function returns later + * without setting candidate_xid due to inactivity on the subscriber. This + * avoids frequent calls to GetOldestActiveTransactionId. + */ + rdt_data->candidate_xid_time = now; + + /* + * Consider transactions in the current database, as only dead tuples from + * this database are required for conflict detection. + */ + oldest_running_xid = GetOldestActiveTransactionId(false, false); + + /* + * Oldest active transaction ID (oldest_running_xid) can't be behind any + * of its previously computed value. + */ + Assert(TransactionIdPrecedesOrEquals(MyLogicalRepWorker->oldest_nonremovable_xid, + oldest_running_xid)); + + /* Return if the oldest_nonremovable_xid cannot be advanced */ + if (TransactionIdEquals(MyLogicalRepWorker->oldest_nonremovable_xid, + oldest_running_xid)) + { + adjust_xid_advance_interval(rdt_data, false); + return; + } + + adjust_xid_advance_interval(rdt_data, true); + + rdt_data->candidate_xid = oldest_running_xid; + rdt_data->phase = RDT_REQUEST_PUBLISHER_STATUS; + + /* process the next phase */ + process_rdt_phase_transition(rdt_data, false); +} + +/* + * Workhorse for the RDT_REQUEST_PUBLISHER_STATUS phase. 
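+ *
+ * The request is a single CopyData message: the byte 'p' followed by an
+ * int64 TimestampTz, which the publisher answers with an 's' primary
+ * status message (see ProcessStandbyPSRequestMessage in walsender.c).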
+ */ +static void +request_publisher_status(RetainDeadTuplesData *rdt_data) +{ + static StringInfo request_message = NULL; + + if (!request_message) + { + MemoryContext oldctx = MemoryContextSwitchTo(ApplyContext); + + request_message = makeStringInfo(); + MemoryContextSwitchTo(oldctx); + } + else + resetStringInfo(request_message); + + /* + * Send the current time to update the remote walsender's latest reply + * message received time. + */ + pq_sendbyte(request_message, 'p'); + pq_sendint64(request_message, GetCurrentTimestamp()); + + elog(DEBUG2, "sending publisher status request message"); + + /* Send a request for the publisher status */ + walrcv_send(LogRepWorkerWalRcvConn, + request_message->data, request_message->len); + + rdt_data->phase = RDT_WAIT_FOR_PUBLISHER_STATUS; + + /* + * Skip calling maybe_advance_nonremovable_xid() since further transition + * is possible only once we receive the publisher status message. + */ +} + +/* + * Workhorse for the RDT_WAIT_FOR_PUBLISHER_STATUS phase. + */ +static void +wait_for_publisher_status(RetainDeadTuplesData *rdt_data, + bool status_received) +{ + /* + * Return if we have requested but not yet received the publisher status. + */ + if (!status_received) + return; + + if (!FullTransactionIdIsValid(rdt_data->remote_wait_for)) + rdt_data->remote_wait_for = rdt_data->remote_nextxid; + + /* + * Check if all remote concurrent transactions that were active at the + * first status request have now completed. If completed, proceed to the + * next phase; otherwise, continue checking the publisher status until + * these transactions finish. + * + * It's possible that transactions in the commit phase during the last + * cycle have now finished committing, but remote_oldestxid remains older + * than remote_wait_for. This can happen if some old transaction came in + * the commit phase when we requested status in this cycle. We do not + * handle this case explicitly as it's rare and the benefit doesn't + * justify the required complexity. Tracking would require either caching + * all xids at the publisher or sending them to subscribers. The condition + * will resolve naturally once the remaining transactions are finished. + * + * Directly advancing the non-removable transaction ID is possible if + * there are no activities on the publisher since the last advancement + * cycle. However, it requires maintaining two fields, last_remote_nextxid + * and last_remote_lsn, within the structure for comparison with the + * current cycle's values. Considering the minimal cost of continuing in + * RDT_WAIT_FOR_LOCAL_FLUSH without awaiting changes, we opted not to + * advance the transaction ID here. + */ + if (FullTransactionIdPrecedesOrEquals(rdt_data->remote_wait_for, + rdt_data->remote_oldestxid)) + rdt_data->phase = RDT_WAIT_FOR_LOCAL_FLUSH; + else + rdt_data->phase = RDT_REQUEST_PUBLISHER_STATUS; + + /* process the next phase */ + process_rdt_phase_transition(rdt_data, false); +} + +/* + * Workhorse for the RDT_WAIT_FOR_LOCAL_FLUSH phase. + */ +static void +wait_for_local_flush(RetainDeadTuplesData *rdt_data) +{ + Assert(!XLogRecPtrIsInvalid(rdt_data->remote_lsn) && + TransactionIdIsValid(rdt_data->candidate_xid)); + + /* + * We expect the publisher and subscriber clocks to be in sync using time + * sync service like NTP. Otherwise, we will advance this worker's + * oldest_nonremovable_xid prematurely, leading to the removal of rows + * required to detect conflicts reliably. 
This check primarily addresses + * scenarios where the publisher's clock falls behind; if the publisher's + * clock is ahead, subsequent transactions will naturally bear later + * commit timestamps, conforming to the design outlined atop worker.c. + * + * XXX Consider waiting for the publisher's clock to catch up with the + * subscriber's before proceeding to the next phase. + */ + if (TimestampDifferenceExceeds(rdt_data->reply_time, + rdt_data->candidate_xid_time, 0)) + ereport(ERROR, + errmsg_internal("oldest_nonremovable_xid transaction ID could be advanced prematurely"), + errdetail_internal("The clock on the publisher is behind that of the subscriber.")); + + /* + * Do not attempt to advance the non-removable transaction ID when table + * sync is in progress. During this time, changes from a single + * transaction may be applied by multiple table sync workers corresponding + * to the target tables. So, it's necessary for all table sync workers to + * apply and flush the corresponding changes before advancing the + * transaction ID, otherwise, dead tuples that are still needed for + * conflict detection in table sync workers could be removed prematurely. + * However, confirming the apply and flush progress across all table sync + * workers is complex and not worth the effort, so we simply return if not + * all tables are in the READY state. + * + * It is safe to add new tables with initial states to the subscription + * after this check because any changes applied to these tables should + * have a WAL position greater than the rdt_data->remote_lsn. + */ + if (!AllTablesyncsReady()) + return; + + /* + * Update and check the remote flush position if we are applying changes + * in a loop. This is done at most once per WalWriterDelay to avoid + * performing costly operations in get_flush_position() too frequently + * during change application. + */ + if (last_flushpos < rdt_data->remote_lsn && rdt_data->last_recv_time && + TimestampDifferenceExceeds(rdt_data->flushpos_update_time, + rdt_data->last_recv_time, WalWriterDelay)) + { + XLogRecPtr writepos; + XLogRecPtr flushpos; + bool have_pending_txes; + + /* Fetch the latest remote flush position */ + get_flush_position(&writepos, &flushpos, &have_pending_txes); + + if (flushpos > last_flushpos) + last_flushpos = flushpos; + + rdt_data->flushpos_update_time = rdt_data->last_recv_time; + } + + /* Return to wait for the changes to be applied */ + if (last_flushpos < rdt_data->remote_lsn) + return; + + /* + * Reaching here means the remote WAL position has been received, and all + * transactions up to that position on the publisher have been applied and + * flushed locally. So, we can advance the non-removable transaction ID. + */ + SpinLockAcquire(&MyLogicalRepWorker->relmutex); + MyLogicalRepWorker->oldest_nonremovable_xid = rdt_data->candidate_xid; + SpinLockRelease(&MyLogicalRepWorker->relmutex); + + elog(DEBUG2, "confirmed flush up to remote lsn %X/%X: new oldest_nonremovable_xid %u", + LSN_FORMAT_ARGS(rdt_data->remote_lsn), + rdt_data->candidate_xid); + + /* Notify launcher to update the xmin of the conflict slot */ + ApplyLauncherWakeup(); + + /* + * Reset all data fields except those used to determine the timing for the + * next round of transaction ID advancement. We can even use + * flushpos_update_time in the next round to decide whether to get the + * latest flush position. 
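	 *
	 * (Only last_recv_time, candidate_xid_time, xid_advance_interval and
	 * flushpos_update_time carry over to the next round.)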
+	 */
+	rdt_data->phase = RDT_GET_CANDIDATE_XID;
+	rdt_data->remote_lsn = InvalidXLogRecPtr;
+	rdt_data->remote_oldestxid = InvalidFullTransactionId;
+	rdt_data->remote_nextxid = InvalidFullTransactionId;
+	rdt_data->reply_time = 0;
+	rdt_data->remote_wait_for = InvalidFullTransactionId;
+	rdt_data->candidate_xid = InvalidTransactionId;
+
+	/* process the next phase */
+	process_rdt_phase_transition(rdt_data, false);
+}
+
+/*
+ * Adjust the interval for advancing non-removable transaction IDs.
+ *
+ * We double the interval to try advancing the non-removable transaction IDs
+ * if there is no activity on the node. The maximum value of the interval is
+ * capped by wal_receiver_status_interval if it is not zero, otherwise to
+ * 3 minutes, which should be sufficient to avoid using CPU or network
+ * resources without much benefit.
+ *
+ * The interval is reset to a minimum value of 100ms once there is some
+ * activity on the node.
+ *
+ * XXX The use of wal_receiver_status_interval is a bit arbitrary, so we can
+ * consider another interval or a separate GUC if the need arises.
+ */
+static void
+adjust_xid_advance_interval(RetainDeadTuplesData *rdt_data, bool new_xid_found)
+{
+	if (!new_xid_found && rdt_data->xid_advance_interval)
+	{
+		int			max_interval = wal_receiver_status_interval
+			? wal_receiver_status_interval * 1000
+			: MAX_XID_ADVANCE_INTERVAL;
+
+		/*
+		 * No new transaction ID has been assigned since the last check, so
+		 * double the interval, but not beyond the maximum allowable value.
+		 */
+		rdt_data->xid_advance_interval = Min(rdt_data->xid_advance_interval * 2,
+											 max_interval);
+	}
+	else
+	{
+		/*
+		 * A new transaction ID was found or the interval is not yet
+		 * initialized, so set the interval to the minimum value.
+		 */
+		rdt_data->xid_advance_interval = MIN_XID_ADVANCE_INTERVAL;
+	}
+}
+
 /*
  * Exit routine for apply workers due to subscription parameter changes.
  */
@@ -4708,6 +5297,30 @@ InitializeLogRepWorker(void)
 		apply_worker_exit();
 	}
 
+	/*
+	 * Restart the worker if retain_dead_tuples was enabled during startup.
+	 *
+	 * At this point, the replication slot used for conflict detection might
+	 * not exist yet, or could be dropped soon if the launcher perceives
+	 * retain_dead_tuples as disabled. To avoid unnecessary tracking of
+	 * oldest_nonremovable_xid when the slot is absent or at risk of being
+	 * dropped, a restart is initiated.
+	 *
+	 * The oldest_nonremovable_xid should be initialized only when
+	 * retain_dead_tuples is enabled before launching the worker. See
+	 * logicalrep_worker_launch.
+	 */
+	if (am_leader_apply_worker() &&
+		MySubscription->retaindeadtuples &&
+		!TransactionIdIsValid(MyLogicalRepWorker->oldest_nonremovable_xid))
+	{
+		ereport(LOG,
+				errmsg("logical replication worker for subscription \"%s\" will restart because the option %s was enabled during startup",
+					   MySubscription->name, "retain_dead_tuples"));
+
+		apply_worker_exit();
+	}
+
 	/* Setup synchronous commit according to the user's wishes */
 	SetConfigOption("synchronous_commit", MySubscription->synccommit,
 					PGC_BACKEND, PGC_S_OVERRIDE);
@@ -4864,6 +5477,14 @@ DisableSubscriptionAndExit(void)
 				errmsg("subscription \"%s\" has been disabled because of an error",
 					   MySubscription->name));
 
+	/*
+	 * Skip the track_commit_timestamp check when disabling the worker due to
+	 * an error, as verifying commit timestamps is unnecessary in this
+	 * context.
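+	 *
+	 * (check_guc = false skips that check; sub_disabled = true with elevel
+	 * WARNING lets CheckSubDeadTupleRetention() warn, rather than error,
+	 * about the retention requirements of the now-disabled subscription.)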
+ */ + if (MySubscription->retaindeadtuples) + CheckSubDeadTupleRetention(false, true, WARNING); + proc_exit(0); } diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index e44ad576bc769..8605776ad8631 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -47,6 +47,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "postmaster/interrupt.h" +#include "replication/logicallauncher.h" #include "replication/slotsync.h" #include "replication/slot.h" #include "replication/walsender_private.h" @@ -172,6 +173,7 @@ static SyncStandbySlotsConfigData *synchronized_standby_slots_config; static XLogRecPtr ss_oldest_flush_lsn = InvalidXLogRecPtr; static void ReplicationSlotShmemExit(int code, Datum arg); +static bool IsSlotForConflictCheck(const char *name); static void ReplicationSlotDropPtr(ReplicationSlot *slot); /* internal persistency functions */ @@ -258,13 +260,17 @@ ReplicationSlotShmemExit(int code, Datum arg) /* * Check whether the passed slot name is valid and report errors at elevel. * + * An error will be reported for a reserved replication slot name if + * allow_reserved_name is set to false. + * * Slot names may consist out of [a-z0-9_]{1,NAMEDATALEN-1} which should allow * the name to be used as a directory name on every supported OS. * * Returns whether the directory name is valid or not if elevel < ERROR. */ bool -ReplicationSlotValidateName(const char *name, int elevel) +ReplicationSlotValidateName(const char *name, bool allow_reserved_name, + int elevel) { const char *cp; @@ -300,9 +306,31 @@ ReplicationSlotValidateName(const char *name, int elevel) return false; } } + + if (!allow_reserved_name && IsSlotForConflictCheck(name)) + { + ereport(elevel, + errcode(ERRCODE_RESERVED_NAME), + errmsg("replication slot name \"%s\" is reserved", + name), + errdetail("The name \"%s\" is reserved for the conflict detection slot.", + CONFLICT_DETECTION_SLOT)); + + return false; + } + return true; } +/* + * Return true if the replication slot name is "pg_conflict_detection". + */ +static bool +IsSlotForConflictCheck(const char *name) +{ + return (strcmp(name, CONFLICT_DETECTION_SLOT) == 0); +} + /* * Create a new replication slot and mark it as used by this backend. * @@ -330,7 +358,12 @@ ReplicationSlotCreate(const char *name, bool db_specific, Assert(MyReplicationSlot == NULL); - ReplicationSlotValidateName(name, ERROR); + /* + * The logical launcher or pg_upgrade may create or migrate an internal + * slot, so using a reserved name is allowed in these cases. + */ + ReplicationSlotValidateName(name, IsBinaryUpgrade || IsLogicalLauncher(), + ERROR); if (failover) { @@ -581,6 +614,17 @@ ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid) name))); } + /* + * Do not allow users to acquire the reserved slot. This scenario may + * occur if the launcher that owns the slot has terminated unexpectedly + * due to an error, and a backend process attempts to reuse the slot. + */ + if (!IsLogicalLauncher() && IsSlotForConflictCheck(name)) + ereport(ERROR, + errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("cannot acquire replication slot \"%s\"", name), + errdetail("The slot is reserved for conflict detection and can only be acquired by logical replication launcher.")); + /* * This is the slot we want; check if it's active under some other * process. In single user mode, we don't need this check. 
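As a minimal sketch of the reservation added above (illustrative only, not part of the patch; it assumes "replication/slot.h", "replication/logicallauncher.h" and "miscadmin.h" are included, and the wrapper function name is ours), each call is shown independently, since the first one raises an error:

/*
 * Illustrative only: exercising the reserved-name rules added to slot.c.
 */
static void
reserved_slot_name_example(void)
{
	/* An ordinary backend cannot use the reserved name ... */
	ReplicationSlotValidateName(CONFLICT_DETECTION_SLOT, false, ERROR);
	/* ERROR:  replication slot name "pg_conflict_detection" is reserved */

	/*
	 * ... while the launcher and pg_upgrade are allowed to, matching the
	 * gate used in ReplicationSlotCreate() above.
	 */
	ReplicationSlotValidateName(CONFLICT_DETECTION_SLOT,
								IsBinaryUpgrade || IsLogicalLauncher(),
								ERROR);
}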
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 28b8591efa5f0..4c72a0d43b32b 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -84,6 +84,7 @@ #include "storage/ipc.h" #include "storage/pmsignal.h" #include "storage/proc.h" +#include "storage/procarray.h" #include "tcop/dest.h" #include "tcop/tcopprot.h" #include "utils/acl.h" @@ -258,6 +259,7 @@ static void StartLogicalReplication(StartReplicationCmd *cmd); static void ProcessStandbyMessage(void); static void ProcessStandbyReplyMessage(void); static void ProcessStandbyHSFeedbackMessage(void); +static void ProcessStandbyPSRequestMessage(void); static void ProcessRepliesIfAny(void); static void ProcessPendingWrites(void); static void WalSndKeepalive(bool requestReply, XLogRecPtr writePtr); @@ -2355,6 +2357,10 @@ ProcessStandbyMessage(void) ProcessStandbyHSFeedbackMessage(); break; + case 'p': + ProcessStandbyPSRequestMessage(); + break; + default: ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), @@ -2701,6 +2707,60 @@ ProcessStandbyHSFeedbackMessage(void) } } +/* + * Process the request for a primary status update message. + */ +static void +ProcessStandbyPSRequestMessage(void) +{ + XLogRecPtr lsn = InvalidXLogRecPtr; + TransactionId oldestXidInCommit; + FullTransactionId nextFullXid; + FullTransactionId fullOldestXidInCommit; + WalSnd *walsnd = MyWalSnd; + TimestampTz replyTime; + + /* + * This shouldn't happen because we don't support getting primary status + * message from standby. + */ + if (RecoveryInProgress()) + elog(ERROR, "the primary status is unavailable during recovery"); + + replyTime = pq_getmsgint64(&reply_message); + + /* + * Update shared state for this WalSender process based on reply data from + * standby. + */ + SpinLockAcquire(&walsnd->mutex); + walsnd->replyTime = replyTime; + SpinLockRelease(&walsnd->mutex); + + /* + * Consider transactions in the current database, as only these are the + * ones replicated. + */ + oldestXidInCommit = GetOldestActiveTransactionId(true, false); + nextFullXid = ReadNextFullTransactionId(); + fullOldestXidInCommit = FullTransactionIdFromAllowableAt(nextFullXid, + oldestXidInCommit); + lsn = GetXLogWriteRecPtr(); + + elog(DEBUG2, "sending primary status"); + + /* construct the message... */ + resetStringInfo(&output_message); + pq_sendbyte(&output_message, 's'); + pq_sendint64(&output_message, lsn); + pq_sendint64(&output_message, (int64) U64FromFullTransactionId(fullOldestXidInCommit)); + pq_sendint64(&output_message, (int64) U64FromFullTransactionId(nextFullXid)); + pq_sendint64(&output_message, GetCurrentTimestamp()); + + /* ... and send it wrapped in CopyData */ + pq_putmessage_noblock('d', output_message.data, output_message.len); +} + /* * Compute how long send/receive loops should sleep. * diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 2418967def695..bf987aed8d327 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2814,8 +2814,10 @@ GetRunningTransactionData(void) * * Similar to GetSnapshotData but returns just oldestActiveXid. We include * all PGPROCs with an assigned TransactionId, even VACUUM processes. - * We look at all databases, though there is no need to include WALSender - * since this has no effect on hot standby conflicts. + * + * If allDbs is true, we look at all databases, though there is no need to + * include WALSender since this has no effect on hot standby conflicts. 
If + * allDbs is false, skip processes attached to other databases. * * This is never executed during recovery so there is no need to look at * KnownAssignedXids. @@ -2823,9 +2825,12 @@ GetRunningTransactionData(void) * We don't worry about updating other counters, we want to keep this as * simple as possible and leave GetSnapshotData() as the primary code for * that bookkeeping. + * + * inCommitOnly indicates getting the oldestActiveXid among the transactions + * in the commit critical section. */ TransactionId -GetOldestActiveTransactionId(void) +GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs) { ProcArrayStruct *arrayP = procArray; TransactionId *other_xids = ProcGlobal->xids; @@ -2852,6 +2857,8 @@ GetOldestActiveTransactionId(void) for (index = 0; index < arrayP->numProcs; index++) { TransactionId xid; + int pgprocno = arrayP->pgprocnos[index]; + PGPROC *proc = &allProcs[pgprocno]; /* Fetch xid just once - see GetNewTransactionId */ xid = UINT32_ACCESS_ONCE(other_xids[index]); @@ -2859,6 +2866,13 @@ GetOldestActiveTransactionId(void) if (!TransactionIdIsNormal(xid)) continue; + if (inCommitOnly && + (proc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0) + continue; + + if (!allDbs && proc->databaseId != MyDatabaseId) + continue; + if (TransactionIdPrecedes(xid, oldestRunningXid)) oldestRunningXid = xid; diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index d44f8c262baa2..a4f8b4faa90dc 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -21,6 +21,7 @@ #include "commands/extension.h" #include "miscadmin.h" #include "replication/logical.h" +#include "replication/logicallauncher.h" #include "replication/origin.h" #include "replication/worker_internal.h" #include "storage/lmgr.h" @@ -410,3 +411,21 @@ binary_upgrade_replorigin_advance(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +/* + * binary_upgrade_create_conflict_detection_slot + * + * Create a replication slot to retain information necessary for conflict + * detection such as dead tuples, commit timestamps, and origins. 
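+ *
+ * pg_upgrade invokes this function via SQL on the new cluster when the old
+ * cluster has at least one subscription with retain_dead_tuples enabled;
+ * see create_conflict_detection_slot() in pg_upgrade.c.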
+ */ +Datum +binary_upgrade_create_conflict_detection_slot(PG_FUNCTION_ARGS) +{ + CHECK_IS_BINARY_UPGRADE; + + CreateConflictDetectionSlot(); + + ReplicationSlotRelease(); + + PG_RETURN_VOID(); +} diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index ede10e5291efc..6298edb26b5df 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -5028,6 +5028,7 @@ getSubscriptions(Archive *fout) int i_suboriginremotelsn; int i_subenabled; int i_subfailover; + int i_subretaindeadtuples; int i, ntups; @@ -5100,10 +5101,17 @@ getSubscriptions(Archive *fout) if (fout->remoteVersion >= 170000) appendPQExpBufferStr(query, - " s.subfailover\n"); + " s.subfailover,\n"); else appendPQExpBufferStr(query, - " false AS subfailover\n"); + " false AS subfailover,\n"); + + if (fout->remoteVersion >= 190000) + appendPQExpBufferStr(query, + " s.subretaindeadtuples\n"); + else + appendPQExpBufferStr(query, + " false AS subretaindeadtuples\n"); appendPQExpBufferStr(query, "FROM pg_subscription s\n"); @@ -5137,6 +5145,7 @@ getSubscriptions(Archive *fout) i_subpasswordrequired = PQfnumber(res, "subpasswordrequired"); i_subrunasowner = PQfnumber(res, "subrunasowner"); i_subfailover = PQfnumber(res, "subfailover"); + i_subretaindeadtuples = PQfnumber(res, "subretaindeadtuples"); i_subconninfo = PQfnumber(res, "subconninfo"); i_subslotname = PQfnumber(res, "subslotname"); i_subsynccommit = PQfnumber(res, "subsynccommit"); @@ -5170,6 +5179,8 @@ getSubscriptions(Archive *fout) (strcmp(PQgetvalue(res, i, i_subrunasowner), "t") == 0); subinfo[i].subfailover = (strcmp(PQgetvalue(res, i, i_subfailover), "t") == 0); + subinfo[i].subretaindeadtuples = + (strcmp(PQgetvalue(res, i, i_subretaindeadtuples), "t") == 0); subinfo[i].subconninfo = pg_strdup(PQgetvalue(res, i, i_subconninfo)); if (PQgetisnull(res, i, i_subslotname)) @@ -5428,6 +5439,9 @@ dumpSubscription(Archive *fout, const SubscriptionInfo *subinfo) if (subinfo->subfailover) appendPQExpBufferStr(query, ", failover = true"); + if (subinfo->subretaindeadtuples) + appendPQExpBufferStr(query, ", retain_dead_tuples = true"); + if (strcmp(subinfo->subsynccommit, "off") != 0) appendPQExpBuffer(query, ", synchronous_commit = %s", fmtId(subinfo->subsynccommit)); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 2370c98d192a6..93a4475d51b80 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -711,6 +711,7 @@ typedef struct _SubscriptionInfo bool subpasswordrequired; bool subrunasowner; bool subfailover; + bool subretaindeadtuples; char *subconninfo; char *subslotname; char *subsynccommit; diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 30579ef2051ba..5e6403f07731b 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -28,7 +28,7 @@ static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(void); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); static void check_for_unicode_update(ClusterInfo *cluster); -static void check_new_cluster_logical_replication_slots(void); +static void check_new_cluster_replication_slots(void); static void check_new_cluster_subscription_configuration(void); static void check_old_cluster_for_valid_slots(void); static void check_old_cluster_subscription_state(void); @@ -631,7 +631,7 @@ check_and_dump_old_cluster(void) * Before that the logical slots are not upgraded, so we will not be * able to upgrade the logical replication clusters completely. 
*/ - get_subscription_count(&old_cluster); + get_subscription_info(&old_cluster); check_old_cluster_subscription_state(); } @@ -764,7 +764,7 @@ check_new_cluster(void) check_for_new_tablespace_dir(); - check_new_cluster_logical_replication_slots(); + check_new_cluster_replication_slots(); check_new_cluster_subscription_configuration(); } @@ -2040,48 +2040,80 @@ check_for_unicode_update(ClusterInfo *cluster) } /* - * check_new_cluster_logical_replication_slots() + * check_new_cluster_replication_slots() * - * Verify that there are no logical replication slots on the new cluster and - * that the parameter settings necessary for creating slots are sufficient. + * Validate the new cluster's readiness for migrating replication slots: + * - Ensures no existing logical replication slots on the new cluster when + * migrating logical slots. + * - Ensure conflict detection slot does not exist on the new cluster when + * migrating subscriptions with retain_dead_tuples enabled. + * - Ensure that the parameter settings on the new cluster necessary for + * creating slots are sufficient. */ static void -check_new_cluster_logical_replication_slots(void) +check_new_cluster_replication_slots(void) { PGresult *res; PGconn *conn; int nslots_on_old; int nslots_on_new; + int rdt_slot_on_new; int max_replication_slots; char *wal_level; + int i_nslots_on_new; + int i_rdt_slot_on_new; - /* Logical slots can be migrated since PG17. */ + /* + * Logical slots can be migrated since PG17 and a physical slot + * CONFLICT_DETECTION_SLOT can be migrated since PG19. + */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1600) return; nslots_on_old = count_old_cluster_logical_slots(); - /* Quick return if there are no logical slots to be migrated. */ - if (nslots_on_old == 0) + /* + * Quick return if there are no slots to be migrated and no subscriptions + * have the retain_dead_tuples option enabled. + */ + if (nslots_on_old == 0 && !old_cluster.sub_retain_dead_tuples) return; conn = connectToServer(&new_cluster, "template1"); - prep_status("Checking for new cluster logical replication slots"); + prep_status("Checking for new cluster replication slots"); - res = executeQueryOrDie(conn, "SELECT count(*) " - "FROM pg_catalog.pg_replication_slots " - "WHERE slot_type = 'logical' AND " - "temporary IS FALSE;"); + res = executeQueryOrDie(conn, "SELECT %s AS nslots_on_new, %s AS rdt_slot_on_new " + "FROM pg_catalog.pg_replication_slots", + nslots_on_old > 0 + ? "COUNT(*) FILTER (WHERE slot_type = 'logical' AND temporary IS FALSE)" + : "0", + old_cluster.sub_retain_dead_tuples + ? 
"COUNT(*) FILTER (WHERE slot_name = 'pg_conflict_detection')" + : "0"); if (PQntuples(res) != 1) - pg_fatal("could not count the number of logical replication slots"); + pg_fatal("could not count the number of replication slots"); - nslots_on_new = atoi(PQgetvalue(res, 0, 0)); + i_nslots_on_new = PQfnumber(res, "nslots_on_new"); + i_rdt_slot_on_new = PQfnumber(res, "rdt_slot_on_new"); + + nslots_on_new = atoi(PQgetvalue(res, 0, i_nslots_on_new)); if (nslots_on_new) + { + Assert(nslots_on_old); pg_fatal("expected 0 logical replication slots but found %d", nslots_on_new); + } + + rdt_slot_on_new = atoi(PQgetvalue(res, 0, i_rdt_slot_on_new)); + + if (rdt_slot_on_new) + { + Assert(old_cluster.sub_retain_dead_tuples); + pg_fatal("The replication slot \"pg_conflict_detection\" already exists on the new cluster"); + } PQclear(res); @@ -2094,12 +2126,24 @@ check_new_cluster_logical_replication_slots(void) wal_level = PQgetvalue(res, 0, 0); - if (strcmp(wal_level, "logical") != 0) + if (nslots_on_old > 0 && strcmp(wal_level, "logical") != 0) pg_fatal("\"wal_level\" must be \"logical\" but is set to \"%s\"", wal_level); + if (old_cluster.sub_retain_dead_tuples && + strcmp(wal_level, "minimal") == 0) + pg_fatal("\"wal_level\" must be \"replica\" or \"logical\" but is set to \"%s\"", + wal_level); + max_replication_slots = atoi(PQgetvalue(res, 1, 0)); + if (old_cluster.sub_retain_dead_tuples && + nslots_on_old + 1 > max_replication_slots) + pg_fatal("\"max_replication_slots\" (%d) must be greater than or equal to the number of " + "logical replication slots on the old cluster plus one additional slot required " + "for retaining conflict detection information (%d)", + max_replication_slots, nslots_on_old + 1); + if (nslots_on_old > max_replication_slots) pg_fatal("\"max_replication_slots\" (%d) must be greater than or equal to the number of " "logical replication slots (%d) on the old cluster", @@ -2211,6 +2255,22 @@ check_old_cluster_for_valid_slots(void) "The slot \"%s\" has not consumed the WAL yet\n", slot->slotname); } + + /* + * The name "pg_conflict_detection" (defined as + * CONFLICT_DETECTION_SLOT) has been reserved for logical + * replication conflict detection slot since PG19. + */ + if (strcmp(slot->slotname, "pg_conflict_detection") == 0) + { + if (script == NULL && + (script = fopen_priv(output_path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %m", output_path); + + fprintf(script, + "The slot name \"%s\" is reserved\n", + slot->slotname); + } } } diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 4b7a56f5b3be4..a437067cdca82 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -752,20 +752,33 @@ count_old_cluster_logical_slots(void) } /* - * get_subscription_count() + * get_subscription_info() * - * Gets the number of subscriptions in the cluster. + * Gets the information of subscriptions in the cluster. 
*/ void -get_subscription_count(ClusterInfo *cluster) +get_subscription_info(ClusterInfo *cluster) { PGconn *conn; PGresult *res; + int i_nsub; + int i_retain_dead_tuples; conn = connectToServer(cluster, "template1"); - res = executeQueryOrDie(conn, "SELECT count(*) " - "FROM pg_catalog.pg_subscription"); - cluster->nsubs = atoi(PQgetvalue(res, 0, 0)); + if (GET_MAJOR_VERSION(cluster->major_version) >= 1900) + res = executeQueryOrDie(conn, "SELECT count(*) AS nsub," + "COUNT(CASE WHEN subretaindeadtuples THEN 1 END) > 0 AS retain_dead_tuples " + "FROM pg_catalog.pg_subscription"); + else + res = executeQueryOrDie(conn, "SELECT count(*) AS nsub," + "'f' AS retain_dead_tuples " + "FROM pg_catalog.pg_subscription"); + + i_nsub = PQfnumber(res, "nsub"); + i_retain_dead_tuples = PQfnumber(res, "retain_dead_tuples"); + + cluster->nsubs = atoi(PQgetvalue(res, 0, i_nsub)); + cluster->sub_retain_dead_tuples = (strcmp(PQgetvalue(res, 0, i_retain_dead_tuples), "t") == 0); PQclear(res); PQfinish(conn); diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 536e49d26168b..d5cd5bf0b3a6b 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -67,6 +67,7 @@ static void set_frozenxids(bool minmxid_only); static void make_outputdirs(char *pgdata); static void setup(char *argv0); static void create_logical_replication_slots(void); +static void create_conflict_detection_slot(void); ClusterInfo old_cluster, new_cluster; @@ -88,6 +89,7 @@ int main(int argc, char **argv) { char *deletion_script_file_name = NULL; + bool migrate_logical_slots; /* * pg_upgrade doesn't currently use common/logging.c, but initialize it @@ -198,18 +200,39 @@ main(int argc, char **argv) new_cluster.pgdata); check_ok(); + migrate_logical_slots = count_old_cluster_logical_slots(); + /* - * Migrate the logical slots to the new cluster. Note that we need to do - * this after resetting WAL because otherwise the required WAL would be - * removed and slots would become unusable. There is a possibility that - * background processes might generate some WAL before we could create the - * slots in the new cluster but we can ignore that WAL as that won't be - * required downstream. + * Migrate replication slots to the new cluster. + * + * Note that we must migrate logical slots after resetting WAL because + * otherwise the required WAL would be removed and slots would become + * unusable. There is a possibility that background processes might + * generate some WAL before we could create the slots in the new cluster + * but we can ignore that WAL as that won't be required downstream. + * + * The conflict detection slot is not affected by concerns related to WALs + * as it only retains the dead tuples. It is created here for consistency. + * Note that the new conflict detection slot uses the latest transaction + * ID as xmin, so it cannot protect dead tuples that existed before the + * upgrade. Additionally, commit timestamps and origin data are not + * preserved during the upgrade. So, even after creating the slot, the + * upgraded subscriber may be unable to detect conflicts or log relevant + * commit timestamps and origins when applying changes from the publisher + * occurred before the upgrade especially if those changes were not + * replicated. It can only protect tuples that might be deleted after the + * new cluster starts. 
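+	 *
+	 * Consequently, the slot is created below whenever
+	 * old_cluster.sub_retain_dead_tuples is set, independently of whether
+	 * any logical slots are being migrated.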
*/ - if (count_old_cluster_logical_slots()) + if (migrate_logical_slots || old_cluster.sub_retain_dead_tuples) { start_postmaster(&new_cluster, true); - create_logical_replication_slots(); + + if (migrate_logical_slots) + create_logical_replication_slots(); + + if (old_cluster.sub_retain_dead_tuples) + create_conflict_detection_slot(); + stop_postmaster(false); } @@ -1025,3 +1048,24 @@ create_logical_replication_slots(void) return; } + +/* + * create_conflict_detection_slot() + * + * Create a replication slot to retain information necessary for conflict + * detection such as dead tuples, commit timestamps, and origins, for migrated + * subscriptions with retain_dead_tuples enabled. + */ +static void +create_conflict_detection_slot(void) +{ + PGconn *conn_new_template1; + + prep_status("Creating the replication conflict detection slot"); + + conn_new_template1 = connectToServer(&new_cluster, "template1"); + PQclear(executeQueryOrDie(conn_new_template1, "SELECT pg_catalog.binary_upgrade_create_conflict_detection_slot()")); + PQfinish(conn_new_template1); + + check_ok(); +} diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 69c965bb7d09a..e9401430e697f 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -302,6 +302,8 @@ typedef struct uint32 bin_version; /* version returned from pg_ctl */ const char *tablespace_suffix; /* directory specification */ int nsubs; /* number of subscriptions */ + bool sub_retain_dead_tuples; /* whether a subscription enables + * retain_dead_tuples. */ } ClusterInfo; @@ -441,7 +443,7 @@ FileNameMap *gen_db_file_maps(DbInfo *old_db, const char *new_pgdata); void get_db_rel_and_slot_infos(ClusterInfo *cluster); int count_old_cluster_logical_slots(void); -void get_subscription_count(ClusterInfo *cluster); +void get_subscription_info(ClusterInfo *cluster); /* option.c */ diff --git a/src/bin/pg_upgrade/t/004_subscription.pl b/src/bin/pg_upgrade/t/004_subscription.pl index e46f02c6cc612..77387be0f9d56 100644 --- a/src/bin/pg_upgrade/t/004_subscription.pl +++ b/src/bin/pg_upgrade/t/004_subscription.pl @@ -22,13 +22,13 @@ # Initialize the old subscriber node my $old_sub = PostgreSQL::Test::Cluster->new('old_sub'); -$old_sub->init; +$old_sub->init(allows_streaming => 'physical'); $old_sub->start; my $oldbindir = $old_sub->config_data('--bindir'); # Initialize the new subscriber my $new_sub = PostgreSQL::Test::Cluster->new('new_sub'); -$new_sub->init; +$new_sub->init(allows_streaming => 'physical'); my $newbindir = $new_sub->config_data('--bindir'); # In a VPATH build, we'll be started in the source directory, but we want @@ -89,6 +89,54 @@ $old_sub->start; $old_sub->safe_psql('postgres', "DROP SUBSCRIPTION regress_sub1;"); +# ------------------------------------------------------ +# Check that pg_upgrade fails when max_replication_slots configured in the new +# cluster is less than the number of logical slots in the old cluster + 1 when +# subscription's retain_dead_tuples option is enabled. +# ------------------------------------------------------ +# It is sufficient to use disabled subscription to test upgrade failure. 
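+#
+# (With retain_dead_tuples enabled, the new cluster must reserve one extra
+# slot for "pg_conflict_detection" on top of any migrated logical slots, so
+# the max_replication_slots = 0 configured below cannot satisfy the required
+# 0 + 1 slots.)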
+ +$publisher->safe_psql('postgres', "CREATE PUBLICATION regress_pub1"); +$old_sub->safe_psql('postgres', + "CREATE SUBSCRIPTION regress_sub1 CONNECTION '$connstr' PUBLICATION regress_pub1 WITH (enabled = false, retain_dead_tuples = true)" +); + +$old_sub->stop; + +$new_sub->append_conf('postgresql.conf', 'max_replication_slots = 0'); + +# pg_upgrade will fail because the new cluster has insufficient +# max_replication_slots. +command_checks_all( + [ + 'pg_upgrade', + '--no-sync', + '--old-datadir' => $old_sub->data_dir, + '--new-datadir' => $new_sub->data_dir, + '--old-bindir' => $oldbindir, + '--new-bindir' => $newbindir, + '--socketdir' => $new_sub->host, + '--old-port' => $old_sub->port, + '--new-port' => $new_sub->port, + $mode, + '--check', + ], + 1, + [ + qr/"max_replication_slots" \(0\) must be greater than or equal to the number of logical replication slots on the old cluster plus one additional slot required for retaining conflict detection information \(1\)/ + ], + [qr//], + 'run of pg_upgrade where the new cluster has insufficient max_replication_slots' +); + +# Reset max_replication_slots +$new_sub->append_conf('postgresql.conf', 'max_replication_slots = 10'); + +# Cleanup +$publisher->safe_psql('postgres', "DROP PUBLICATION regress_pub1"); +$old_sub->start; +$old_sub->safe_psql('postgres', "DROP SUBSCRIPTION regress_sub1;"); + # ------------------------------------------------------ # Check that pg_upgrade refuses to run if: # a) there's a subscription with tables in a state other than 'r' (ready) or @@ -200,8 +248,9 @@ rmtree($new_sub->data_dir . "/pg_upgrade_output.d"); # Verify that the upgrade should be successful with tables in 'ready'/'init' -# state along with retaining the replication origin's remote lsn, subscription's -# running status, and failover option. +# state along with retaining the replication origin's remote lsn, +# subscription's running status, failover option, and retain_dead_tuples +# option. $publisher->safe_psql( 'postgres', qq[ CREATE TABLE tab_upgraded1(id int); @@ -211,7 +260,7 @@ $old_sub->safe_psql( 'postgres', qq[ CREATE TABLE tab_upgraded1(id int); - CREATE SUBSCRIPTION regress_sub4 CONNECTION '$connstr' PUBLICATION regress_pub4 WITH (failover = true); + CREATE SUBSCRIPTION regress_sub4 CONNECTION '$connstr' PUBLICATION regress_pub4 WITH (failover = true, retain_dead_tuples = true); ]); # Wait till the table tab_upgraded1 reaches 'ready' state @@ -270,7 +319,8 @@ # Check that pg_upgrade is successful when all tables are in ready or in # init state (tab_upgraded1 table is in ready state and tab_upgraded2 table is # in init state) along with retaining the replication origin's remote lsn, -# subscription's running status, and failover option. +# subscription's running status, failover option, and retain_dead_tuples +# option. # ------------------------------------------------------ command_ok( [ @@ -293,7 +343,8 @@ # ------------------------------------------------------ # Check that the data inserted to the publisher when the new subscriber is down # will be replicated once it is started. Also check that the old subscription -# states and relations origins are all preserved. +# states and relations origins are all preserved, and that the conflict +# detection slot is created. # ------------------------------------------------------ $publisher->safe_psql( 'postgres', qq[ @@ -303,15 +354,16 @@ $new_sub->start; -# The subscription's running status and failover option should be preserved -# in the upgraded instance. 
So regress_sub4 should still have subenabled and -# subfailover set to true, while regress_sub5 should have both set to false. +# The subscription's running status, failover option, and retain_dead_tuples +# option should be preserved in the upgraded instance. So regress_sub4 should +# still have subenabled, subfailover, and subretaindeadtuples set to true, +# while regress_sub5 should have both set to false. $result = $new_sub->safe_psql('postgres', - "SELECT subname, subenabled, subfailover FROM pg_subscription ORDER BY subname" + "SELECT subname, subenabled, subfailover, subretaindeadtuples FROM pg_subscription ORDER BY subname" ); -is( $result, qq(regress_sub4|t|t -regress_sub5|f|f), - "check that the subscription's running status and failover are preserved" +is( $result, qq(regress_sub4|t|t|t +regress_sub5|f|f|f), + "check that the subscription's running status, failover, and retain_dead_tuples are preserved" ); # Subscription relations should be preserved @@ -330,6 +382,11 @@ ); is($result, qq($remote_lsn), "remote_lsn should have been preserved"); +# The conflict detection slot should be created +$result = $new_sub->safe_psql('postgres', + "SELECT xmin IS NOT NULL from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'"); +is($result, qq(t), "conflict detection slot exists"); + # Resume the initial sync and wait until all tables of subscription # 'regress_sub5' are synchronized $new_sub->append_conf('postgresql.conf', diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index dd25d2fe7b8a7..7a06af48842d8 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -6746,7 +6746,7 @@ describeSubscriptions(const char *pattern, bool verbose) printQueryOpt myopt = pset.popt; static const bool translate_columns[] = {false, false, false, false, false, false, false, false, false, false, false, false, false, false, - false}; + false, false}; if (pset.sversion < 100000) { @@ -6814,6 +6814,10 @@ describeSubscriptions(const char *pattern, bool verbose) appendPQExpBuffer(&buf, ", subfailover AS \"%s\"\n", gettext_noop("Failover")); + if (pset.sversion >= 190000) + appendPQExpBuffer(&buf, + ", subretaindeadtuples AS \"%s\"\n", + gettext_noop("Retain dead tuples")); appendPQExpBuffer(&buf, ", subsynccommit AS \"%s\"\n" diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 37524364290ce..dbc586c5bc370 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -2319,8 +2319,9 @@ match_previous_words(int pattern_id, /* ALTER SUBSCRIPTION SET ( */ else if (Matches("ALTER", "SUBSCRIPTION", MatchAny, MatchAnyN, "SET", "(")) COMPLETE_WITH("binary", "disable_on_error", "failover", "origin", - "password_required", "run_as_owner", "slot_name", - "streaming", "synchronous_commit", "two_phase"); + "password_required", "retain_dead_tuples", + "run_as_owner", "slot_name", "streaming", + "synchronous_commit", "two_phase"); /* ALTER SUBSCRIPTION SKIP ( */ else if (Matches("ALTER", "SUBSCRIPTION", MatchAny, MatchAnyN, "SKIP", "(")) COMPLETE_WITH("lsn"); @@ -3774,8 +3775,9 @@ match_previous_words(int pattern_id, else if (Matches("CREATE", "SUBSCRIPTION", MatchAnyN, "WITH", "(")) COMPLETE_WITH("binary", "connect", "copy_data", "create_slot", "disable_on_error", "enabled", "failover", "origin", - "password_required", "run_as_owner", "slot_name", - "streaming", "synchronous_commit", "two_phase"); + "password_required", "retain_dead_tuples", + "run_as_owner", "slot_name", "streaming", + "synchronous_commit", "two_phase"); /* CREATE 
TRIGGER --- is allowed inside CREATE SCHEMA, so use TailMatches */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a3f3315fed997..5173d422d468a 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202507091 +#define CATALOG_VERSION_NO 202507231 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 1fc19146f4674..3ee8fed7e537f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11801,6 +11801,10 @@ proname => 'binary_upgrade_replorigin_advance', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => 'text pg_lsn', prosrc => 'binary_upgrade_replorigin_advance' }, +{ oid => '9159', descr => 'for use by pg_upgrade (conflict detection slot)', + proname => 'binary_upgrade_create_conflict_detection_slot', proisstrict => 'f', + provolatile => 'v', proparallel => 'u', prorettype => 'void', + proargtypes => '', prosrc => 'binary_upgrade_create_conflict_detection_slot' }, # conversion functions { oid => '4302', diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h index 20fc329992dc5..231ef84ec9a6f 100644 --- a/src/include/catalog/pg_subscription.h +++ b/src/include/catalog/pg_subscription.h @@ -78,6 +78,9 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW * slots) in the upstream database are enabled * to be synchronized to the standbys. */ + bool subretaindeadtuples; /* True if dead tuples useful for + * conflict detection are retained */ + #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* Connection string to the publisher */ text subconninfo BKI_FORCE_NOT_NULL; @@ -131,6 +134,8 @@ typedef struct Subscription * (i.e. the main slot and the table sync * slots) in the upstream database are enabled * to be synchronized to the standbys. 
*/ + bool retaindeadtuples; /* True if dead tuples useful for conflict + * detection are retained */ char *conninfo; /* Connection string to the publisher */ char *slotname; /* Name of the replication slot */ char *synccommit; /* Synchronous commit setting for worker */ diff --git a/src/include/commands/subscriptioncmds.h b/src/include/commands/subscriptioncmds.h index c2262e46a7f5c..9b288ad22a623 100644 --- a/src/include/commands/subscriptioncmds.h +++ b/src/include/commands/subscriptioncmds.h @@ -28,4 +28,9 @@ extern void AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId); extern char defGetStreamingMode(DefElem *def); +extern ObjectAddress AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, bool isTopLevel); + +extern void CheckSubDeadTupleRetention(bool check_guc, bool sub_disabled, + int elevel_for_sub_disabled); + #endif /* SUBSCRIPTIONCMDS_H */ diff --git a/src/include/replication/logicallauncher.h b/src/include/replication/logicallauncher.h index 82b202f330515..b29453e8e4f69 100644 --- a/src/include/replication/logicallauncher.h +++ b/src/include/replication/logicallauncher.h @@ -25,8 +25,11 @@ extern void ApplyLauncherShmemInit(void); extern void ApplyLauncherForgetWorkerStartTime(Oid subid); extern void ApplyLauncherWakeupAtCommit(void); +extern void ApplyLauncherWakeup(void); extern void AtEOXact_ApplyLauncher(bool isCommit); +extern void CreateConflictDetectionSlot(void); + extern bool IsLogicalLauncher(void); extern pid_t GetLeaderApplyWorkerPid(pid_t pid); diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index 19b4e8b6a030e..e8fc342d1a96e 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -20,6 +20,13 @@ /* directory to store replication slot data in */ #define PG_REPLSLOT_DIR "pg_replslot" +/* + * The reserved name for a replication slot used to retain dead tuples for + * conflict detection in logical replication. See + * maybe_advance_nonremovable_xid() for detail. + */ +#define CONFLICT_DETECTION_SLOT "pg_conflict_detection" + /* * Behaviour of replication slots, upon release or crash. * @@ -311,7 +318,9 @@ extern void ReplicationSlotMarkDirty(void); /* misc stuff */ extern void ReplicationSlotInitialize(void); -extern bool ReplicationSlotValidateName(const char *name, int elevel); +extern bool ReplicationSlotValidateName(const char *name, + bool allow_reserved_name, + int elevel); extern void ReplicationSlotReserveWal(void); extern void ReplicationSlotsComputeRequiredXmin(bool already_locked); extern void ReplicationSlotsComputeRequiredLSN(void); diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 30b2775952c38..0c7b8440a61e3 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -86,6 +86,16 @@ typedef struct LogicalRepWorker /* Indicates whether apply can be performed in parallel. */ bool parallel_apply; + /* + * The changes made by this and later transactions must be retained to + * ensure reliable conflict detection during the apply phase. + * + * The logical replication launcher manages an internal replication slot + * named "pg_conflict_detection". It asynchronously collects this ID to + * decide when to advance the xmin value of the slot. + */ + TransactionId oldest_nonremovable_xid; + /* Stats. 
*/ XLogRecPtr last_lsn; TimestampTz last_send_time; @@ -245,7 +255,8 @@ extern List *logicalrep_workers_find(Oid subid, bool only_running, extern bool logicalrep_worker_launch(LogicalRepWorkerType wtype, Oid dbid, Oid subid, const char *subname, Oid userid, Oid relid, - dsm_handle subworker_dsm); + dsm_handle subworker_dsm, + bool retain_dead_tuples); extern void logicalrep_worker_stop(Oid subid, Oid relid); extern void logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo); extern void logicalrep_worker_wakeup(Oid subid, Oid relid); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 9f9b3fcfbf1d7..c6f5ebceefdd5 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -130,9 +130,17 @@ extern PGDLLIMPORT int FastPathLockGroupsPerBackend; * the checkpoint are actually destroyed on disk. Replay can cope with a file * or block that doesn't exist, but not with a block that has the wrong * contents. + * + * Setting DELAY_CHKPT_IN_COMMIT is similar to setting DELAY_CHKPT_START, but + * it explicitly indicates that the reason for delaying the checkpoint is due + * to a transaction being within a critical commit section. We need this new + * flag to ensure all the transactions that have acquired commit timestamp are + * finished before we allow the logical replication client to advance its xid + * which is used to hold back dead rows for conflict detection. */ #define DELAY_CHKPT_START (1<<0) #define DELAY_CHKPT_COMPLETE (1<<1) +#define DELAY_CHKPT_IN_COMMIT (DELAY_CHKPT_START | 1<<2) typedef enum { diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index e4877d88e8f9e..2f4ae06c27932 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -55,7 +55,8 @@ extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); extern TransactionId GetOldestTransactionIdConsideredRunning(void); -extern TransactionId GetOldestActiveTransactionId(void); +extern TransactionId GetOldestActiveTransactionId(bool inCommitOnly, + bool allDbs); extern TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly); extern void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin); diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out index 529b22417319d..a98c97f761689 100644 --- a/src/test/regress/expected/subscription.out +++ b/src/test/regress/expected/subscription.out @@ -116,18 +116,18 @@ CREATE SUBSCRIPTION regress_testsub4 CONNECTION 'dbname=regress_doesnotexist' PU WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ regress_testsub4 - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN -------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | none | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | none | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub4 SET (origin = any); \dRs+ regress_testsub4 - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN -------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) DROP SUBSCRIPTION regress_testsub3; @@ -145,10 +145,10 @@ ALTER SUBSCRIPTION regress_testsub CONNECTION 'foobar'; ERROR: invalid connection string syntax: missing "=" after "foobar" in connection info string \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET PUBLICATION testpub2, testpub3 WITH (refresh = false); @@ -157,10 +157,10 @@ ALTER SUBSCRIPTION regress_testsub SET (slot_name = 'newname'); ALTER SUBSCRIPTION regress_testsub SET (password_required = false); ALTER SUBSCRIPTION regress_testsub SET (run_as_owner = true); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | f | t | f | off | dbname=regress_doesnotexist2 | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+------------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | f | t | f | f | off | dbname=regress_doesnotexist2 | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (password_required = true); @@ -176,10 +176,10 @@ ERROR: unrecognized subscription parameter: "create_slot" -- ok ALTER SUBSCRIPTION regress_testsub SKIP (lsn = '0/12345'); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist2 | 0/00012345 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+------------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist2 | 0/00012345 (1 row) -- ok - with lsn = NONE @@ -188,10 +188,10 @@ ALTER SUBSCRIPTION regress_testsub SKIP (lsn = NONE); ALTER SUBSCRIPTION regress_testsub SKIP (lsn = '0/0'); ERROR: invalid WAL location (LSN): 0/0 \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist2 | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+------------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist2 | 0/00000000 (1 row) BEGIN; @@ -223,10 +223,10 @@ ALTER SUBSCRIPTION regress_testsub_foo SET (synchronous_commit = foobar); ERROR: invalid value for parameter "synchronous_commit": "foobar" HINT: Available values: local, remote_write, remote_apply, on, off. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ----------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ - regress_testsub_foo | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | local | dbname=regress_doesnotexist2 | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +---------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+------------------------------+------------ + regress_testsub_foo | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | local | dbname=regress_doesnotexist2 | 0/00000000 (1 row) -- rename back to keep the rest simple @@ -255,19 +255,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | t | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | t | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (binary = false); ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) DROP SUBSCRIPTION regress_testsub; @@ -279,27 +279,27 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | on | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | on | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (streaming = parallel); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (streaming = false); ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) -- fail - publication already exists @@ -314,10 +314,10 @@ ALTER SUBSCRIPTION regress_testsub ADD PUBLICATION testpub1, testpub2 WITH (refr ALTER SUBSCRIPTION regress_testsub ADD PUBLICATION testpub1, testpub2 WITH (refresh = false); ERROR: publication "testpub1" is already in subscription "regress_testsub" \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-----------------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub,testpub1,testpub2} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-----------------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub,testpub1,testpub2} | f | off | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) -- fail - publication used more than once @@ -332,10 +332,10 @@ ERROR: publication "testpub3" is not in subscription "regress_testsub" -- ok - delete publications ALTER SUBSCRIPTION regress_testsub DROP PUBLICATION testpub1, testpub2 WITH (refresh = false); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) DROP SUBSCRIPTION regress_testsub; @@ -371,19 +371,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | p | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) -- we can alter streaming when two_phase enabled ALTER SUBSCRIPTION regress_testsub SET (streaming = true); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); @@ -393,10 +393,10 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); @@ -409,18 +409,34 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (disable_on_error = true); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | t | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | t | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 +(1 row) + +ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); +DROP SUBSCRIPTION regress_testsub; +-- fail - retain_dead_tuples must be boolean +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, retain_dead_tuples = foo); +ERROR: retain_dead_tuples requires a Boolean value +-- ok +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, retain_dead_tuples = false); +WARNING: subscription was created, but is not connected +HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. +\dRs+ + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql index 007c9e7037463..f0f714fe747a9 100644 --- a/src/test/regress/sql/subscription.sql +++ b/src/test/regress/sql/subscription.sql @@ -287,6 +287,17 @@ ALTER SUBSCRIPTION regress_testsub SET (disable_on_error = true); ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); DROP SUBSCRIPTION regress_testsub; +-- fail - retain_dead_tuples must be boolean +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, retain_dead_tuples = foo); + +-- ok +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, retain_dead_tuples = false); + +\dRs+ + +ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); +DROP SUBSCRIPTION regress_testsub; + -- let's do some tests with pg_create_subscription rather than superuser SET SESSION AUTHORIZATION regress_subscription_user3; diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index d78a6bac16aeb..7458d7fba7e9a 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -1,6 +1,6 @@ # Copyright (c) 2025, PostgreSQL Global Development Group -# Test the conflict detection of conflict type 'multiple_unique_conflicts'. 
+# Test conflicts in logical replication use strict; use warnings FATAL => 'all'; use PostgreSQL::Test::Cluster; @@ -18,7 +18,7 @@ # Create a subscriber node my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); -$node_subscriber->init; +$node_subscriber->init(allows_streaming => 'logical'); $node_subscriber->start; # Create a table on publisher @@ -146,4 +146,195 @@ pass('multiple_unique_conflicts detected on a leaf partition during insert'); +############################################################################### +# Setup a bidirectional logical replication between node_A & node_B +############################################################################### + +# Initialize nodes. + +# node_A. Increase the log_min_messages setting to DEBUG2 to debug test +# failures. Disable autovacuum to avoid generating xid that could affect the +# replication slot's xmin value. +my $node_A = $node_publisher; +$node_A->append_conf( + 'postgresql.conf', + qq{autovacuum = off + log_min_messages = 'debug2'}); +$node_A->restart; + +# node_B +my $node_B = $node_subscriber; +$node_B->append_conf('postgresql.conf', "track_commit_timestamp = on"); +$node_B->restart; + +# Create table on node_A +$node_A->safe_psql('postgres', "CREATE TABLE tab (a int PRIMARY KEY, b int)"); + +# Create the same table on node_B +$node_B->safe_psql('postgres', "CREATE TABLE tab (a int PRIMARY KEY, b int)"); + +my $subname_AB = 'tap_sub_a_b'; +my $subname_BA = 'tap_sub_b_a'; + +# Setup logical replication +# node_A (pub) -> node_B (sub) +my $node_A_connstr = $node_A->connstr . ' dbname=postgres'; +$node_A->safe_psql('postgres', "CREATE PUBLICATION tap_pub_A FOR TABLE tab"); +$node_B->safe_psql( + 'postgres', " + CREATE SUBSCRIPTION $subname_BA + CONNECTION '$node_A_connstr application_name=$subname_BA' + PUBLICATION tap_pub_A + WITH (origin = none, retain_dead_tuples = true)"); + +# node_B (pub) -> node_A (sub) +my $node_B_connstr = $node_B->connstr . ' dbname=postgres'; +$node_B->safe_psql('postgres', "CREATE PUBLICATION tap_pub_B FOR TABLE tab"); +$node_A->safe_psql( + 'postgres', " + CREATE SUBSCRIPTION $subname_AB + CONNECTION '$node_B_connstr application_name=$subname_AB' + PUBLICATION tap_pub_B + WITH (origin = none, copy_data = off)"); + +# Wait for initial table sync to finish +$node_A->wait_for_subscription_sync($node_B, $subname_AB); +$node_B->wait_for_subscription_sync($node_A, $subname_BA); + +is(1, 1, 'Bidirectional replication setup is complete'); + +# Confirm that the conflict detection slot is created on Node B and the xmin +# value is valid. +ok( $node_B->poll_query_until( + 'postgres', + "SELECT xmin IS NOT NULL from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'" + ), + "the xmin value of slot 'pg_conflict_detection' is valid on Node B"); + +################################################## +# Check that the retain_dead_tuples option can be enabled only for disabled +# subscriptions. Validate the NOTICE message during the subscription DDL, and +# ensure the conflict detection slot is created upon enabling the +# retain_dead_tuples option. 
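# As a minimal sketch of the DDL sequence being validated here (using the
# same subscription name, tap_sub_a_b, that this test creates above):
#
#   ALTER SUBSCRIPTION tap_sub_a_b SET (retain_dead_tuples = true);  -- fails: still enabled
#   ALTER SUBSCRIPTION tap_sub_a_b DISABLE;
#   ALTER SUBSCRIPTION tap_sub_a_b SET (retain_dead_tuples = true);  -- ok, emits a NOTICE
#   ALTER SUBSCRIPTION tap_sub_a_b ENABLE;
#
# Once re-enabled, the launcher is expected to create the
# pg_conflict_detection slot, which the poll_query_until below waits for.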
+################################################## + +# Alter retain_dead_tuples for enabled subscription +my ($cmdret, $stdout, $stderr) = $node_A->psql('postgres', + "ALTER SUBSCRIPTION $subname_AB SET (retain_dead_tuples = true)"); +ok( $stderr =~ + /ERROR: cannot set option \"retain_dead_tuples\" for enabled subscription/, + "altering retain_dead_tuples is not allowed for enabled subscription"); + +# Disable the subscription +$node_A->psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE;"); + +# Enable retain_dead_tuples for disabled subscription +($cmdret, $stdout, $stderr) = $node_A->psql('postgres', + "ALTER SUBSCRIPTION $subname_AB SET (retain_dead_tuples = true);"); +ok( $stderr =~ + /NOTICE: deleted rows to detect conflicts would not be removed until the subscription is enabled/, + "altering retain_dead_tuples is allowed for disabled subscription"); + +# Re-enable the subscription +$node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB ENABLE;"); + +# Confirm that the conflict detection slot is created on Node A and the xmin +# value is valid. +ok( $node_A->poll_query_until( + 'postgres', + "SELECT xmin IS NOT NULL from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'" + ), + "the xmin value of slot 'pg_conflict_detection' is valid on Node A"); + +################################################## +# Check the WARNING when changing the origin to ANY, if retain_dead_tuples is +# enabled. This warns of the possibility of receiving changes from origins +# other than the publisher. +################################################## + +($cmdret, $stdout, $stderr) = $node_A->psql('postgres', + "ALTER SUBSCRIPTION $subname_AB SET (origin = any);"); +ok( $stderr =~ + /WARNING: subscription "tap_sub_a_b" enabled retain_dead_tuples but might not reliably detect conflicts for changes from different origins/, + "warn of the possibility of receiving changes from origins other than the publisher"); + +# Reset the origin to none +$node_A->psql('postgres', + "ALTER SUBSCRIPTION $subname_AB SET (origin = none);"); + +############################################################################### +# Check that dead tuples on node A cannot be cleaned by VACUUM until the +# concurrent transactions on Node B have been applied and flushed on Node A. +############################################################################### + +# Insert a record +$node_A->safe_psql('postgres', "INSERT INTO tab VALUES (1, 1), (2, 2);"); +$node_A->wait_for_catchup($subname_BA); + +my $result = $node_B->safe_psql('postgres', "SELECT * FROM tab;"); +is($result, qq(1|1 +2|2), 'check replicated insert on node B'); + +# Disable the logical replication from node B to node A +$node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE"); + +$node_B->safe_psql('postgres', "UPDATE tab SET b = 3 WHERE a = 1;"); +$node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 1;"); + +($cmdret, $stdout, $stderr) = $node_A->psql( + 'postgres', qq(VACUUM (verbose) public.tab;) +); + +ok( $stderr =~ + qr/1 are dead but not yet removable/, + 'the deleted column is non-removable'); + +$node_A->safe_psql( + 'postgres', "ALTER SUBSCRIPTION $subname_AB ENABLE;"); +$node_B->wait_for_catchup($subname_AB); + +# Remember the next transaction ID to be assigned +my $next_xid = $node_A->safe_psql('postgres', "SELECT txid_current() + 1;"); + +# Confirm that the xmin value is advanced to the latest nextXid. 
If no +# transactions are running, the apply worker selects nextXid as the candidate +# for the non-removable xid. See GetOldestActiveTransactionId(). +ok( $node_A->poll_query_until( + 'postgres', + "SELECT xmin = $next_xid from pg_replication_slots WHERE slot_name = 'pg_conflict_detection'" + ), + "the xmin value of slot 'pg_conflict_detection' is updated on Node A"); + +# Confirm that the dead tuple can be removed now +($cmdret, $stdout, $stderr) = $node_A->psql( + 'postgres', qq(VACUUM (verbose) public.tab;) +); + +ok( $stderr =~ + qr/1 removed, 1 remain, 0 are dead but not yet removable/, + 'the deleted column is removed'); + +############################################################################### +# Check that the replication slot pg_conflict_detection is dropped after +# removing all the subscriptions. +############################################################################### + +$node_B->safe_psql( + 'postgres', "DROP SUBSCRIPTION $subname_BA"); + +ok( $node_B->poll_query_until( + 'postgres', + "SELECT count(*) = 0 FROM pg_replication_slots WHERE slot_name = 'pg_conflict_detection'" + ), + "the slot 'pg_conflict_detection' has been dropped on Node B"); + +$node_A->safe_psql( + 'postgres', "DROP SUBSCRIPTION $subname_AB"); + +ok( $node_A->poll_query_until( + 'postgres', + "SELECT count(*) = 0 FROM pg_replication_slots WHERE slot_name = 'pg_conflict_detection'" + ), + "the slot 'pg_conflict_detection' has been dropped on Node A"); + done_testing(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index cd89746708889..a8656419cb608 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2566,6 +2566,8 @@ RestrictInfo Result ResultRelInfo ResultState +RetainDeadTuplesData +RetainDeadTuplesPhase ReturnSetInfo ReturnStmt ReturningClause From 196063d6761d2c8d6f78cc03afad08efc95a0708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Wed, 23 Jul 2025 11:02:13 +0200 Subject: [PATCH 358/602] Move enum RecoveryTargetAction to xlogrecovery.h Commit 70e81861fadd split out xlogrecovery.c/h and moved some enums related to recovery targets to xlogrecovery.h. However, it seems that the enum RecoveryTargetAction was inadvertently left out by that commit. This commit moves it to xlogrecovery.h for consistency. Author: Kyotaro Horiguchi Discussion: https://postgr.es/m/20240904.173013.1132986940678039655.horikyota.ntt@gmail.com --- src/include/access/xlog_internal.h | 10 ---------- src/include/access/xlogrecovery.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 2cf8d55d706d1..cc06fc29ab2b2 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -316,16 +316,6 @@ typedef struct XLogRecData uint32 len; /* length of rmgr data to include */ } XLogRecData; -/* - * Recovery target action. - */ -typedef enum -{ - RECOVERY_TARGET_ACTION_PAUSE, - RECOVERY_TARGET_ACTION_PROMOTE, - RECOVERY_TARGET_ACTION_SHUTDOWN, -} RecoveryTargetAction; - struct LogicalDecodingContext; struct XLogRecordBuffer; diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h index 91446303024ae..8e475e266d18e 100644 --- a/src/include/access/xlogrecovery.h +++ b/src/include/access/xlogrecovery.h @@ -40,6 +40,16 @@ typedef enum RECOVERY_TARGET_TIMELINE_NUMERIC, } RecoveryTargetTimeLineGoal; +/* + * Recovery target action. 
+ */ +typedef enum +{ + RECOVERY_TARGET_ACTION_PAUSE, + RECOVERY_TARGET_ACTION_PROMOTE, + RECOVERY_TARGET_ACTION_SHUTDOWN, +} RecoveryTargetAction; + /* Recovery pause states */ typedef enum RecoveryPauseState { From 37c7a7eeb6d13aac8edd7af032562233b0769e34 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 23 Jul 2025 10:29:45 -0500 Subject: [PATCH 359/602] Use PqMsg_* macros in walsender.c Oversights in commits f4b54e1ed9, dc21234005, and 228c370868. Author: Dave Cramer Discussion: https://postgr.es/m/CADK3HH%2BowWVdnbmWH4NHG8%3D%2BkXA_wjsyEVLoY719iJnb%3D%2BtT6A%40mail.gmail.com --- src/backend/replication/walsender.c | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 4c72a0d43b32b..ee911394a23c6 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -65,6 +65,7 @@ #include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" +#include "libpq/protocol.h" #include "miscadmin.h" #include "nodes/replnodes.h" #include "pgstat.h" @@ -735,13 +736,13 @@ HandleUploadManifestPacket(StringInfo buf, off_t *offset, switch (mtype) { - case 'd': /* CopyData */ + case PqMsg_CopyData: maxmsglen = PQ_LARGE_MESSAGE_LIMIT; break; - case 'c': /* CopyDone */ - case 'f': /* CopyFail */ - case 'H': /* Flush */ - case 'S': /* Sync */ + case PqMsg_CopyDone: + case PqMsg_CopyFail: + case PqMsg_Flush: + case PqMsg_Sync: maxmsglen = PQ_SMALL_MESSAGE_LIMIT; break; default: @@ -763,19 +764,19 @@ HandleUploadManifestPacket(StringInfo buf, off_t *offset, /* Process the message */ switch (mtype) { - case 'd': /* CopyData */ + case PqMsg_CopyData: AppendIncrementalManifestData(ib, buf->data, buf->len); return true; - case 'c': /* CopyDone */ + case PqMsg_CopyDone: return false; - case 'H': /* Sync */ - case 'S': /* Flush */ + case PqMsg_Sync: + case PqMsg_Flush: /* Ignore these while in CopyOut mode as we do elsewhere. */ return true; - case 'f': + case PqMsg_CopyFail: ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), errmsg("COPY from stdin failed: %s", @@ -1569,7 +1570,7 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, tmpbuf.data, sizeof(int64)); /* output previously gathered data in a CopyData packet */ - pq_putmessage_noblock('d', ctx->out->data, ctx->out->len); + pq_putmessage_noblock(PqMsg_CopyData, ctx->out->data, ctx->out->len); CHECK_FOR_INTERRUPTS(); @@ -2305,7 +2306,7 @@ ProcessRepliesIfAny(void) case PqMsg_CopyDone: if (!streamingDoneSending) { - pq_putmessage_noblock('c', NULL, 0); + pq_putmessage_noblock(PqMsg_CopyDone, NULL, 0); streamingDoneSending = true; } @@ -2758,7 +2759,7 @@ ProcessStandbyPSRequestMessage(void) pq_sendint64(&output_message, GetCurrentTimestamp()); /* ... 
and send it wrapped in CopyData */ - pq_putmessage_noblock('d', output_message.data, output_message.len); + pq_putmessage_noblock(PqMsg_CopyData, output_message.data, output_message.len); } /* @@ -3306,7 +3307,7 @@ XLogSendPhysical(void) wal_segment_close(xlogreader); /* Send CopyDone */ - pq_putmessage_noblock('c', NULL, 0); + pq_putmessage_noblock(PqMsg_CopyDone, NULL, 0); streamingDoneSending = true; WalSndCaughtUp = true; @@ -3434,7 +3435,7 @@ XLogSendPhysical(void) memcpy(&output_message.data[1 + sizeof(int64) + sizeof(int64)], tmpbuf.data, sizeof(int64)); - pq_putmessage_noblock('d', output_message.data, output_message.len); + pq_putmessage_noblock(PqMsg_CopyData, output_message.data, output_message.len); sentPtr = endptr; @@ -4140,7 +4141,7 @@ WalSndKeepalive(bool requestReply, XLogRecPtr writePtr) pq_sendbyte(&output_message, requestReply ? 1 : 0); /* ... and send it wrapped in CopyData */ - pq_putmessage_noblock('d', output_message.data, output_message.len); + pq_putmessage_noblock(PqMsg_CopyData, output_message.data, output_message.len); /* Set local flag */ if (requestReply) From 2047ad068139f0b8c6da73d0b845ca9ba30fb33d Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 23 Jul 2025 12:06:20 -0500 Subject: [PATCH 360/602] Cross-check lists of built-in LWLock tranches. lwlock.c, lwlock.h, and wait_event_names.txt each contain a list of built-in LWLock tranches. It is easy to miss one or the other when adding or removing tranches, and discrepancies have adverse effects (e.g., breaking JOINs between pg_stat_activity and pg_wait_events). This commit moves the lists of built-in tranches in lwlock.{c,h} to lwlocklist.h and adds a cross-check to the script that generates lwlocknames.h. If the lists do not match exactly, building will fail. Author: Bertrand Drouvot Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/aHpOgwuFQfcFMZ/B%40ip-10-97-1-34.eu-west-3.compute.internal --- .../storage/lmgr/generate-lwlocknames.pl | 110 +++++++++++++----- src/backend/storage/lmgr/lwlock.c | 48 +------- .../utils/activity/wait_event_names.txt | 10 +- src/include/storage/lwlock.h | 56 +++------ src/include/storage/lwlocklist.h | 57 ++++++++- 5 files changed, 162 insertions(+), 119 deletions(-) diff --git a/src/backend/storage/lmgr/generate-lwlocknames.pl b/src/backend/storage/lmgr/generate-lwlocknames.pl index c7a6720440db6..cd3e43c448aed 100644 --- a/src/backend/storage/lmgr/generate-lwlocknames.pl +++ b/src/backend/storage/lmgr/generate-lwlocknames.pl @@ -27,18 +27,24 @@ # -# First, record the predefined LWLocks listed in wait_event_names.txt. We'll -# cross-check those with the ones in lwlocklist.h. +# First, record the predefined LWLocks and built-in tranches listed in +# wait_event_names.txt. We'll cross-check those with the ones in lwlocklist.h. # +my @wait_event_tranches; my @wait_event_lwlocks; my $record_lwlocks = 0; +my $in_tranches = 0; while (<$wait_event_names>) { chomp; # Check for end marker. - last if /^# END OF PREDEFINED LWLOCKS/; + if (/^# END OF PREDEFINED LWLOCKS/) + { + $in_tranches = 1; + next; + } # Skip comments and empty lines. next if /^#/; @@ -54,13 +60,29 @@ # Go to the next line if we are not yet recording LWLocks. next if not $record_lwlocks; + # Stop recording if we reach another section. + last if /^Section:/; + # Record the LWLock. 
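# For reference, each data line in wait_event_names.txt is a tab-separated
# pair -- the wait event name, then its quoted documentation sentence --
# along the lines of (illustrative entry, <TAB> standing for a literal tab):
#     ShmemIndex<TAB>"Waiting to find or allocate space in shared memory."
# Only the name matters for the cross-check; the documentation sentence is
# discarded here.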
(my $waiteventname, my $waitevendocsentence) = split(/\t/, $_); - push(@wait_event_lwlocks, $waiteventname); + + if ($in_tranches) + { + push(@wait_event_tranches, $waiteventname); + } + else + { + push(@wait_event_lwlocks, $waiteventname); + } } +# +# While gathering the list of predefined LWLocks, cross-check the lists in +# lwlocklist.h with the wait events we just recorded. +# my $in_comment = 0; -my $i = 0; +my $lwlock_count = 0; +my $tranche_count = 0; while (<$lwlocklist>) { chomp; @@ -81,38 +103,72 @@ next; } - die "unable to parse lwlocklist.h line \"$_\"" - unless /^PG_LWLOCK\((\d+),\s+(\w+)\)$/; + # + # Gather list of predefined LWLocks and cross-check with the wait events. + # + if (/^PG_LWLOCK\((\d+),\s+(\w+)\)$/) + { + my ($lockidx, $lockname) = ($1, $2); - (my $lockidx, my $lockname) = ($1, $2); + die "lwlocklist.h not in order" if $lockidx < $lastlockidx; + die "lwlocklist.h has duplicates" if $lockidx == $lastlockidx; - die "lwlocklist.h not in order" if $lockidx < $lastlockidx; - die "lwlocklist.h has duplicates" if $lockidx == $lastlockidx; + die "$lockname defined in lwlocklist.h but missing from " + . "wait_event_names.txt" + if $lwlock_count >= scalar @wait_event_lwlocks; + die "lists of predefined LWLocks do not match (first mismatch at " + . "$wait_event_lwlocks[$lwlock_count] in wait_event_names.txt and " + . "$lockname in lwlocklist.h)" + if $wait_event_lwlocks[$lwlock_count] ne $lockname; - die "$lockname defined in lwlocklist.h but missing from " - . "wait_event_names.txt" - if $i >= scalar @wait_event_lwlocks; - die "lists of predefined LWLocks do not match (first mismatch at " - . "$wait_event_lwlocks[$i] in wait_event_names.txt and $lockname in " - . "lwlocklist.h)" - if $wait_event_lwlocks[$i] ne $lockname; - $i++; + $lwlock_count++; - while ($lastlockidx < $lockidx - 1) + while ($lastlockidx < $lockidx - 1) + { + ++$lastlockidx; + } + $lastlockidx = $lockidx; + + # Add a "Lock" suffix to each lock name, as the C code depends on that. + printf $h "#define %-32s (&MainLWLockArray[$lockidx].lock)\n", + $lockname . "Lock"; + + next; + } + + # + # Cross-check the built-in LWLock tranches with the wait events. + # + if (/^PG_LWLOCKTRANCHE\((\w+),\s+(\w+)\)$/) { - ++$lastlockidx; + my ($tranche_id, $tranche_name) = ($1, $2); + + die "$tranche_name defined in lwlocklist.h but missing from " + . "wait_event_names.txt" + if $tranche_count >= scalar @wait_event_tranches; + die + "lists of built-in LWLock tranches do not match (first mismatch at " + . "$wait_event_tranches[$tranche_count] in wait_event_names.txt and " + . "$tranche_name in lwlocklist.h)" + if $wait_event_tranches[$tranche_count] ne $tranche_name; + + $tranche_count++; + + next; } - $lastlockidx = $lockidx; - # Add a "Lock" suffix to each lock name, as the C code depends on that - printf $h "#define %-32s (&MainLWLockArray[$lockidx].lock)\n", - $lockname . "Lock"; + die "unable to parse lwlocklist.h line \"$_\""; } die - "$wait_event_lwlocks[$i] defined in wait_event_names.txt but missing from " - . "lwlocklist.h" - if $i < scalar @wait_event_lwlocks; + "$wait_event_lwlocks[$lwlock_count] defined in wait_event_names.txt but " + . " missing from lwlocklist.h" + if $lwlock_count < scalar @wait_event_lwlocks; + +die + "$wait_event_tranches[$tranche_count] defined in wait_event_names.txt but " + . 
"missing from lwlocklist.h" + if $tranche_count < scalar @wait_event_tranches; print $h "\n"; printf $h "#define NUM_INDIVIDUAL_LWLOCKS %s\n", $lastlockidx + 1; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 2d43bf2cc1323..ec9c345ffdfb8 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -122,9 +122,8 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0, * own tranche. We absorb the names of these tranches from there into * BuiltinTrancheNames here. * - * 2. There are some predefined tranches for built-in groups of locks. - * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names - * appear in BuiltinTrancheNames[] below. + * 2. There are some predefined tranches for built-in groups of locks defined + * in lwlocklist.h. We absorb the names of these tranches, too. * * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche * or LWLockRegisterTranche. The names of these that are known in the current @@ -135,49 +134,10 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0, */ static const char *const BuiltinTrancheNames[] = { #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname), +#define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname), #include "storage/lwlocklist.h" #undef PG_LWLOCK - [LWTRANCHE_XACT_BUFFER] = "XactBuffer", - [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer", - [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer", - [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer", - [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer", - [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer", - [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer", - [LWTRANCHE_WAL_INSERT] = "WALInsert", - [LWTRANCHE_BUFFER_CONTENT] = "BufferContent", - [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState", - [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO", - [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath", - [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping", - [LWTRANCHE_LOCK_MANAGER] = "LockManager", - [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager", - [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin", - [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan", - [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA", - [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA", - [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType", - [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod", - [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore", - [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap", - [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend", - [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList", - [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA", - [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash", - [LWTRANCHE_PGSTATS_DATA] = "PgStatsData", - [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA", - [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash", - [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA", - [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash", - [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU", - [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultiXactOffsetSLRU", - [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultiXactMemberSLRU", - [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU", - [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU", - [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU", - [LWTRANCHE_XACT_SLRU] = "XactSLRU", - [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA", - [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion", +#undef PG_LWLOCKTRANCHE }; 
StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 4da68312b5f97..0be307d2ca04b 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -356,9 +356,13 @@ AioWorkerSubmissionQueue "Waiting to access AIO worker submission queue." # # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE) # -# Predefined LWLocks (i.e., those declared in lwlocknames.h) must be listed -# in the section above and must be listed in the same order as in -# lwlocknames.h. Other LWLocks must be listed in the section below. +# Predefined LWLocks (i.e., those declared at the top of lwlocknames.h) must be +# listed in the section above and must be listed in the same order as in +# lwlocknames.h. +# +# Likewise, the built-in LWLock tranches (i.e., those declared at the bottom of +# lwlocknames.h) must be listed in the section below and must be listed in the +# same order as in lwlocknames.h. # XactBuffer "Waiting for I/O on a transaction status SLRU buffer." diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 08a72569ae5fd..5e717765764f4 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -176,51 +176,23 @@ extern void LWLockInitialize(LWLock *lock, int tranche_id); * Every tranche ID less than NUM_INDIVIDUAL_LWLOCKS is reserved; also, * we reserve additional tranche IDs for builtin tranches not included in * the set of individual LWLocks. A call to LWLockNewTrancheId will never - * return a value less than LWTRANCHE_FIRST_USER_DEFINED. + * return a value less than LWTRANCHE_FIRST_USER_DEFINED. The actual list of + * built-in tranches is kept in lwlocklist.h. */ typedef enum BuiltinTrancheIds { - LWTRANCHE_XACT_BUFFER = NUM_INDIVIDUAL_LWLOCKS, - LWTRANCHE_COMMITTS_BUFFER, - LWTRANCHE_SUBTRANS_BUFFER, - LWTRANCHE_MULTIXACTOFFSET_BUFFER, - LWTRANCHE_MULTIXACTMEMBER_BUFFER, - LWTRANCHE_NOTIFY_BUFFER, - LWTRANCHE_SERIAL_BUFFER, - LWTRANCHE_WAL_INSERT, - LWTRANCHE_BUFFER_CONTENT, - LWTRANCHE_REPLICATION_ORIGIN_STATE, - LWTRANCHE_REPLICATION_SLOT_IO, - LWTRANCHE_LOCK_FASTPATH, - LWTRANCHE_BUFFER_MAPPING, - LWTRANCHE_LOCK_MANAGER, - LWTRANCHE_PREDICATE_LOCK_MANAGER, - LWTRANCHE_PARALLEL_HASH_JOIN, - LWTRANCHE_PARALLEL_BTREE_SCAN, - LWTRANCHE_PARALLEL_QUERY_DSA, - LWTRANCHE_PER_SESSION_DSA, - LWTRANCHE_PER_SESSION_RECORD_TYPE, - LWTRANCHE_PER_SESSION_RECORD_TYPMOD, - LWTRANCHE_SHARED_TUPLESTORE, - LWTRANCHE_SHARED_TIDBITMAP, - LWTRANCHE_PARALLEL_APPEND, - LWTRANCHE_PER_XACT_PREDICATE_LIST, - LWTRANCHE_PGSTATS_DSA, - LWTRANCHE_PGSTATS_HASH, - LWTRANCHE_PGSTATS_DATA, - LWTRANCHE_LAUNCHER_DSA, - LWTRANCHE_LAUNCHER_HASH, - LWTRANCHE_DSM_REGISTRY_DSA, - LWTRANCHE_DSM_REGISTRY_HASH, - LWTRANCHE_COMMITTS_SLRU, - LWTRANCHE_MULTIXACTMEMBER_SLRU, - LWTRANCHE_MULTIXACTOFFSET_SLRU, - LWTRANCHE_NOTIFY_SLRU, - LWTRANCHE_SERIAL_SLRU, - LWTRANCHE_SUBTRANS_SLRU, - LWTRANCHE_XACT_SLRU, - LWTRANCHE_PARALLEL_VACUUM_DSA, - LWTRANCHE_AIO_URING_COMPLETION, + /* + * LWTRANCHE_INVALID is an unused value that only exists to initialize the + * rest of the tranches to appropriate values. 
+ */ + LWTRANCHE_INVALID = NUM_INDIVIDUAL_LWLOCKS - 1, + +#define PG_LWLOCK(id, name) +#define PG_LWLOCKTRANCHE(id, name) LWTRANCHE_##id, +#include "storage/lwlocklist.h" +#undef PG_LWLOCK +#undef PG_LWLOCKTRANCHE + LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index a9681738146e1..208d2e3a8ed9e 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -2,9 +2,10 @@ * * lwlocklist.h * - * The predefined LWLock list is kept in its own source file for use by - * automatic tools. The exact representation of a keyword is determined by - * the PG_LWLOCK macro, which is not defined in this file; it can be + * The list of predefined LWLocks and built-in LWLock tranches is kept in + * its own source file for use by automatic tools. The exact + * representation of a keyword is determined by the PG_LWLOCK and + * PG_LWLOCKTRANCHE macros, which are not defined in this file; they can be * defined by the caller for special purposes. * * Also, generate-lwlocknames.pl processes this file to create lwlocknames.h. @@ -84,3 +85,53 @@ PG_LWLOCK(50, DSMRegistry) PG_LWLOCK(51, InjectionPoint) PG_LWLOCK(52, SerialControl) PG_LWLOCK(53, AioWorkerSubmissionQueue) + +/* + * There also exist several built-in LWLock tranches. As with the predefined + * LWLocks, be sure to update the WaitEventLWLock section of + * src/backend/utils/activity/wait_event_names.txt when modifying this list. + * + * Note that the IDs here (the first value) don't include the LWTRANCHE_ + * prefix. It's added elsewhere. + */ +PG_LWLOCKTRANCHE(XACT_BUFFER, XactBuffer) +PG_LWLOCKTRANCHE(COMMITTS_BUFFER, CommitTsBuffer) +PG_LWLOCKTRANCHE(SUBTRANS_BUFFER, SubtransBuffer) +PG_LWLOCKTRANCHE(MULTIXACTOFFSET_BUFFER, MultiXactOffsetBuffer) +PG_LWLOCKTRANCHE(MULTIXACTMEMBER_BUFFER, MultiXactMemberBuffer) +PG_LWLOCKTRANCHE(NOTIFY_BUFFER, NotifyBuffer) +PG_LWLOCKTRANCHE(SERIAL_BUFFER, SerialBuffer) +PG_LWLOCKTRANCHE(WAL_INSERT, WALInsert) +PG_LWLOCKTRANCHE(BUFFER_CONTENT, BufferContent) +PG_LWLOCKTRANCHE(REPLICATION_ORIGIN_STATE, ReplicationOriginState) +PG_LWLOCKTRANCHE(REPLICATION_SLOT_IO, ReplicationSlotIO) +PG_LWLOCKTRANCHE(LOCK_FASTPATH, LockFastPath) +PG_LWLOCKTRANCHE(BUFFER_MAPPING, BufferMapping) +PG_LWLOCKTRANCHE(LOCK_MANAGER, LockManager) +PG_LWLOCKTRANCHE(PREDICATE_LOCK_MANAGER, PredicateLockManager) +PG_LWLOCKTRANCHE(PARALLEL_HASH_JOIN, ParallelHashJoin) +PG_LWLOCKTRANCHE(PARALLEL_BTREE_SCAN, ParallelBtreeScan) +PG_LWLOCKTRANCHE(PARALLEL_QUERY_DSA, ParallelQueryDSA) +PG_LWLOCKTRANCHE(PER_SESSION_DSA, PerSessionDSA) +PG_LWLOCKTRANCHE(PER_SESSION_RECORD_TYPE, PerSessionRecordType) +PG_LWLOCKTRANCHE(PER_SESSION_RECORD_TYPMOD, PerSessionRecordTypmod) +PG_LWLOCKTRANCHE(SHARED_TUPLESTORE, SharedTupleStore) +PG_LWLOCKTRANCHE(SHARED_TIDBITMAP, SharedTidBitmap) +PG_LWLOCKTRANCHE(PARALLEL_APPEND, ParallelAppend) +PG_LWLOCKTRANCHE(PER_XACT_PREDICATE_LIST, PerXactPredicateList) +PG_LWLOCKTRANCHE(PGSTATS_DSA, PgStatsDSA) +PG_LWLOCKTRANCHE(PGSTATS_HASH, PgStatsHash) +PG_LWLOCKTRANCHE(PGSTATS_DATA, PgStatsData) +PG_LWLOCKTRANCHE(LAUNCHER_DSA, LogicalRepLauncherDSA) +PG_LWLOCKTRANCHE(LAUNCHER_HASH, LogicalRepLauncherHash) +PG_LWLOCKTRANCHE(DSM_REGISTRY_DSA, DSMRegistryDSA) +PG_LWLOCKTRANCHE(DSM_REGISTRY_HASH, DSMRegistryHash) +PG_LWLOCKTRANCHE(COMMITTS_SLRU, CommitTsSLRU) +PG_LWLOCKTRANCHE(MULTIXACTOFFSET_SLRU, MultiXactOffsetSLRU) +PG_LWLOCKTRANCHE(MULTIXACTMEMBER_SLRU, MultiXactMemberSLRU) +PG_LWLOCKTRANCHE(NOTIFY_SLRU, NotifySLRU) 
+PG_LWLOCKTRANCHE(SERIAL_SLRU, SerialSLRU) +PG_LWLOCKTRANCHE(SUBTRANS_SLRU, SubtransSLRU) +PG_LWLOCKTRANCHE(XACT_SLRU, XactSLRU) +PG_LWLOCKTRANCHE(PARALLEL_VACUUM_DSA, ParallelVacuumDSA) +PG_LWLOCKTRANCHE(AIO_URING_COMPLETION, AioUringCompletion) From e6dfd068ed453b8690551dac700d57fbf32ba187 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 23 Jul 2025 15:44:29 -0400 Subject: [PATCH 361/602] Fix build breakage on Solaris-alikes with late-model GCC. Solaris has never bothered to add "const" to the second argument of PAM conversation procs, as all other Unixen did decades ago. This resulted in an "incompatible pointer" compiler warning when building --with-pam, but had no more serious effect than that, so we never did anything about it. However, as of GCC 14 the case is an error not warning by default. To complicate matters, recent OpenIndiana (and maybe illumos in general?) *does* supply the "const" by default, so we can't just assume that platforms using our solaris template need help. What we can do, short of building a configure-time probe, is to make solaris.h #define _PAM_LEGACY_NONCONST, which causes OpenIndiana's pam_appl.h to revert to the traditional definition, and hopefully will have no effect anywhere else. Then we can use that same symbol to control whether we include "const" in the declaration of pam_passwd_conv_proc(). Bug: #18995 Reported-by: Andrew Watkins Author: Tom Lane Discussion: https://postgr.es/m/18995-82058da9ab4337a7@postgresql.org Backpatch-through: 13 --- src/backend/libpq/auth.c | 12 ++++++++++-- src/include/port/solaris.h | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c index 9f4d05ffbd453..4da46666439db 100644 --- a/src/backend/libpq/auth.c +++ b/src/backend/libpq/auth.c @@ -94,8 +94,16 @@ static int auth_peer(hbaPort *port); #define PGSQL_PAM_SERVICE "postgresql" /* Service name passed to PAM */ +/* Work around original Solaris' lack of "const" in the conv_proc signature */ +#ifdef _PAM_LEGACY_NONCONST +#define PG_PAM_CONST +#else +#define PG_PAM_CONST const +#endif + static int CheckPAMAuth(Port *port, const char *user, const char *password); -static int pam_passwd_conv_proc(int num_msg, const struct pam_message **msg, +static int pam_passwd_conv_proc(int num_msg, + PG_PAM_CONST struct pam_message **msg, struct pam_response **resp, void *appdata_ptr); static struct pam_conv pam_passw_conv = { @@ -1917,7 +1925,7 @@ auth_peer(hbaPort *port) */ static int -pam_passwd_conv_proc(int num_msg, const struct pam_message **msg, +pam_passwd_conv_proc(int num_msg, PG_PAM_CONST struct pam_message **msg, struct pam_response **resp, void *appdata_ptr) { const char *passwd; diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h index e63a3bd824d6d..8ff40007c7f6a 100644 --- a/src/include/port/solaris.h +++ b/src/include/port/solaris.h @@ -24,3 +24,12 @@ #if defined(__i386__) #include #endif + +/* + * On original Solaris, PAM conversation procs lack a "const" in their + * declaration; but recent OpenIndiana versions put it there by default. + * The least messy way to deal with this is to define _PAM_LEGACY_NONCONST, + * which causes OpenIndiana to declare pam_conv per the Solaris tradition, + * and also use that symbol to control omitting the "const" in our own code. 
+ */ +#define _PAM_LEGACY_NONCONST 1 From 086b9a33aafeeeaa0af0c806410ea6228f2f63f4 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 24 Jul 2025 11:43:20 +0900 Subject: [PATCH 362/602] doc: Add missing index entries and fix title formatting in pg_buffercache docs. This commit adds missing index entries for the functions pg_buffercache_numa() and pg_buffercache_usage_counts() in the pg_buffercache documentation. It also makes the function titles consistent by adding parentheses after function names where they were previously missing. Author: Fujii Masao Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/7d19af4b-7da3-4862-9f52-ff958960bd8d@oss.nttdata.com Backpatch-through: 18 --- doc/src/sgml/pgbuffercache.sgml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml index 546ace8369e28..eeb85a0e04908 100644 --- a/doc/src/sgml/pgbuffercache.sgml +++ b/doc/src/sgml/pgbuffercache.sgml @@ -19,10 +19,18 @@ pg_buffercache_pages + + pg_buffercache_numa + + pg_buffercache_summary + + pg_buffercache_usage_counts + + pg_buffercache_evict @@ -489,7 +497,7 @@ - The <structname>pg_buffercache_evict_relation</structname> Function + The <structname>pg_buffercache_evict_relation()</structname> Function The pg_buffercache_evict_relation() function is very similar to the pg_buffercache_evict() function. The @@ -507,7 +515,7 @@ - The <structname>pg_buffercache_evict_all</structname> Function + The <structname>pg_buffercache_evict_all()</structname> Function The pg_buffercache_evict_all() function is very similar to the pg_buffercache_evict() function. The From df335618ed87eecdef44a95e453e345a55a14ad8 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 24 Jul 2025 03:51:55 +0000 Subject: [PATCH 363/602] Fix cfbot failure caused by commit 228c370868. Ensure the test waits for the apply worker to exit after disabling the subscription. This is necessary to safely enable the retain_dead_tuples option. Also added a similar wait in another part of the test to prevent unintended apply worker activity that could lead to test failures post-subscription disable. Reported by Michael Paquier as per cfbot. 
Author: Zhijie Hou Discussion: https://postgr.es/m/aIGLgfRJIBwExoPj@paquier.xyz --- src/test/subscription/t/035_conflicts.pl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 7458d7fba7e9a..976d53a870e5e 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -228,6 +228,11 @@ # Disable the subscription $node_A->psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE;"); +# Wait for the apply worker to stop +$node_A->poll_query_until('postgres', + "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'" +); + # Enable retain_dead_tuples for disabled subscription ($cmdret, $stdout, $stderr) = $node_A->psql('postgres', "ALTER SUBSCRIPTION $subname_AB SET (retain_dead_tuples = true);"); @@ -278,6 +283,11 @@ # Disable the logical replication from node B to node A $node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE"); +# Wait for the apply worker to stop +$node_A->poll_query_until('postgres', + "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'" +); + $node_B->safe_psql('postgres', "UPDATE tab SET b = 3 WHERE a = 1;"); $node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 1;"); From 719dcf3c42260ceebfa2e8f6171a61161737a265 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 24 Jul 2025 15:41:18 +0900 Subject: [PATCH 364/602] Introduce field tracking cached plan type in PlannedStmt PlannedStmt gains a new field, called CachedPlanType, able to track if a given plan tree originates from the cache and if we are dealing with a generic or custom cached plan. This field can be used for monitoring or statistical purposes, in the executor hooks, for example, based on the planned statement attached to a QueryDesc. A patch is under discussion for pg_stat_statements to provide an equivalent of the counters in pg_prepared_statements for custom and generic plans, to provide a more global view of such data, as this data is now restricted to the current session. The concept introduced in this commit is useful on its own, and has been extracted from a larger patch by the same author. 
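For example, an executor hook could branch on the new field to tally plan-cache usage. A minimal sketch, assuming a loadable module (the choice of ExecutorEnd and all names below are illustrative, not part of this commit):

    #include "postgres.h"

    #include "executor/executor.h"
    #include "fmgr.h"
    #include "nodes/plannodes.h"

    PG_MODULE_MAGIC;

    static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
    static uint64 generic_count = 0;
    static uint64 custom_count = 0;

    /* Count how many executed plans came from the plan cache. */
    static void
    count_cached_plans(QueryDesc *queryDesc)
    {
        switch (queryDesc->plannedstmt->cached_plan_type)
        {
            case PLAN_CACHE_GENERIC:
                generic_count++;
                break;
            case PLAN_CACHE_CUSTOM:
                custom_count++;
                break;
            case PLAN_CACHE_NONE:
                break;          /* not a cached plan */
        }

        if (prev_ExecutorEnd)
            prev_ExecutorEnd(queryDesc);
        else
            standard_ExecutorEnd(queryDesc);
    }

    void
    _PG_init(void)
    {
        prev_ExecutorEnd = ExecutorEnd_hook;
        ExecutorEnd_hook = count_cached_plans;
    }

Since PLAN_CACHE_NONE is assigned everywhere a PlannedStmt is built outside the plan cache, such a hook needs no special cases for utility wrappers or parallel workers.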
Author: Sami Imseih Reviewed-by: Andrei Lepikhov Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/CAA5RZ0uFw8Y9GCFvafhC=OA8NnMqVZyzXPfv_EePOt+iv1T-qQ@mail.gmail.com --- src/backend/commands/foreigncmds.c | 1 + src/backend/commands/schemacmds.c | 1 + src/backend/executor/execParallel.c | 1 + src/backend/optimizer/plan/planner.c | 1 + src/backend/tcop/postgres.c | 1 + src/backend/tcop/utility.c | 2 ++ src/backend/utils/cache/plancache.c | 8 ++++++++ src/include/nodes/plannodes.h | 17 +++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 9 files changed, 33 insertions(+) diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c index 8d2d743154462..fcd5fcd8915e3 100644 --- a/src/backend/commands/foreigncmds.c +++ b/src/backend/commands/foreigncmds.c @@ -1588,6 +1588,7 @@ ImportForeignSchema(ImportForeignSchemaStmt *stmt) pstmt->utilityStmt = (Node *) cstmt; pstmt->stmt_location = rs->stmt_location; pstmt->stmt_len = rs->stmt_len; + pstmt->cached_plan_type = PLAN_CACHE_NONE; /* Execute statement */ ProcessUtility(pstmt, cmd, false, diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index 546160f09410e..c00f1a11384f1 100644 --- a/src/backend/commands/schemacmds.c +++ b/src/backend/commands/schemacmds.c @@ -215,6 +215,7 @@ CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString, wrapper->utilityStmt = stmt; wrapper->stmt_location = stmt_location; wrapper->stmt_len = stmt_len; + wrapper->cached_plan_type = PLAN_CACHE_NONE; /* do this step */ ProcessUtility(wrapper, diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index f3e77bda27906..fc76f22fb8238 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -189,6 +189,7 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->permInfos = estate->es_rteperminfos; pstmt->resultRelations = NIL; pstmt->appendRelations = NIL; + pstmt->cached_plan_type = PLAN_CACHE_NONE; /* * Transfer only parallel-safe subplans, leaving a NULL "hole" in the list diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index c989e72cac5cf..a77b2147e9592 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -582,6 +582,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->utilityStmt = parse->utilityStmt; result->stmt_location = parse->stmt_location; result->stmt_len = parse->stmt_len; + result->cached_plan_type = PLAN_CACHE_NONE; result->jitFlags = PGJIT_NONE; if (jit_enabled && jit_above_cost >= 0 && diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2f8c3d5f91822..a297606cdd7fa 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -988,6 +988,7 @@ pg_plan_queries(List *querytrees, const char *query_string, int cursorOptions, stmt->stmt_location = query->stmt_location; stmt->stmt_len = query->stmt_len; stmt->queryId = query->queryId; + stmt->cached_plan_type = PLAN_CACHE_NONE; } else { diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 4c1faf5575c4d..babc34d0cbe1d 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1234,6 +1234,7 @@ ProcessUtilitySlow(ParseState *pstate, wrapper->utilityStmt = stmt; wrapper->stmt_location = pstmt->stmt_location; wrapper->stmt_len = pstmt->stmt_len; + wrapper->cached_plan_type = PLAN_CACHE_NONE; ProcessUtility(wrapper, queryString, @@ -1964,6 +1965,7 @@ 
ProcessUtilityForAlterTable(Node *stmt, AlterTableUtilityContext *context) wrapper->utilityStmt = stmt; wrapper->stmt_location = context->pstmt->stmt_location; wrapper->stmt_len = context->pstmt->stmt_len; + wrapper->cached_plan_type = PLAN_CACHE_NONE; ProcessUtility(wrapper, context->queryString, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 89a1c79e984d1..f4d2b9458a5ea 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1283,6 +1283,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, CachedPlan *plan = NULL; List *qlist; bool customplan; + ListCell *lc; /* Assert caller is doing things in a sane order */ Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); @@ -1385,6 +1386,13 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, plan->is_saved = true; } + foreach(lc, plan->stmt_list) + { + PlannedStmt *pstmt = (PlannedStmt *) lfirst(lc); + + pstmt->cached_plan_type = customplan ? PLAN_CACHE_CUSTOM : PLAN_CACHE_GENERIC; + } + return plan; } diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4f59e30d62d5e..46e2e09ea35be 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -28,6 +28,20 @@ * ---------------------------------------------------------------- */ +/* ---------------- + * CachedPlanType + * + * CachedPlanType identifies whether a PlannedStmt is a cached plan, and if + * so, whether it is generic or custom. + * ---------------- + */ +typedef enum CachedPlanType +{ + PLAN_CACHE_NONE = 0, /* Not a cached plan */ + PLAN_CACHE_GENERIC, /* Generic cached plan */ + PLAN_CACHE_CUSTOM, /* Custom cached plan */ +} CachedPlanType; + /* ---------------- * PlannedStmt node * @@ -58,6 +72,9 @@ typedef struct PlannedStmt /* plan identifier (can be set by plugins) */ int64 planId; + /* type of cached plan */ + CachedPlanType cached_plan_type; + /* is it insert|update|delete|merge RETURNING? */ bool hasReturning; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a8656419cb608..4353befab9934 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -391,6 +391,7 @@ CachedFunctionHashEntry CachedFunctionHashKey CachedPlan CachedPlanSource +CachedPlanType CallContext CallStmt CancelRequestPacket From e1c3654839e464957675344a1e949489d98b103b Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 24 Jul 2025 09:05:32 +0000 Subject: [PATCH 365/602] Fix duplicate transaction replay during pg_createsubscriber. Previously, the tool could replay the same transaction twice, once during recovery, then again during replication after the subscriber was set up. This occurred because the same recovery_target_lsn was used both to finalize recovery and to start replication. If recovery_target_inclusive = true, the transaction at that LSN would be applied during recovery and then sent again by the publisher leading to duplication. To prevent this, we now set recovery_target_inclusive = false. This ensures the transaction at recovery_target_lsn is not reapplied during recovery, avoiding duplication when replication begins. 
Bug #18897 Reported-by: Zane Duffield Author: Shlok Kyal Reviewed-by: vignesh C Reviewed-by: Amit Kapila Backpatch-through: 17, where it was introduced Discussion: https://postgr.es/m/18897-d3db67535860dddb@postgresql.org --- src/bin/pg_basebackup/pg_createsubscriber.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c index 025b893a41e83..3986882f04292 100644 --- a/src/bin/pg_basebackup/pg_createsubscriber.c +++ b/src/bin/pg_basebackup/pg_createsubscriber.c @@ -1250,8 +1250,17 @@ setup_recovery(const struct LogicalRepInfo *dbinfo, const char *datadir, const c appendPQExpBufferStr(recoveryconfcontents, "recovery_target = ''\n"); appendPQExpBufferStr(recoveryconfcontents, "recovery_target_timeline = 'latest'\n"); + + /* + * Set recovery_target_inclusive = false to avoid reapplying the + * transaction committed at 'lsn' after subscription is enabled. This is + * because the provided 'lsn' is also used as the replication start point + * for the subscription. So, the server can send the transaction committed + * at that 'lsn' after replication is started which can lead to applying + * the same transaction twice if we keep recovery_target_inclusive = true. + */ appendPQExpBufferStr(recoveryconfcontents, - "recovery_target_inclusive = true\n"); + "recovery_target_inclusive = false\n"); appendPQExpBufferStr(recoveryconfcontents, "recovery_target_action = promote\n"); appendPQExpBufferStr(recoveryconfcontents, "recovery_target_name = ''\n"); From 15d33eb1924c1093102b8ce142ede4cb3912e85e Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 24 Jul 2025 10:13:45 -0500 Subject: [PATCH 366/602] Fix return value of visibilitymap_get_status(). This function is declared as returning a uint8, but it returns a bool in one code path. To fix, return (uint8) 0 instead of false there. This should behave exactly the same as before, but it might prevent future compiler complaints. Oversight in commit a892234f83. Author: Julien Rouhaud Discussion: https://postgr.es/m/aIHluT2isN58jqHV%40jrouhaud --- src/backend/access/heap/visibilitymap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 745a04ef26e29..8f918e00af7ed 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -364,7 +364,7 @@ visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf) { *vmbuf = vm_readbuf(rel, mapBlock, false); if (!BufferIsValid(*vmbuf)) - return false; + return (uint8) 0; } map = PageGetContents(BufferGetPage(*vmbuf)); From ac000fca743eff923d1feb4bc722d905901ae540 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 25 Jul 2025 11:17:48 +0900 Subject: [PATCH 367/602] Lower bounds related to pgstats kinds This commit changes stats kinds to have the following bounds, making their handling in core cheaper by default: - PGSTAT_KIND_CUSTOM_MIN 128 -> 24 - PGSTAT_KIND_MAX 256 -> 32 The original numbers were rather high, and showed an impact on performance in pgstat_report_stat() for the case of simple queries with its early-exit path if there are no pending statistics to flush. This logic will be improved more in a follow-up commit to bring the performance of pgstat_report_stat() on par with v17 and older versions. Lowering the bounds is a change worth doing on its own, independently of the other improvement. 
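With the lowered bounds, custom kinds must now fit in the 24..32 window; a compile-time guard is a cheap sanity check (a sketch; MY_STATS_KIND is a hypothetical extension-chosen ID):

    #include "postgres.h"

    #include "utils/pgstat_kind.h"

    /* Hypothetical ID; real extensions should reserve a unique one. */
    #define MY_STATS_KIND 25

    /* PGSTAT_KIND_CUSTOM_MAX equals PGSTAT_KIND_MAX, so only 24..32 pass. */
    StaticAssertDecl(MY_STATS_KIND >= PGSTAT_KIND_CUSTOM_MIN &&
                     MY_STATS_KIND <= PGSTAT_KIND_CUSTOM_MAX,
                     "custom stats kind ID out of range");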
These new numbers should be enough to leave some room for the following years for built-in and custom stats kinds, with stable ID numbers. At least, it should be enough for extension developers to begin using this facility. The limits can always be increased in the tree later, as requirements evolve. Per discussion with Andres Freund and Bertrand Drouvot. Discussion: https://postgr.es/m/eb224uegsga2hgq7dfq3ps5cduhpqej7ir2hjxzzozjthrekx5@dysei6buqthe Backpatch-through: 18 --- src/include/utils/pgstat_kind.h | 6 +++--- src/test/modules/injection_points/injection_stats.c | 2 +- src/test/modules/injection_points/injection_stats_fixed.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/include/utils/pgstat_kind.h b/src/include/utils/pgstat_kind.h index f44169fd5a3c7..eb5f0b3ae6db7 100644 --- a/src/include/utils/pgstat_kind.h +++ b/src/include/utils/pgstat_kind.h @@ -18,7 +18,7 @@ /* Range of IDs allowed, for built-in and custom kinds */ #define PGSTAT_KIND_MIN 1 /* Minimum ID allowed */ -#define PGSTAT_KIND_MAX 256 /* Maximum ID allowed */ +#define PGSTAT_KIND_MAX 32 /* Maximum ID allowed */ /* use 0 for INVALID, to catch zero-initialized data */ #define PGSTAT_KIND_INVALID 0 @@ -46,7 +46,7 @@ /* Custom stats kinds */ /* Range of IDs allowed for custom stats kinds */ -#define PGSTAT_KIND_CUSTOM_MIN 128 +#define PGSTAT_KIND_CUSTOM_MIN 24 #define PGSTAT_KIND_CUSTOM_MAX PGSTAT_KIND_MAX #define PGSTAT_KIND_CUSTOM_SIZE (PGSTAT_KIND_CUSTOM_MAX - PGSTAT_KIND_CUSTOM_MIN + 1) @@ -55,7 +55,7 @@ * development and have not reserved their own unique kind ID yet. See: * https://wiki.postgresql.org/wiki/CustomCumulativeStats */ -#define PGSTAT_KIND_EXPERIMENTAL 128 +#define PGSTAT_KIND_EXPERIMENTAL 24 static inline bool pgstat_is_kind_builtin(PgStat_Kind kind) diff --git a/src/test/modules/injection_points/injection_stats.c b/src/test/modules/injection_points/injection_stats.c index 14903c629e0d1..e3947b23ba573 100644 --- a/src/test/modules/injection_points/injection_stats.c +++ b/src/test/modules/injection_points/injection_stats.c @@ -59,7 +59,7 @@ static const PgStat_KindInfo injection_stats = { /* * Kind ID reserved for statistics of injection points. */ -#define PGSTAT_KIND_INJECTION 129 +#define PGSTAT_KIND_INJECTION 25 /* Track if stats are loaded */ static bool inj_stats_loaded = false; diff --git a/src/test/modules/injection_points/injection_stats_fixed.c b/src/test/modules/injection_points/injection_stats_fixed.c index 3d0c01bdd05ab..bc54c79d190b9 100644 --- a/src/test/modules/injection_points/injection_stats_fixed.c +++ b/src/test/modules/injection_points/injection_stats_fixed.c @@ -64,7 +64,7 @@ static const PgStat_KindInfo injection_stats_fixed = { /* * Kind ID reserved for statistics of injection points. */ -#define PGSTAT_KIND_INJECTION_FIXED 130 +#define PGSTAT_KIND_INJECTION_FIXED 26 /* Track if fixed-numbered stats are loaded */ static bool inj_fixed_loaded = false; From 641f20d4c433b66df2928408fb2b44bd165c2329 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 25 Jul 2025 16:17:13 +0900 Subject: [PATCH 368/602] Fix assertion failure with latch wait in single-user mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LatchWaitSetPostmasterDeathPos, the latch event position for the postmaster death event, is initialized under IsUnderPostmaster. WaitLatch() treated it as a valid wait target in single-user mode (!IsUnderPostmaster), which was incorrect.
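For illustration, the kind of caller affected is any ordinary wait loop that requests postmaster-death handling unconditionally (a minimal sketch using the stock latch API; the function name is illustrative):

    #include "postgres.h"

    #include "miscadmin.h"
    #include "storage/latch.h"
    #include "utils/wait_event.h"

    /* Sleep up to 1s or until the latch is set. */
    static void
    nap_on_latch(void)
    {
        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                         1000L,     /* timeout in milliseconds */
                         WAIT_EVENT_PG_SLEEP);
        ResetLatch(MyLatch);
        CHECK_FOR_INTERRUPTS();
    }

With the fix, the postmaster-death flags are simply ignored in single-user mode instead of touching a wait-event slot that was never initialized.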
One code path found to fail with an assertion failure is a database drop in single-user mode while waiting in WaitForProcSignalBarrier() after the drop. Oversight in commit 84e5b2f07a5e. Author: Patrick Stählin Co-authored-by: Ronan Dunklau Discussion: https://postgr.es/m/18996-3a2744c8140488de@postgresql.org Backpatch-through: 18 --- src/backend/storage/ipc/latch.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c index c6aefd2f688dd..beadeb5e46afa 100644 --- a/src/backend/storage/ipc/latch.c +++ b/src/backend/storage/ipc/latch.c @@ -187,9 +187,11 @@ WaitLatch(Latch *latch, int wakeEvents, long timeout, if (!(wakeEvents & WL_LATCH_SET)) latch = NULL; ModifyWaitEvent(LatchWaitSet, LatchWaitSetLatchPos, WL_LATCH_SET, latch); - ModifyWaitEvent(LatchWaitSet, LatchWaitSetPostmasterDeathPos, - (wakeEvents & (WL_EXIT_ON_PM_DEATH | WL_POSTMASTER_DEATH)), - NULL); + + if (IsUnderPostmaster) + ModifyWaitEvent(LatchWaitSet, LatchWaitSetPostmasterDeathPos, + (wakeEvents & (WL_EXIT_ON_PM_DEATH | WL_POSTMASTER_DEATH)), + NULL); if (WaitEventSetWait(LatchWaitSet, (wakeEvents & WL_TIMEOUT) ? timeout : -1, From b5d084c5353f29e2e217dfa86f327e14d02998c1 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Fri, 25 Jul 2025 18:38:36 +0900 Subject: [PATCH 369/602] Fix background worker not restarting after crash-and-restart cycle. Previously, if a background worker crashed (e.g., due to a SIGKILL) and the server restarted due to restart_after_crash being enabled, the worker was not restarted as expected. Background workers without the never-restart flag should automatically restart in this case. This issue was introduced in commit 28a520c0b77, which failed to reset the rw_pid field in the RegisteredBgWorker struct for the crashed worker. This commit fixes the problem by resetting rw_pid for all eligible background workers during the crash-and-restart cycle. Back-patched to v18, where the bug was introduced. Bug fix patches were proposed by Andrey Rudometov and ChangAo Chen, but this commit uses a different approach. Reported-by: Andrey Rudometov Reported-by: ChangAo Chen Author: Andrey Rudometov Author: ChangAo Chen Co-authored-by: Fujii Masao Reviewed-by: ChangAo Chen Reviewed-by: Shveta Malik Discussion: https://postgr.es/m/CAF6JsWiO=i24qYitWe6ns1sXqcL86rYxdyU+pNYk-WueKPSySg@mail.gmail.com Discussion: https://postgr.es/m/tencent_E00A056B3953EE6440F0F40F80EC30427D09@qq.com Backpatch-through: 18 --- src/backend/postmaster/bgworker.c | 1 + src/backend/postmaster/postmaster.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 116ddf7b835f1..1ad65c237c34e 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -613,6 +613,7 @@ ResetBackgroundWorkerCrashTimes(void) * resetting. */ rw->rw_crashed_at = 0; + rw->rw_pid = 0; /* * If there was anyone waiting for it, they're history. diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index cca9b946e5384..e01d9f0cfe81e 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2630,6 +2630,13 @@ CleanupBackend(PMChild *bp, } bp = NULL; + /* + * In a crash case, exit immediately without resetting background worker + * state. However, if restart_after_crash is enabled, the background + * worker state (e.g., rw_pid) still needs to be reset so the worker can + * restart after crash recovery.
This reset is handled in + * ResetBackgroundWorkerCrashTimes(), not here. + */ if (crashed) { HandleChildCrash(bp_pid, exitstatus, procname); From 1dfe3ef3f960d6924eb1f18facf4fbdae6e1cc1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Fri, 25 Jul 2025 12:03:19 +0200 Subject: [PATCH 370/602] Refactor grammar to create opt_utility_option_list This changes the grammar for REINDEX, CHECKPOINT, CLUSTER, ANALYZE/ANALYSE; they still accept the same options as before, but the grammar is written differently for convenience of future development. Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/202507231538.ir7pjzoow6oe@alvherre.pgsql --- src/backend/parser/gram.y | 124 +++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 68 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 73345bb3c7045..db43034b9db57 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -318,6 +318,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type opt_qualified_name %type opt_concurrently %type opt_drop_behavior +%type opt_utility_option_list +%type utility_option_list +%type utility_option_elem +%type utility_option_name +%type utility_option_arg %type alter_column_default opclass_item opclass_drop alter_using %type add_drop opt_asc_desc opt_nulls_order @@ -338,10 +343,6 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); create_extension_opt_item alter_extension_opt_item %type opt_lock lock_type cast_context -%type utility_option_name -%type utility_option_elem -%type utility_option_list -%type utility_option_arg %type drop_option %type opt_or_replace opt_no opt_grant_grant_option @@ -556,7 +557,6 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type generic_option_list alter_generic_option_list %type reindex_target_relation reindex_target_all -%type opt_reindex_option_list %type copy_generic_opt_arg copy_generic_opt_arg_list_item %type copy_generic_opt_elem @@ -1141,6 +1141,41 @@ opt_drop_behavior: | /* EMPTY */ { $$ = DROP_RESTRICT; /* default */ } ; +opt_utility_option_list: + '(' utility_option_list ')' { $$ = $2; } + | /* EMPTY */ { $$ = NULL; } + ; + +utility_option_list: + utility_option_elem + { + $$ = list_make1($1); + } + | utility_option_list ',' utility_option_elem + { + $$ = lappend($1, $3); + } + ; + +utility_option_elem: + utility_option_name utility_option_arg + { + $$ = makeDefElem($1, $2, @1); + } + ; + +utility_option_name: + NonReservedWord { $$ = $1; } + | analyze_keyword { $$ = "analyze"; } + | FORMAT_LA { $$ = "format"; } + ; + +utility_option_arg: + opt_boolean_or_string { $$ = (Node *) makeString($1); } + | NumericOnly { $$ = (Node *) $1; } + | /* EMPTY */ { $$ = NULL; } + ; + /***************************************************************************** * * CALL statement @@ -2028,18 +2063,12 @@ constraints_set_mode: * Checkpoint statement */ CheckPointStmt: - CHECKPOINT + CHECKPOINT opt_utility_option_list { CheckPointStmt *n = makeNode(CheckPointStmt); $$ = (Node *) n; - } - | CHECKPOINT '(' utility_option_list ')' - { - CheckPointStmt *n = makeNode(CheckPointStmt); - - $$ = (Node *) n; - n->options = $3; + n->options = $2; } ; @@ -9354,7 +9383,7 @@ DropTransformStmt: DROP TRANSFORM opt_if_exists FOR Typename LANGUAGE name opt_d *****************************************************************************/ ReindexStmt: - REINDEX opt_reindex_option_list reindex_target_relation 
opt_concurrently qualified_name + REINDEX opt_utility_option_list reindex_target_relation opt_concurrently qualified_name { ReindexStmt *n = makeNode(ReindexStmt); @@ -9367,7 +9396,7 @@ ReindexStmt: makeDefElem("concurrently", NULL, @4)); $$ = (Node *) n; } - | REINDEX opt_reindex_option_list SCHEMA opt_concurrently name + | REINDEX opt_utility_option_list SCHEMA opt_concurrently name { ReindexStmt *n = makeNode(ReindexStmt); @@ -9380,7 +9409,7 @@ ReindexStmt: makeDefElem("concurrently", NULL, @4)); $$ = (Node *) n; } - | REINDEX opt_reindex_option_list reindex_target_all opt_concurrently opt_single_name + | REINDEX opt_utility_option_list reindex_target_all opt_concurrently opt_single_name { ReindexStmt *n = makeNode(ReindexStmt); @@ -9402,10 +9431,6 @@ reindex_target_all: SYSTEM_P { $$ = REINDEX_OBJECT_SYSTEM; } | DATABASE { $$ = REINDEX_OBJECT_DATABASE; } ; -opt_reindex_option_list: - '(' utility_option_list ')' { $$ = $2; } - | /* EMPTY */ { $$ = NULL; } - ; /***************************************************************************** * @@ -11903,13 +11928,13 @@ ClusterStmt: n->params = $3; $$ = (Node *) n; } - | CLUSTER '(' utility_option_list ')' + | CLUSTER opt_utility_option_list { ClusterStmt *n = makeNode(ClusterStmt); n->relation = NULL; n->indexname = NULL; - n->params = $3; + n->params = $2; $$ = (Node *) n; } /* unparenthesized VERBOSE kept for pre-14 compatibility */ @@ -11919,21 +11944,18 @@ ClusterStmt: n->relation = $3; n->indexname = $4; - n->params = NIL; if ($2) - n->params = lappend(n->params, makeDefElem("verbose", NULL, @2)); + n->params = list_make1(makeDefElem("verbose", NULL, @2)); $$ = (Node *) n; } /* unparenthesized VERBOSE kept for pre-17 compatibility */ - | CLUSTER opt_verbose + | CLUSTER VERBOSE { ClusterStmt *n = makeNode(ClusterStmt); n->relation = NULL; n->indexname = NULL; - n->params = NIL; - if ($2) - n->params = lappend(n->params, makeDefElem("verbose", NULL, @2)); + n->params = list_make1(makeDefElem("verbose", NULL, @2)); $$ = (Node *) n; } /* kept for pre-8.3 compatibility */ @@ -11943,9 +11965,8 @@ ClusterStmt: n->relation = $5; n->indexname = $3; - n->params = NIL; if ($2) - n->params = lappend(n->params, makeDefElem("verbose", NULL, @2)); + n->params = list_make1(makeDefElem("verbose", NULL, @2)); $$ = (Node *) n; } ; @@ -11996,64 +12017,31 @@ VacuumStmt: VACUUM opt_full opt_freeze opt_verbose opt_analyze opt_vacuum_relati } ; -AnalyzeStmt: analyze_keyword opt_verbose opt_vacuum_relation_list +AnalyzeStmt: analyze_keyword opt_utility_option_list opt_vacuum_relation_list { VacuumStmt *n = makeNode(VacuumStmt); - n->options = NIL; - if ($2) - n->options = lappend(n->options, - makeDefElem("verbose", NULL, @2)); + n->options = $2; n->rels = $3; n->is_vacuumcmd = false; $$ = (Node *) n; } - | analyze_keyword '(' utility_option_list ')' opt_vacuum_relation_list + | analyze_keyword VERBOSE opt_vacuum_relation_list { VacuumStmt *n = makeNode(VacuumStmt); - n->options = $3; - n->rels = $5; + n->options = list_make1(makeDefElem("verbose", NULL, @2)); + n->rels = $3; n->is_vacuumcmd = false; $$ = (Node *) n; } ; -utility_option_list: - utility_option_elem - { - $$ = list_make1($1); - } - | utility_option_list ',' utility_option_elem - { - $$ = lappend($1, $3); - } - ; - analyze_keyword: ANALYZE | ANALYSE /* British */ ; -utility_option_elem: - utility_option_name utility_option_arg - { - $$ = makeDefElem($1, $2, @1); - } - ; - -utility_option_name: - NonReservedWord { $$ = $1; } - | analyze_keyword { $$ = "analyze"; } - | FORMAT_LA { $$ = "format"; } - 
; - -utility_option_arg: - opt_boolean_or_string { $$ = (Node *) makeString($1); } - | NumericOnly { $$ = (Node *) $1; } - | /* EMPTY */ { $$ = NULL; } - ; - opt_analyze: analyze_keyword { $$ = true; } | /*EMPTY*/ { $$ = false; } From 5457ea46d181f8b8dbe1ae482720b23bff4029de Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 10:56:55 -0400 Subject: [PATCH 371/602] Fix dynahash's HASH_FIXED_SIZE ("isfixed") option. This flag was effectively a no-op in EXEC_BACKEND (ie, Windows) builds, because it was kept in the process-local HTAB struct, and it could only ever become set in the postmaster's copy. The simplest fix is to move it to the shared HASHHDR struct. We could keep a copy in HTAB as well, as we do with keysize and some other fields, but the "too much contention" argument doesn't seem to apply here: we only examine isfixed during element_alloc(), which had better not get hit very often for a shared hashtable. This oversight dates to 7c797e719 which invented the option. But back-patching doesn't seem appropriate given the lack of field complaints. If there is anyone running an affected workload on Windows, they might be unhappy about the behavior changing in a minor release. Author: Aidar Imamov Reviewed-by: Tom Lane Discussion: https://postgr.es/m/4d0cb35ff01c5c74d2b9a582ecb73823@postgrespro.ru --- src/backend/utils/hash/dynahash.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1ad155d446e51..42e9be274fc6a 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -195,6 +195,7 @@ struct HASHHDR long ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ int nelem_alloc; /* number of entries to allocate at once */ + bool isfixed; /* if true, don't enlarge */ #ifdef HASH_STATISTICS @@ -227,7 +228,6 @@ struct HTAB MemoryContext hcxt; /* memory context if default allocator used */ char *tabname; /* table name (for error messages) */ bool isshared; /* true if table is in shared memory */ - bool isfixed; /* if true, don't enlarge */ /* freezing a shared table isn't allowed, so we can keep state here */ bool frozen; /* true = no more inserts allowed */ @@ -618,8 +618,10 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) } } + /* Set isfixed if requested, but not till after we build initial entries */ if (flags & HASH_FIXED_SIZE) - hashp->isfixed = true; + hctl->isfixed = true; + return hashp; } @@ -644,6 +646,8 @@ hdefault(HTAB *hashp) hctl->ssize = DEF_SEGSIZE; hctl->sshift = DEF_SEGSIZE_SHIFT; + hctl->isfixed = false; /* can be enlarged */ + #ifdef HASH_STATISTICS hctl->accesses = hctl->collisions = 0; #endif @@ -1713,7 +1717,7 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) HASHELEMENT *prevElement; int i; - if (hashp->isfixed) + if (hctl->isfixed) return false; /* Each element has a HASHELEMENT header plus user data. */ From 7d8f5957792421ec3bb9d1b9b6ca25d689d974b7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:30:00 -0400 Subject: [PATCH 372/602] Create infrastructure to reliably prevent leakage of PGresults. Commit 232d8caea fixed a case where postgres_fdw could lose track of a PGresult object, resulting in a process-lifespan memory leak. But I have little faith that there aren't other potential PGresult leakages, now or in future, in the backend modules that use libpq. 
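A sketch of the hazard, for concreteness (function and message wording are illustrative; assumes the usual backend-side libpq includes): PGresults are malloc'd, not palloc'd, so any elog/ereport(ERROR) between obtaining and clearing one leaks it for the life of the process unless the caller adds PG_TRY ceremony:

    static void
    fetch_check(PGconn *conn)
    {
        PGresult   *res = PQgetResult(conn);

        if (PQresultStatus(res) != PGRES_TUPLES_OK)
            ereport(ERROR,
                    (errmsg("remote query failed: %s",
                            PQresultErrorMessage(res))));   /* res leaks here, pre-patch */

        PQclear(res);
    }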
Therefore, this patch proposes infrastructure that makes all PGresults returned from libpq act as though they are palloc'd in the CurrentMemoryContext (with the option to relocate them to another context later). This should greatly reduce the risk of careless leaks, and it also permits removal of a bunch of code that attempted to prevent such leaks via PG_TRY blocks. This patch adds infrastructure that wraps each PGresult in a "libpqsrv_PGresult" that provides a memory context reset callback to PQclear the PGresult. Code using this abstraction is inherently memory-safe to the same extent as we are accustomed to in most backend code. Furthermore, we add some macros that automatically redirect calls of the libpq functions concerned with PGresults to use this infrastructure, so that almost no source-code changes are needed to wheel this infrastructure into place in all the backend code that uses libpq. Perhaps in future we could create similar infrastructure for PGconn objects, but there seems less need for that. This patch just creates the infrastructure and makes relevant code use it, including reverting 232d8caea in favor of this mechanism. A good deal of follow-on simplification is possible now that we don't have to be so cautious about freeing PGresults, but I'll put that in a separate patch. Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/postgres_fdw/postgres_fdw.c | 36 +--- contrib/postgres_fdw/postgres_fdw.h | 2 +- src/backend/utils/mmgr/mcxt.c | 39 +++- src/include/libpq/libpq-be-fe-helpers.h | 27 +-- src/include/libpq/libpq-be-fe.h | 259 ++++++++++++++++++++++++ src/include/utils/palloc.h | 2 + src/tools/pgindent/typedefs.list | 1 + 7 files changed, 322 insertions(+), 44 deletions(-) create mode 100644 src/include/libpq/libpq-be-fe.h diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index e0a34b27c7cfd..3a84d06cfd1f7 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -240,7 +240,6 @@ typedef struct PgFdwDirectModifyState PGresult *result; /* result for query */ int num_tuples; /* # of result tuples */ int next_tuple; /* index of next one to return */ - MemoryContextCallback result_cb; /* ensures result will get freed */ Relation resultRel; /* relcache entry for the target relation */ AttrNumber *attnoMap; /* array of attnums of input user columns */ AttrNumber ctidAttno; /* attnum of input ctid column */ @@ -2671,17 +2670,6 @@ postgresBeginDirectModify(ForeignScanState *node, int eflags) dmstate = (PgFdwDirectModifyState *) palloc0(sizeof(PgFdwDirectModifyState)); node->fdw_state = dmstate; - /* - * We use a memory context callback to ensure that the dmstate's PGresult - * (if any) will be released, even if the query fails somewhere that's - * outside our control. The callback is always armed for the duration of - * the query; this relies on PQclear(NULL) being a no-op. - */ - dmstate->result_cb.func = (MemoryContextCallbackFunction) PQclear; - dmstate->result_cb.arg = NULL; - MemoryContextRegisterResetCallback(CurrentMemoryContext, - &dmstate->result_cb); - /* * Identify which user to do the remote access as. This should match what * ExecCheckPermissions() does. @@ -2829,13 +2817,7 @@ postgresEndDirectModify(ForeignScanState *node) return; /* Release PGresult */ - if (dmstate->result) - { - PQclear(dmstate->result); - dmstate->result = NULL; - /* ... 
and don't forget to disable the callback */ - dmstate->result_cb.arg = NULL; - } + PQclear(dmstate->result); /* Release remote connection */ ReleaseConnection(dmstate->conn); @@ -4615,20 +4597,20 @@ execute_dml_stmt(ForeignScanState *node) /* * Get the result, and check for success. - * - * We use a memory context callback to ensure that the PGresult will be - * released, even if the query fails somewhere that's outside our control. - * The callback is already registered, just need to fill in its arg. */ - Assert(dmstate->result == NULL); dmstate->result = pgfdw_get_result(dmstate->conn); - dmstate->result_cb.arg = dmstate->result; - if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, false, + pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, true, dmstate->query); + /* + * The result potentially needs to survive across multiple executor row + * cycles, so move it to the context where the dmstate is. + */ + dmstate->result = libpqsrv_PGresultSetParent(dmstate->result, + GetMemoryChunkContext(dmstate)); + /* Get the number of rows affected. */ if (dmstate->has_returning) dmstate->num_tuples = PQntuples(dmstate->result); diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 81358f3bde7df..9cb4ee84139ea 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -15,7 +15,7 @@ #include "foreign/foreign.h" #include "lib/stringinfo.h" -#include "libpq-fe.h" +#include "libpq/libpq-be-fe.h" #include "nodes/execnodes.h" #include "nodes/pathnodes.h" #include "utils/relcache.h" diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 15fa4d0a55eeb..ce01dce9861da 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -560,9 +560,7 @@ MemoryContextDeleteChildren(MemoryContext context) * the specified context, since that means it will automatically be freed * when no longer needed. * - * There is no API for deregistering a callback once registered. If you - * want it to not do anything anymore, adjust the state pointed to by its - * "arg" to indicate that. + * Note that callers can assume this cannot fail. */ void MemoryContextRegisterResetCallback(MemoryContext context, @@ -577,6 +575,41 @@ MemoryContextRegisterResetCallback(MemoryContext context, context->isReset = false; } +/* + * MemoryContextUnregisterResetCallback + * Undo the effects of MemoryContextRegisterResetCallback. + * + * This can be used if a callback's effects are no longer required + * at some point before the context has been reset/deleted. It is the + * caller's responsibility to pfree the callback struct (if needed). + * + * An assertion failure occurs if the callback was not registered. + * We could alternatively define that case as a no-op, but that seems too + * likely to mask programming errors such as passing the wrong context. + */ +void +MemoryContextUnregisterResetCallback(MemoryContext context, + MemoryContextCallback *cb) +{ + MemoryContextCallback *prev, + *cur; + + Assert(MemoryContextIsValid(context)); + + for (prev = NULL, cur = context->reset_cbs; cur != NULL; + prev = cur, cur = cur->next) + { + if (cur != cb) + continue; + if (prev) + prev->next = cur->next; + else + context->reset_cbs = cur->next; + return; + } + Assert(false); +} + /* * MemoryContextCallResetCallbacks * Internal function to call all registered callbacks for context. 
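The register/unregister pair supports the wrapping pattern the new header uses for PGresults: tie a foreign (malloc'd) object to a memory context at wrap time, and disarm the callback on explicit release. A generic sketch of that pattern (the type and function names are illustrative):

    #include "postgres.h"

    /* Make a malloc'd object live and die with the current context. */
    typedef struct WrappedObj
    {
        void       *obj;            /* the foreign allocation */
        MemoryContext ctx;          /* context it is tied to */
        MemoryContextCallback cb;   /* frees obj at context reset */
    } WrappedObj;

    static WrappedObj *
    wrap_obj(void *obj, MemoryContextCallbackFunction freefunc)
    {
        WrappedObj *w = palloc(sizeof(WrappedObj));

        w->obj = obj;
        w->ctx = CurrentMemoryContext;
        w->cb.func = freefunc;
        w->cb.arg = obj;
        MemoryContextRegisterResetCallback(w->ctx, &w->cb);
        return w;
    }

    static void
    unwrap_obj(WrappedObj *w, void (*freefunc) (void *))
    {
        /* Disarm first, so a later context reset won't double-free. */
        MemoryContextUnregisterResetCallback(w->ctx, &w->cb);
        freefunc(w->obj);
        pfree(w);
    }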
diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index af13bd6bf3da3..8d12a331497f8 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -30,17 +30,7 @@ #ifndef LIBPQ_BE_FE_HELPERS_H #define LIBPQ_BE_FE_HELPERS_H -/* - * Despite the name, BUILDING_DLL is set only when building code directly part - * of the backend. Which also is where libpq isn't allowed to be - * used. Obviously this doesn't protect against libpq-fe.h getting included - * otherwise, but perhaps still protects against a few mistakes... - */ -#ifdef BUILDING_DLL -#error "libpq may not be used code directly built into the backend" -#endif - -#include "libpq-fe.h" +#include "libpq/libpq-be-fe.h" #include "miscadmin.h" #include "storage/fd.h" #include "storage/latch.h" @@ -462,13 +452,21 @@ exit: ; * This function is intended to be set via PQsetNoticeReceiver() so that * NOTICE, WARNING, and similar messages from the connection are reported via * ereport(), instead of being printed to stderr. + * + * Because this will be called from libpq with a "real" (not wrapped) + * PGresult, we need to temporarily ignore libpq-be-fe.h's wrapper macros + * for PGresult and also PQresultErrorMessage, and put back the wrappers + * afterwards. That's not pretty, but there seems no better alternative. */ +#undef PGresult +#undef PQresultErrorMessage + static inline void libpqsrv_notice_receiver(void *arg, const PGresult *res) { - char *message; + const char *message; int len; - char *prefix = (char *) arg; + const char *prefix = (const char *) arg; /* * Trim the trailing newline from the message text returned from @@ -484,4 +482,7 @@ libpqsrv_notice_receiver(void *arg, const PGresult *res) errmsg_internal("%s: %.*s", prefix, len, message)); } +#define PGresult libpqsrv_PGresult +#define PQresultErrorMessage libpqsrv_PQresultErrorMessage + #endif /* LIBPQ_BE_FE_HELPERS_H */ diff --git a/src/include/libpq/libpq-be-fe.h b/src/include/libpq/libpq-be-fe.h new file mode 100644 index 0000000000000..e3f796b023092 --- /dev/null +++ b/src/include/libpq/libpq-be-fe.h @@ -0,0 +1,259 @@ +/*------------------------------------------------------------------------- + * + * libpq-be-fe.h + * Wrapper functions for using libpq in extensions + * + * Code built directly into the backend is not allowed to link to libpq + * directly. Extension code is allowed to use libpq however. One of the + * main risks in doing so is leaking the malloc-allocated structures + * returned by libpq, causing a process-lifespan memory leak. + * + * This file provides wrapper objects to help in building memory-safe code. + * A PGresult object wrapped this way acts much as if it were palloc'd: + * it will go away when the specified context is reset or deleted. + * We might later extend the concept to other objects such as PGconns. + * + * See also the libpq-be-fe-helpers.h file, which provides additional + * facilities built on top of this one. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/libpq/libpq-be-fe.h + * + *------------------------------------------------------------------------- + */ +#ifndef LIBPQ_BE_FE_H +#define LIBPQ_BE_FE_H + +/* + * Despite the name, BUILDING_DLL is set only when building code directly part + * of the backend. Which also is where libpq isn't allowed to be + * used. 
Obviously this doesn't protect against libpq-fe.h getting included + * otherwise, but perhaps still protects against a few mistakes... + */ +#ifdef BUILDING_DLL +#error "libpq may not be used in code directly built into the backend" +#endif + +#include "libpq-fe.h" + +/* + * Memory-context-safe wrapper object for a PGresult. + */ +typedef struct libpqsrv_PGresult +{ + PGresult *res; /* the wrapped PGresult */ + MemoryContext ctx; /* the MemoryContext it's attached to */ + MemoryContextCallback cb; /* the callback that implements freeing */ +} libpqsrv_PGresult; + + +/* + * Wrap the given PGresult in a libpqsrv_PGresult object, so that it will + * go away automatically if the current memory context is reset or deleted. + * + * To avoid potential memory leaks, backend code must always apply this + * immediately to the output of any PGresult-yielding libpq function. + */ +static inline libpqsrv_PGresult * +libpqsrv_PQwrap(PGresult *res) +{ + libpqsrv_PGresult *bres; + MemoryContext ctx = CurrentMemoryContext; + + /* We pass through a NULL result as-is, since there's nothing to free */ + if (res == NULL) + return NULL; + /* Attempt to allocate the wrapper ... this had better not throw error */ + bres = (libpqsrv_PGresult *) + MemoryContextAllocExtended(ctx, + sizeof(libpqsrv_PGresult), + MCXT_ALLOC_NO_OOM); + /* If we failed to allocate a wrapper, free the PGresult before failing */ + if (bres == NULL) + { + PQclear(res); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + /* Okay, set up the wrapper */ + bres->res = res; + bres->ctx = ctx; + bres->cb.func = (MemoryContextCallbackFunction) PQclear; + bres->cb.arg = res; + MemoryContextRegisterResetCallback(ctx, &bres->cb); + return bres; +} + +/* + * Free a wrapped PGresult, after detaching it from the memory context. + * Like PQclear(), allow the argument to be NULL. + */ +static inline void +libpqsrv_PQclear(libpqsrv_PGresult *bres) +{ + if (bres) + { + MemoryContextUnregisterResetCallback(bres->ctx, &bres->cb); + PQclear(bres->res); + pfree(bres); + } +} + +/* + * Move a wrapped PGresult to have a different parent context. + */ +static inline libpqsrv_PGresult * +libpqsrv_PGresultSetParent(libpqsrv_PGresult *bres, MemoryContext ctx) +{ + libpqsrv_PGresult *newres; + + /* We pass through a NULL result as-is */ + if (bres == NULL) + return NULL; + /* Make a new wrapper in the target context, raising error on OOM */ + newres = (libpqsrv_PGresult *) + MemoryContextAlloc(ctx, sizeof(libpqsrv_PGresult)); + /* Okay, set up the new wrapper */ + newres->res = bres->res; + newres->ctx = ctx; + newres->cb.func = (MemoryContextCallbackFunction) PQclear; + newres->cb.arg = bres->res; + MemoryContextRegisterResetCallback(ctx, &newres->cb); + /* Disarm and delete the old wrapper */ + MemoryContextUnregisterResetCallback(bres->ctx, &bres->cb); + pfree(bres); + return newres; +} + +/* + * Convenience wrapper for PQgetResult. + * + * We could supply wrappers for other PGresult-returning functions too, + * but at present there's no need. + */ +static inline libpqsrv_PGresult * +libpqsrv_PQgetResult(PGconn *conn) +{ + return libpqsrv_PQwrap(PQgetResult(conn)); +} + +/* + * Accessor functions for libpqsrv_PGresult. While it's not necessary to use + * these, they emulate the behavior of the underlying libpq functions when + * passed a NULL pointer. This is particularly important for PQresultStatus, + * which is often the first check on a result. 
+ */ + +static inline ExecStatusType +libpqsrv_PQresultStatus(const libpqsrv_PGresult *res) +{ + if (!res) + return PGRES_FATAL_ERROR; + return PQresultStatus(res->res); +} + +static inline const char * +libpqsrv_PQresultErrorMessage(const libpqsrv_PGresult *res) +{ + if (!res) + return ""; + return PQresultErrorMessage(res->res); +} + +static inline char * +libpqsrv_PQresultErrorField(const libpqsrv_PGresult *res, int fieldcode) +{ + if (!res) + return NULL; + return PQresultErrorField(res->res, fieldcode); +} + +static inline char * +libpqsrv_PQcmdStatus(const libpqsrv_PGresult *res) +{ + if (!res) + return NULL; + return PQcmdStatus(res->res); +} + +static inline int +libpqsrv_PQntuples(const libpqsrv_PGresult *res) +{ + if (!res) + return 0; + return PQntuples(res->res); +} + +static inline int +libpqsrv_PQnfields(const libpqsrv_PGresult *res) +{ + if (!res) + return 0; + return PQnfields(res->res); +} + +static inline char * +libpqsrv_PQgetvalue(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return NULL; + return PQgetvalue(res->res, tup_num, field_num); +} + +static inline int +libpqsrv_PQgetlength(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return 0; + return PQgetlength(res->res, tup_num, field_num); +} + +static inline int +libpqsrv_PQgetisnull(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return 1; /* pretend it is null */ + return PQgetisnull(res->res, tup_num, field_num); +} + +static inline char * +libpqsrv_PQfname(const libpqsrv_PGresult *res, int field_num) +{ + if (!res) + return NULL; + return PQfname(res->res, field_num); +} + +static inline const char * +libpqsrv_PQcmdTuples(const libpqsrv_PGresult *res) +{ + if (!res) + return ""; + return PQcmdTuples(res->res); +} + +/* + * Redefine these libpq entry point names concerned with PGresults so that + * they will operate on libpqsrv_PGresults instead. This avoids needing to + * convert a lot of pre-existing code, and reduces the notational differences + * between frontend and backend libpq-using code. 
+ */ +#define PGresult libpqsrv_PGresult +#define PQclear libpqsrv_PQclear +#define PQgetResult libpqsrv_PQgetResult +#define PQresultStatus libpqsrv_PQresultStatus +#define PQresultErrorMessage libpqsrv_PQresultErrorMessage +#define PQresultErrorField libpqsrv_PQresultErrorField +#define PQcmdStatus libpqsrv_PQcmdStatus +#define PQntuples libpqsrv_PQntuples +#define PQnfields libpqsrv_PQnfields +#define PQgetvalue libpqsrv_PQgetvalue +#define PQgetlength libpqsrv_PQgetlength +#define PQgetisnull libpqsrv_PQgetisnull +#define PQfname libpqsrv_PQfname +#define PQcmdTuples libpqsrv_PQcmdTuples + +#endif /* LIBPQ_BE_FE_H */ diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h index e1b42267b22aa..039b9cba61a32 100644 --- a/src/include/utils/palloc.h +++ b/src/include/utils/palloc.h @@ -133,6 +133,8 @@ MemoryContextSwitchTo(MemoryContext context) /* Registration of memory context reset/delete callbacks */ extern void MemoryContextRegisterResetCallback(MemoryContext context, MemoryContextCallback *cb); +extern void MemoryContextUnregisterResetCallback(MemoryContext context, + MemoryContextCallback *cb); /* * These are like standard strdup() except the copied string is diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 4353befab9934..3daba26b23723 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3757,6 +3757,7 @@ leafSegmentInfo leaf_item libpq_gettext_func libpq_source +libpqsrv_PGresult line_t lineno_t list_sort_comparator From 80aa9848befc13c188d2775a859deaf172fdd3a2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:31:43 -0400 Subject: [PATCH 373/602] Reap the benefits of not having to avoid leaking PGresults. Remove a bunch of PG_TRY constructs, de-volatilize related variables, remove some PQclear calls in error paths. Aside from making the code simpler and shorter, this should provide some marginal performance gains. For ease of review, I did not re-indent code within the removed PG_TRY constructs. That'll be done in a separate patch. 
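The resulting shape, in sketch form (names illustrative): with the wrapped PGresult registered in a memory context, error paths may simply throw, and the remaining PQclear calls are just early releases:

    static void
    consume_result(PGresult *res)   /* really a libpqsrv_PGresult */
    {
        if (PQresultStatus(res) != PGRES_TUPLES_OK)
            ereport(ERROR,
                    (errmsg("unexpected result status: %s",
                            PQresultErrorMessage(res))));   /* no PG_TRY, no leak */

        for (int i = 0; i < PQntuples(res); i++)
            elog(DEBUG1, "value: %s", PQgetvalue(res, i, 0));

        PQclear(res);               /* optional early release */
    }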
Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/dblink/dblink.c | 96 +++-------- contrib/postgres_fdw/connection.c | 52 ++---- contrib/postgres_fdw/postgres_fdw.c | 163 ++++-------------- contrib/postgres_fdw/postgres_fdw.h | 2 +- .../libpqwalreceiver/libpqwalreceiver.c | 31 +--- src/include/libpq/libpq-be-fe-helpers.h | 13 +- 6 files changed, 83 insertions(+), 274 deletions(-) diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index de5bed282f3f0..fc423c0544d3f 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -101,8 +101,8 @@ static void materializeQueryResult(FunctionCallInfo fcinfo, const char *conname, const char *sql, bool fail); -static PGresult *storeQueryResult(volatile storeInfo *sinfo, PGconn *conn, const char *sql); -static void storeRow(volatile storeInfo *sinfo, PGresult *res, bool first); +static PGresult *storeQueryResult(storeInfo *sinfo, PGconn *conn, const char *sql); +static void storeRow(storeInfo *sinfo, PGresult *res, bool first); static remoteConn *getConnectionByName(const char *name); static HTAB *createConnHash(void); static remoteConn *createNewConnection(const char *name); @@ -169,14 +169,6 @@ typedef struct remoteConnHashEnt /* initial number of connection hashes */ #define NUMCONN 16 -static char * -xpstrdup(const char *in) -{ - if (in == NULL) - return NULL; - return pstrdup(in); -} - pg_noreturn static void dblink_res_internalerror(PGconn *conn, PGresult *res, const char *p2) { @@ -870,17 +862,14 @@ static void materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + bool is_sql_cmd; + int ntuples; + int nfields; /* prepTuplestoreResult must have been called previously */ Assert(rsinfo->returnMode == SFRM_Materialize); - PG_TRY(); - { - TupleDesc tupdesc; - bool is_sql_cmd; - int ntuples; - int nfields; - if (PQresultStatus(res) == PGRES_COMMAND_OK) { is_sql_cmd = true; @@ -988,13 +977,8 @@ materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res) /* clean up GUC settings, if we changed any */ restoreLocalGucs(nestlevel); } - } - PG_FINALLY(); - { - /* be sure to release the libpq result */ + PQclear(res); - } - PG_END_TRY(); } /* @@ -1013,16 +997,17 @@ materializeQueryResult(FunctionCallInfo fcinfo, bool fail) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - PGresult *volatile res = NULL; - volatile storeInfo sinfo = {0}; /* prepTuplestoreResult must have been called previously */ Assert(rsinfo->returnMode == SFRM_Materialize); - sinfo.fcinfo = fcinfo; - + /* Use a PG_TRY block to ensure we pump libpq dry of results */ PG_TRY(); { + storeInfo sinfo = {0}; + PGresult *res; + + sinfo.fcinfo = fcinfo; /* Create short-lived memory context for data conversions */ sinfo.tmpcontext = AllocSetContextCreate(CurrentMemoryContext, "dblink temporary context", @@ -1035,14 +1020,7 @@ materializeQueryResult(FunctionCallInfo fcinfo, (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)) { - /* - * dblink_res_error will clear the passed PGresult, so we need - * this ugly dance to avoid doing so twice during error exit - */ - PGresult *res1 = res; - - res = NULL; - dblink_res_error(conn, conname, res1, fail, + dblink_res_error(conn, conname, res, fail, "while executing query"); /* if fail isn't set, we'll return an empty query result */ } @@ -1081,7 +1059,6 @@ materializeQueryResult(FunctionCallInfo fcinfo, 
 			tuplestore_puttuple(tupstore, tuple);
 
 			PQclear(res);
-			res = NULL;
 		}
 		else
 		{
@@ -1090,26 +1067,20 @@ materializeQueryResult(FunctionCallInfo fcinfo,
 			Assert(rsinfo->setResult != NULL);
 
 			PQclear(res);
-			res = NULL;
 		}
 
 		/* clean up data conversion short-lived memory context */
 		if (sinfo.tmpcontext != NULL)
 			MemoryContextDelete(sinfo.tmpcontext);
-		sinfo.tmpcontext = NULL;
 
 		PQclear(sinfo.last_res);
-		sinfo.last_res = NULL;
 		PQclear(sinfo.cur_res);
-		sinfo.cur_res = NULL;
 	}
 	PG_CATCH();
 	{
-		/* be sure to release any libpq result we collected */
-		PQclear(res);
-		PQclear(sinfo.last_res);
-		PQclear(sinfo.cur_res);
-		/* and clear out any pending data in libpq */
+		PGresult   *res;
+
+		/* be sure to clear out any pending data in libpq */
 		while ((res = libpqsrv_get_result(conn, dblink_we_get_result)) != NULL)
 			PQclear(res);
 
@@ -1122,7 +1093,7 @@ materializeQueryResult(FunctionCallInfo fcinfo,
  * Execute query, and send any result rows to sinfo->tuplestore.
  */
 static PGresult *
-storeQueryResult(volatile storeInfo *sinfo, PGconn *conn, const char *sql)
+storeQueryResult(storeInfo *sinfo, PGconn *conn, const char *sql)
 {
 	bool		first = true;
 	int			nestlevel = -1;
@@ -1190,7 +1161,7 @@ storeQueryResult(volatile storeInfo *sinfo, PGconn *conn, const char *sql)
  * (in this case the PGresult might contain either zero or one row).
  */
 static void
-storeRow(volatile storeInfo *sinfo, PGresult *res, bool first)
+storeRow(storeInfo *sinfo, PGresult *res, bool first)
 {
 	int			nfields = PQnfields(res);
 	HeapTuple	tuple;
@@ -2795,10 +2766,13 @@ dblink_connstr_check(const char *connstr)
 /*
  * Report an error received from the remote server
  *
- * res: the received error result (will be freed)
+ * res: the received error result
  * fail: true for ERROR ereport, false for NOTICE
 * fmt and following args: sprintf-style format and values for errcontext;
 *		the resulting string should be worded like "while <some activity>"
+ *
+ * If "res" is not NULL, it'll be PQclear'ed here (unless we throw error,
+ * in which case memory context cleanup will clear it eventually).
 */
 static void
 dblink_res_error(PGconn *conn, const char *conname, PGresult *res,
@@ -2806,15 +2780,11 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res,
 {
 	int			level;
 	char	   *pg_diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
-	char	   *pg_diag_message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
-	char	   *pg_diag_message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
-	char	   *pg_diag_message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
-	char	   *pg_diag_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
+	char	   *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
+	char	   *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
+	char	   *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
+	char	   *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
 	int			sqlstate;
-	char	   *message_primary;
-	char	   *message_detail;
-	char	   *message_hint;
-	char	   *message_context;
 	va_list		ap;
 	char		dblink_context_msg[512];
@@ -2832,11 +2802,6 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res,
 	else
 		sqlstate = ERRCODE_CONNECTION_FAILURE;
 
-	message_primary = xpstrdup(pg_diag_message_primary);
-	message_detail = xpstrdup(pg_diag_message_detail);
-	message_hint = xpstrdup(pg_diag_message_hint);
-	message_context = xpstrdup(pg_diag_context);
-
 	/*
 	 * If we don't get a message from the PGresult, try the PGconn.
This is * needed because for connection-level failures, PQgetResult may just @@ -2845,14 +2810,6 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res, if (message_primary == NULL) message_primary = pchomp(PQerrorMessage(conn)); - /* - * Now that we've copied all the data we need out of the PGresult, it's - * safe to free it. We must do this to avoid PGresult leakage. We're - * leaking all the strings too, but those are in palloc'd memory that will - * get cleaned up eventually. - */ - PQclear(res); - /* * Format the basic errcontext string. Below, we'll add on something * about the connection name. That's a violation of the translatability @@ -2877,6 +2834,7 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res, dblink_context_msg, conname)) : (errcontext("%s on unnamed dblink connection", dblink_context_msg)))); + PQclear(res); } /* diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index c1ce6f3343665..c654c1a1ff0a7 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -815,7 +815,7 @@ static void do_sql_command_begin(PGconn *conn, const char *sql) { if (!PQsendQuery(conn, sql)) - pgfdw_report_error(ERROR, NULL, conn, false, sql); + pgfdw_report_error(ERROR, NULL, conn, sql); } static void @@ -830,10 +830,10 @@ do_sql_command_end(PGconn *conn, const char *sql, bool consume_input) * would be large compared to the overhead of PQconsumeInput.) */ if (consume_input && !PQconsumeInput(conn)) - pgfdw_report_error(ERROR, NULL, conn, false, sql); + pgfdw_report_error(ERROR, NULL, conn, sql); res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, true, sql); + pgfdw_report_error(ERROR, res, conn, sql); PQclear(res); } @@ -967,22 +967,21 @@ pgfdw_get_result(PGconn *conn) * Report an error we got from the remote server. * * elevel: error level to use (typically ERROR, but might be less) - * res: PGresult containing the error + * res: PGresult containing the error (might be NULL) * conn: connection we did the query on - * clear: if true, PQclear the result (otherwise caller will handle it) * sql: NULL, or text of remote command we tried to execute * + * If "res" is not NULL, it'll be PQclear'ed here (unless we throw error, + * in which case memory context cleanup will clear it eventually). + * * Note: callers that choose not to throw ERROR for a remote error are * responsible for making sure that the associated ConnCacheEntry gets * marked with have_error = true. */ void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - bool clear, const char *sql) + const char *sql) { - /* If requested, PGresult must be released before leaving this function. */ - PG_TRY(); - { char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); @@ -1016,13 +1015,7 @@ pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, message_hint ? errhint("%s", message_hint) : 0, message_context ? errcontext("%s", message_context) : 0, sql ? 
errcontext("remote SQL command: %s", sql) : 0)); - } - PG_FINALLY(); - { - if (clear) PQclear(res); - } - PG_END_TRY(); } /* @@ -1545,7 +1538,7 @@ pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query) */ if (!PQsendQuery(conn, query)) { - pgfdw_report_error(WARNING, NULL, conn, false, query); + pgfdw_report_error(WARNING, NULL, conn, query); return false; } @@ -1570,7 +1563,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, */ if (consume_input && !PQconsumeInput(conn)) { - pgfdw_report_error(WARNING, NULL, conn, false, query); + pgfdw_report_error(WARNING, NULL, conn, query); return false; } @@ -1582,7 +1575,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, (errmsg("could not get query result due to timeout"), errcontext("remote SQL command: %s", query))); else - pgfdw_report_error(WARNING, NULL, conn, false, query); + pgfdw_report_error(WARNING, NULL, conn, query); return false; } @@ -1590,7 +1583,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, /* Issue a warning if not successful. */ if (PQresultStatus(result) != PGRES_COMMAND_OK) { - pgfdw_report_error(WARNING, result, conn, true, query); + pgfdw_report_error(WARNING, result, conn, query); return ignore_errors; } PQclear(result); @@ -1618,17 +1611,12 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result, bool *timed_out) { - volatile bool failed = false; - PGresult *volatile last_res = NULL; + bool failed = false; + PGresult *last_res = NULL; + int canceldelta = RETRY_CANCEL_TIMEOUT * 2; *result = NULL; *timed_out = false; - - /* In what follows, do not leak any PGresults on an error. */ - PG_TRY(); - { - int canceldelta = RETRY_CANCEL_TIMEOUT * 2; - for (;;) { PGresult *res; @@ -1706,15 +1694,7 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PQclear(last_res); last_res = res; } -exit: ; - } - PG_CATCH(); - { - PQclear(last_res); - PG_RE_THROW(); - } - PG_END_TRY(); - +exit: if (failed) PQclear(last_res); else diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 3a84d06cfd1f7..f2dee7b1c69ab 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -1702,13 +1702,9 @@ postgresReScanForeignScan(ForeignScanState *node) return; } - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ res = pgfdw_exec_query(fsstate->conn, sql, fsstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fsstate->conn, true, sql); + pgfdw_report_error(ERROR, res, fsstate->conn, sql); PQclear(res); /* Now force a fresh FETCH. */ @@ -3608,11 +3604,7 @@ get_remote_estimate(const char *sql, PGconn *conn, double *rows, int *width, Cost *startup_cost, Cost *total_cost) { - PGresult *volatile res = NULL; - - /* PGresult must be released before leaving this function. */ - PG_TRY(); - { + PGresult *res; char *line; char *p; int n; @@ -3622,7 +3614,7 @@ get_remote_estimate(const char *sql, PGconn *conn, */ res = pgfdw_exec_query(conn, sql, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql); + pgfdw_report_error(ERROR, res, conn, sql); /* * Extract cost numbers for topmost plan node. 
Note we search for a @@ -3637,12 +3629,7 @@ get_remote_estimate(const char *sql, PGconn *conn, startup_cost, total_cost, rows, width); if (n != 4) elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); - } - PG_FINALLY(); - { PQclear(res); - } - PG_END_TRY(); } /* @@ -3782,17 +3769,14 @@ create_cursor(ForeignScanState *node) */ if (!PQsendQueryParams(conn, buf.data, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, conn, false, buf.data); + pgfdw_report_error(ERROR, NULL, conn, buf.data); /* * Get the result, and check for success. - * - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. */ res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, true, fsstate->query); + pgfdw_report_error(ERROR, res, conn, fsstate->query); PQclear(res); /* Mark the cursor as created, and show no tuples have been retrieved */ @@ -3814,7 +3798,10 @@ static void fetch_more_data(ForeignScanState *node) { PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; - PGresult *volatile res = NULL; + PGconn *conn = fsstate->conn; + PGresult *res; + int numrows; + int i; MemoryContext oldcontext; /* @@ -3825,13 +3812,6 @@ fetch_more_data(ForeignScanState *node) MemoryContextReset(fsstate->batch_cxt); oldcontext = MemoryContextSwitchTo(fsstate->batch_cxt); - /* PGresult must be released before leaving this function. */ - PG_TRY(); - { - PGconn *conn = fsstate->conn; - int numrows; - int i; - if (fsstate->async_capable) { Assert(fsstate->conn_state->pendingAreq); @@ -3843,7 +3823,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_get_result(conn); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, fsstate->query); + pgfdw_report_error(ERROR, res, conn, fsstate->query); /* Reset per-connection state */ fsstate->conn_state->pendingAreq = NULL; @@ -3859,7 +3839,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_exec_query(conn, sql, fsstate->conn_state); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, fsstate->query); + pgfdw_report_error(ERROR, res, conn, fsstate->query); } /* Convert the data into HeapTuples */ @@ -3887,12 +3867,8 @@ fetch_more_data(ForeignScanState *node) /* Must be EOF if we didn't get as many tuples as we asked for. */ fsstate->eof_reached = (numrows < fsstate->fetch_size); - } - PG_FINALLY(); - { + PQclear(res); - } - PG_END_TRY(); MemoryContextSwitchTo(oldcontext); } @@ -3966,14 +3942,9 @@ close_cursor(PGconn *conn, unsigned int cursor_number, PGresult *res; snprintf(sql, sizeof(sql), "CLOSE c%u", cursor_number); - - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ res = pgfdw_exec_query(conn, sql, conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, true, sql); + pgfdw_report_error(ERROR, res, conn, sql); PQclear(res); } @@ -4181,18 +4152,15 @@ execute_foreign_modify(EState *estate, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query); + pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. - * - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. 
*/ res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query); + pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); /* Check number of rows affected, and fetch RETURNING tuple if any */ if (fmstate->has_returning) @@ -4251,17 +4219,14 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) fmstate->query, 0, NULL)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query); + pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. - * - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. */ res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query); + pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); PQclear(res); /* This action shows that the prepare has been done. */ @@ -4352,16 +4317,11 @@ convert_prep_stmt_params(PgFdwModifyState *fmstate, /* * store_returning_result * Store the result of a RETURNING clause - * - * On error, be sure to release the PGresult on the way out. Callers do not - * have PG_TRY blocks to ensure this happens. */ static void store_returning_result(PgFdwModifyState *fmstate, TupleTableSlot *slot, PGresult *res) { - PG_TRY(); - { HeapTuple newtup; newtup = make_tuple_from_result_row(res, 0, @@ -4376,13 +4336,6 @@ store_returning_result(PgFdwModifyState *fmstate, * heaptuples directly, so allow for conversion. */ ExecForceStoreHeapTuple(newtup, slot, true); - } - PG_CATCH(); - { - PQclear(res); - PG_RE_THROW(); - } - PG_END_TRY(); } /* @@ -4418,14 +4371,9 @@ deallocate_query(PgFdwModifyState *fmstate) return; snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name); - - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ res = pgfdw_exec_query(fmstate->conn, sql, fmstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, sql); + pgfdw_report_error(ERROR, res, fmstate->conn, sql); PQclear(res); pfree(fmstate->p_name); fmstate->p_name = NULL; @@ -4593,7 +4541,7 @@ execute_dml_stmt(ForeignScanState *node) */ if (!PQsendQueryParams(dmstate->conn, dmstate->query, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, dmstate->conn, false, dmstate->query); + pgfdw_report_error(ERROR, NULL, dmstate->conn, dmstate->query); /* * Get the result, and check for success. @@ -4601,7 +4549,7 @@ execute_dml_stmt(ForeignScanState *node) dmstate->result = pgfdw_get_result(dmstate->conn); if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, true, + pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, dmstate->query); /* @@ -4947,7 +4895,7 @@ postgresAnalyzeForeignTable(Relation relation, UserMapping *user; PGconn *conn; StringInfoData sql; - PGresult *volatile res = NULL; + PGresult *res; /* Return the row-analysis function pointer */ *func = postgresAcquireSampleRowsFunc; @@ -4973,22 +4921,14 @@ postgresAnalyzeForeignTable(Relation relation, initStringInfo(&sql); deparseAnalyzeSizeSql(&sql, relation); - /* In what follows, do not risk leaking any PGresults. 
*/ - PG_TRY(); - { res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 1) elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10); - } - PG_FINALLY(); - { PQclear(res); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -5009,9 +4949,9 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) UserMapping *user; PGconn *conn; StringInfoData sql; - PGresult *volatile res = NULL; - volatile double reltuples = -1; - volatile char relkind = 0; + PGresult *res; + double reltuples; + char relkind; /* assume the remote relation does not support TABLESAMPLE */ *can_tablesample = false; @@ -5030,24 +4970,15 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) initStringInfo(&sql); deparseAnalyzeInfoSql(&sql, relation); - /* In what follows, do not risk leaking any PGresults. */ - PG_TRY(); - { res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 2) elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); reltuples = strtod(PQgetvalue(res, 0, 0), NULL); relkind = *(PQgetvalue(res, 0, 1)); - } - PG_FINALLY(); - { - if (res) PQclear(res); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -5090,7 +5021,9 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, double reltuples; unsigned int cursor_number; StringInfoData sql; - PGresult *volatile res = NULL; + PGresult *res; + char fetch_sql[64]; + int fetch_size; ListCell *lc; /* Initialize workspace state */ @@ -5267,17 +5200,10 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, deparseAnalyzeSql(&sql, relation, method, sample_frac, &astate.retrieved_attrs); - /* In what follows, do not risk leaking any PGresults. */ - PG_TRY(); - { - char fetch_sql[64]; - int fetch_size; - res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); PQclear(res); - res = NULL; /* * Determine the fetch size. The default is arbitrary, but shouldn't @@ -5328,7 +5254,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, res = pgfdw_exec_query(conn, fetch_sql, NULL); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); /* Process whatever we got. */ numrows = PQntuples(res); @@ -5336,7 +5262,6 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, analyze_row_processor(res, i, &astate); PQclear(res); - res = NULL; /* Must be EOF if we didn't get all the rows requested. */ if (numrows < fetch_size) @@ -5345,13 +5270,6 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, /* Close the cursor, just to be tidy. 
*/ close_cursor(conn, cursor_number, NULL); - } - PG_CATCH(); - { - PQclear(res); - PG_RE_THROW(); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -5463,7 +5381,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) UserMapping *mapping; PGconn *conn; StringInfoData buf; - PGresult *volatile res = NULL; + PGresult *res; int numrows, i; ListCell *lc; @@ -5502,16 +5420,13 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Create workspace for strings */ initStringInfo(&buf); - /* In what follows, do not risk leaking any PGresults. */ - PG_TRY(); - { /* Check that the schema really exists */ appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = "); deparseStringLiteral(&buf, stmt->remote_schema); res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, buf.data); + pgfdw_report_error(ERROR, res, conn, buf.data); if (PQntuples(res) != 1) ereport(ERROR, @@ -5520,7 +5435,6 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) stmt->remote_schema, server->servername))); PQclear(res); - res = NULL; resetStringInfo(&buf); /* @@ -5628,7 +5542,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Fetch the data */ res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, buf.data); + pgfdw_report_error(ERROR, res, conn, buf.data); /* Process results */ numrows = PQntuples(res); @@ -5733,12 +5647,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) commands = lappend(commands, pstrdup(buf.data)); } - } - PG_FINALLY(); - { PQclear(res); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -7406,7 +7315,7 @@ postgresForeignAsyncNotify(AsyncRequest *areq) /* On error, report the original query, not the FETCH. 
*/ if (!PQconsumeInput(fsstate->conn)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, false, fsstate->query); + pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); fetch_more_data(node); @@ -7505,7 +7414,7 @@ fetch_more_data_begin(AsyncRequest *areq) fsstate->fetch_size, fsstate->cursor_number); if (!PQsendQuery(fsstate->conn, sql)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, false, fsstate->query); + pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); /* Remember that the request is in process */ fsstate->conn_state->pendingAreq = areq; diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 9cb4ee84139ea..38e1a88594131 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -167,7 +167,7 @@ extern PGresult *pgfdw_get_result(PGconn *conn); extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state); extern void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - bool clear, const char *sql); + const char *sql); /* in option.c */ extern int ExtractConnectionOptions(List *defelems, diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 886d99951dddf..239641bfbb66a 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -421,31 +421,22 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli) "IDENTIFY_SYSTEM", WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE); if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not receive database system identifier and timeline ID from " "the primary server: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } /* * IDENTIFY_SYSTEM returns 3 columns in 9.3 and earlier, and 4 columns in * 9.4 and onwards. 
*/ if (PQnfields(res) < 3 || PQntuples(res) != 1) - { - int ntuples = PQntuples(res); - int nfields = PQnfields(res); - - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid response from primary server"), errdetail("Could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields.", - ntuples, nfields, 1, 3))); - } + PQntuples(res), PQnfields(res), 1, 3))); primary_sysid = pstrdup(PQgetvalue(res, 0, 0)); *primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1)); PQclear(res); @@ -607,13 +598,10 @@ libpqrcv_startstreaming(WalReceiverConn *conn, return false; } else if (PQresultStatus(res) != PGRES_COPY_BOTH) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not start WAL streaming: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } PQclear(res); return true; } @@ -721,26 +709,17 @@ libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn, cmd, WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE); if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not receive timeline history file from " "the primary server: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } if (PQnfields(res) != 2 || PQntuples(res) != 1) - { - int ntuples = PQntuples(res); - int nfields = PQnfields(res); - - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid response from primary server"), errdetail("Expected 1 tuple with 2 fields, got %d tuples with %d fields.", - ntuples, nfields))); - } + PQntuples(res), PQnfields(res)))); *filename = pstrdup(PQgetvalue(res, 0, 0)); *len = PQgetlength(res, 0, 1); @@ -844,13 +823,10 @@ libpqrcv_receive(WalReceiverConn *conn, char **buffer, return -1; } else - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not receive data from WAL stream: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } } if (rawlen < -1) ereport(ERROR, @@ -974,13 +950,10 @@ libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, pfree(cmd.data); if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not create replication slot \"%s\": %s", slotname, pchomp(PQerrorMessage(conn->streamConn))))); - } if (lsn) *lsn = DatumGetLSN(DirectFunctionCall1Coll(pg_lsn_in, InvalidOid, diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index 8d12a331497f8..4ba635aa96f92 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -279,11 +279,8 @@ libpqsrv_exec_params(PGconn *conn, static inline PGresult * libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) { - PGresult *volatile lastResult = NULL; + PGresult *lastResult = NULL; - /* In what follows, do not leak any PGresults on an error. */ - PG_TRY(); - { for (;;) { /* Wait for, and collect, the next PGresult. */ @@ -306,14 +303,6 @@ libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) PQstatus(conn) == CONNECTION_BAD) break; } - } - PG_CATCH(); - { - PQclear(lastResult); - PG_RE_THROW(); - } - PG_END_TRY(); - return lastResult; } From 73873805fb3627cb23937c750fa83ffd8f16fc6c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:36:44 -0400 Subject: [PATCH 374/602] Run pgindent on the changes of the previous patch. This step can be checked mechanically. 
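Editorial note: to reproduce the mechanical check (assuming a built
pg_bsd_indent and the in-tree typedefs list), run pgindent over the files
touched by the previous patch and verify that the resulting tree matches
this patch:

    src/tools/pgindent/pgindent contrib/dblink/dblink.c \
        contrib/postgres_fdw/connection.c \
        contrib/postgres_fdw/postgres_fdw.c \
        src/include/libpq/libpq-be-fe-helpers.h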
Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/dblink/dblink.c | 180 +++--- contrib/postgres_fdw/connection.c | 188 +++---- contrib/postgres_fdw/postgres_fdw.c | 703 ++++++++++++------------ src/include/libpq/libpq-be-fe-helpers.h | 38 +- 4 files changed, 553 insertions(+), 556 deletions(-) diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index fc423c0544d3f..f98805fb5f735 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -870,115 +870,115 @@ materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res) /* prepTuplestoreResult must have been called previously */ Assert(rsinfo->returnMode == SFRM_Materialize); - if (PQresultStatus(res) == PGRES_COMMAND_OK) - { - is_sql_cmd = true; - - /* - * need a tuple descriptor representing one TEXT column to return - * the command status string as our result tuple - */ - tupdesc = CreateTemplateTupleDesc(1); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status", - TEXTOID, -1, 0); - ntuples = 1; - nfields = 1; - } - else - { - Assert(PQresultStatus(res) == PGRES_TUPLES_OK); + if (PQresultStatus(res) == PGRES_COMMAND_OK) + { + is_sql_cmd = true; - is_sql_cmd = false; + /* + * need a tuple descriptor representing one TEXT column to return the + * command status string as our result tuple + */ + tupdesc = CreateTemplateTupleDesc(1); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status", + TEXTOID, -1, 0); + ntuples = 1; + nfields = 1; + } + else + { + Assert(PQresultStatus(res) == PGRES_TUPLES_OK); - /* get a tuple descriptor for our result type */ - switch (get_call_result_type(fcinfo, NULL, &tupdesc)) - { - case TYPEFUNC_COMPOSITE: - /* success */ - break; - case TYPEFUNC_RECORD: - /* failed to determine actual type of RECORD */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - break; - default: - /* result type isn't composite */ - elog(ERROR, "return type must be a row type"); - break; - } + is_sql_cmd = false; - /* make sure we have a persistent copy of the tupdesc */ - tupdesc = CreateTupleDescCopy(tupdesc); - ntuples = PQntuples(res); - nfields = PQnfields(res); + /* get a tuple descriptor for our result type */ + switch (get_call_result_type(fcinfo, NULL, &tupdesc)) + { + case TYPEFUNC_COMPOSITE: + /* success */ + break; + case TYPEFUNC_RECORD: + /* failed to determine actual type of RECORD */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + break; + default: + /* result type isn't composite */ + elog(ERROR, "return type must be a row type"); + break; } - /* - * check result and tuple descriptor have the same number of columns - */ - if (nfields != tupdesc->natts) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("remote query result rowtype does not match " - "the specified FROM clause rowtype"))); + /* make sure we have a persistent copy of the tupdesc */ + tupdesc = CreateTupleDescCopy(tupdesc); + ntuples = PQntuples(res); + nfields = PQnfields(res); + } - if (ntuples > 0) - { - AttInMetadata *attinmeta; - int nestlevel = -1; - Tuplestorestate *tupstore; - MemoryContext oldcontext; - int row; - char **values; + /* + * check result and tuple descriptor have the same number of columns + */ + if (nfields != tupdesc->natts) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("remote query 
result rowtype does not match " + "the specified FROM clause rowtype"))); - attinmeta = TupleDescGetAttInMetadata(tupdesc); + if (ntuples > 0) + { + AttInMetadata *attinmeta; + int nestlevel = -1; + Tuplestorestate *tupstore; + MemoryContext oldcontext; + int row; + char **values; - /* Set GUCs to ensure we read GUC-sensitive data types correctly */ - if (!is_sql_cmd) - nestlevel = applyRemoteGucs(conn); + attinmeta = TupleDescGetAttInMetadata(tupdesc); - oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupdesc; - MemoryContextSwitchTo(oldcontext); + /* Set GUCs to ensure we read GUC-sensitive data types correctly */ + if (!is_sql_cmd) + nestlevel = applyRemoteGucs(conn); - values = palloc_array(char *, nfields); + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); - /* put all tuples into the tuplestore */ - for (row = 0; row < ntuples; row++) + values = palloc_array(char *, nfields); + + /* put all tuples into the tuplestore */ + for (row = 0; row < ntuples; row++) + { + HeapTuple tuple; + + if (!is_sql_cmd) { - HeapTuple tuple; + int i; - if (!is_sql_cmd) - { - int i; - - for (i = 0; i < nfields; i++) - { - if (PQgetisnull(res, row, i)) - values[i] = NULL; - else - values[i] = PQgetvalue(res, row, i); - } - } - else + for (i = 0; i < nfields; i++) { - values[0] = PQcmdStatus(res); + if (PQgetisnull(res, row, i)) + values[i] = NULL; + else + values[i] = PQgetvalue(res, row, i); } - - /* build the tuple and put it into the tuplestore. */ - tuple = BuildTupleFromCStrings(attinmeta, values); - tuplestore_puttuple(tupstore, tuple); + } + else + { + values[0] = PQcmdStatus(res); } - /* clean up GUC settings, if we changed any */ - restoreLocalGucs(nestlevel); + /* build the tuple and put it into the tuplestore. 
*/ + tuple = BuildTupleFromCStrings(attinmeta, values); + tuplestore_puttuple(tupstore, tuple); } - PQclear(res); + /* clean up GUC settings, if we changed any */ + restoreLocalGucs(nestlevel); + } + + PQclear(res); } /* diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index c654c1a1ff0a7..a33843fcf8531 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -982,40 +982,40 @@ void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, const char *sql) { - char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); - char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); - char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); - char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT); - char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT); - int sqlstate; - - if (diag_sqlstate) - sqlstate = MAKE_SQLSTATE(diag_sqlstate[0], - diag_sqlstate[1], - diag_sqlstate[2], - diag_sqlstate[3], - diag_sqlstate[4]); - else - sqlstate = ERRCODE_CONNECTION_FAILURE; + char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); + char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); + char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); + char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT); + char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT); + int sqlstate; + + if (diag_sqlstate) + sqlstate = MAKE_SQLSTATE(diag_sqlstate[0], + diag_sqlstate[1], + diag_sqlstate[2], + diag_sqlstate[3], + diag_sqlstate[4]); + else + sqlstate = ERRCODE_CONNECTION_FAILURE; - /* - * If we don't get a message from the PGresult, try the PGconn. This - * is needed because for connection-level failures, PQgetResult may - * just return NULL, not a PGresult at all. - */ - if (message_primary == NULL) - message_primary = pchomp(PQerrorMessage(conn)); - - ereport(elevel, - (errcode(sqlstate), - (message_primary != NULL && message_primary[0] != '\0') ? - errmsg_internal("%s", message_primary) : - errmsg("could not obtain message string for remote error"), - message_detail ? errdetail_internal("%s", message_detail) : 0, - message_hint ? errhint("%s", message_hint) : 0, - message_context ? errcontext("%s", message_context) : 0, - sql ? errcontext("remote SQL command: %s", sql) : 0)); - PQclear(res); + /* + * If we don't get a message from the PGresult, try the PGconn. This is + * needed because for connection-level failures, PQgetResult may just + * return NULL, not a PGresult at all. + */ + if (message_primary == NULL) + message_primary = pchomp(PQerrorMessage(conn)); + + ereport(elevel, + (errcode(sqlstate), + (message_primary != NULL && message_primary[0] != '\0') ? + errmsg_internal("%s", message_primary) : + errmsg("could not obtain message string for remote error"), + message_detail ? errdetail_internal("%s", message_detail) : 0, + message_hint ? errhint("%s", message_hint) : 0, + message_context ? errcontext("%s", message_context) : 0, + sql ? errcontext("remote SQL command: %s", sql) : 0)); + PQclear(res); } /* @@ -1617,83 +1617,83 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, *result = NULL; *timed_out = false; - for (;;) + for (;;) + { + PGresult *res; + + while (PQisBusy(conn)) { - PGresult *res; + int wc; + TimestampTz now = GetCurrentTimestamp(); + long cur_timeout; - while (PQisBusy(conn)) + /* If timeout has expired, give up. 
*/ + if (now >= endtime) { - int wc; - TimestampTz now = GetCurrentTimestamp(); - long cur_timeout; - - /* If timeout has expired, give up. */ - if (now >= endtime) - { - *timed_out = true; - failed = true; - goto exit; - } + *timed_out = true; + failed = true; + goto exit; + } - /* If we need to re-issue the cancel request, do that. */ - if (now >= retrycanceltime) - { - /* We ignore failure to issue the repeated request. */ - (void) libpqsrv_cancel(conn, endtime); + /* If we need to re-issue the cancel request, do that. */ + if (now >= retrycanceltime) + { + /* We ignore failure to issue the repeated request. */ + (void) libpqsrv_cancel(conn, endtime); - /* Recompute "now" in case that took measurable time. */ - now = GetCurrentTimestamp(); + /* Recompute "now" in case that took measurable time. */ + now = GetCurrentTimestamp(); - /* Adjust re-cancel timeout in increasing steps. */ - retrycanceltime = TimestampTzPlusMilliseconds(now, - canceldelta); - canceldelta += canceldelta; - } + /* Adjust re-cancel timeout in increasing steps. */ + retrycanceltime = TimestampTzPlusMilliseconds(now, + canceldelta); + canceldelta += canceldelta; + } - /* If timeout has expired, give up, else get sleep time. */ - cur_timeout = TimestampDifferenceMilliseconds(now, - Min(endtime, - retrycanceltime)); - if (cur_timeout <= 0) - { - *timed_out = true; - failed = true; - goto exit; - } + /* If timeout has expired, give up, else get sleep time. */ + cur_timeout = TimestampDifferenceMilliseconds(now, + Min(endtime, + retrycanceltime)); + if (cur_timeout <= 0) + { + *timed_out = true; + failed = true; + goto exit; + } - /* first time, allocate or get the custom wait event */ - if (pgfdw_we_cleanup_result == 0) - pgfdw_we_cleanup_result = WaitEventExtensionNew("PostgresFdwCleanupResult"); + /* first time, allocate or get the custom wait event */ + if (pgfdw_we_cleanup_result == 0) + pgfdw_we_cleanup_result = WaitEventExtensionNew("PostgresFdwCleanupResult"); - /* Sleep until there's something to do */ - wc = WaitLatchOrSocket(MyLatch, - WL_LATCH_SET | WL_SOCKET_READABLE | - WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - PQsocket(conn), - cur_timeout, pgfdw_we_cleanup_result); - ResetLatch(MyLatch); + /* Sleep until there's something to do */ + wc = WaitLatchOrSocket(MyLatch, + WL_LATCH_SET | WL_SOCKET_READABLE | + WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + PQsocket(conn), + cur_timeout, pgfdw_we_cleanup_result); + ResetLatch(MyLatch); - CHECK_FOR_INTERRUPTS(); + CHECK_FOR_INTERRUPTS(); - /* Data available in socket? */ - if (wc & WL_SOCKET_READABLE) + /* Data available in socket? */ + if (wc & WL_SOCKET_READABLE) + { + if (!PQconsumeInput(conn)) { - if (!PQconsumeInput(conn)) - { - /* connection trouble */ - failed = true; - goto exit; - } + /* connection trouble */ + failed = true; + goto exit; } } + } - res = PQgetResult(conn); - if (res == NULL) - break; /* query is complete */ + res = PQgetResult(conn); + if (res == NULL) + break; /* query is complete */ - PQclear(last_res); - last_res = res; - } + PQclear(last_res); + last_res = res; + } exit: if (failed) PQclear(last_res); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index f2dee7b1c69ab..25b287be069fa 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3605,31 +3605,31 @@ get_remote_estimate(const char *sql, PGconn *conn, Cost *startup_cost, Cost *total_cost) { PGresult *res; - char *line; - char *p; - int n; + char *line; + char *p; + int n; - /* - * Execute EXPLAIN remotely. 
- */ - res = pgfdw_exec_query(conn, sql, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql); + /* + * Execute EXPLAIN remotely. + */ + res = pgfdw_exec_query(conn, sql, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql); - /* - * Extract cost numbers for topmost plan node. Note we search for a - * left paren from the end of the line to avoid being confused by - * other uses of parentheses. - */ - line = PQgetvalue(res, 0, 0); - p = strrchr(line, '('); - if (p == NULL) - elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); - n = sscanf(p, "(cost=%lf..%lf rows=%lf width=%d)", - startup_cost, total_cost, rows, width); - if (n != 4) - elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); - PQclear(res); + /* + * Extract cost numbers for topmost plan node. Note we search for a left + * paren from the end of the line to avoid being confused by other uses of + * parentheses. + */ + line = PQgetvalue(res, 0, 0); + p = strrchr(line, '('); + if (p == NULL) + elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); + n = sscanf(p, "(cost=%lf..%lf rows=%lf width=%d)", + startup_cost, total_cost, rows, width); + if (n != 4) + elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); + PQclear(res); } /* @@ -3812,63 +3812,63 @@ fetch_more_data(ForeignScanState *node) MemoryContextReset(fsstate->batch_cxt); oldcontext = MemoryContextSwitchTo(fsstate->batch_cxt); - if (fsstate->async_capable) - { - Assert(fsstate->conn_state->pendingAreq); + if (fsstate->async_capable) + { + Assert(fsstate->conn_state->pendingAreq); - /* - * The query was already sent by an earlier call to - * fetch_more_data_begin. So now we just fetch the result. - */ - res = pgfdw_get_result(conn); - /* On error, report the original query, not the FETCH. */ - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + /* + * The query was already sent by an earlier call to + * fetch_more_data_begin. So now we just fetch the result. + */ + res = pgfdw_get_result(conn); + /* On error, report the original query, not the FETCH. */ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, fsstate->query); - /* Reset per-connection state */ - fsstate->conn_state->pendingAreq = NULL; - } - else - { - char sql[64]; + /* Reset per-connection state */ + fsstate->conn_state->pendingAreq = NULL; + } + else + { + char sql[64]; - /* This is a regular synchronous fetch. */ - snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", - fsstate->fetch_size, fsstate->cursor_number); + /* This is a regular synchronous fetch. */ + snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", + fsstate->fetch_size, fsstate->cursor_number); - res = pgfdw_exec_query(conn, sql, fsstate->conn_state); - /* On error, report the original query, not the FETCH. */ - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); - } + res = pgfdw_exec_query(conn, sql, fsstate->conn_state); + /* On error, report the original query, not the FETCH. 
*/ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, fsstate->query); + } - /* Convert the data into HeapTuples */ - numrows = PQntuples(res); - fsstate->tuples = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); - fsstate->num_tuples = numrows; - fsstate->next_tuple = 0; + /* Convert the data into HeapTuples */ + numrows = PQntuples(res); + fsstate->tuples = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); + fsstate->num_tuples = numrows; + fsstate->next_tuple = 0; - for (i = 0; i < numrows; i++) - { - Assert(IsA(node->ss.ps.plan, ForeignScan)); - - fsstate->tuples[i] = - make_tuple_from_result_row(res, i, - fsstate->rel, - fsstate->attinmeta, - fsstate->retrieved_attrs, - node, - fsstate->temp_cxt); - } + for (i = 0; i < numrows; i++) + { + Assert(IsA(node->ss.ps.plan, ForeignScan)); + + fsstate->tuples[i] = + make_tuple_from_result_row(res, i, + fsstate->rel, + fsstate->attinmeta, + fsstate->retrieved_attrs, + node, + fsstate->temp_cxt); + } - /* Update fetch_ct_2 */ - if (fsstate->fetch_ct_2 < 2) - fsstate->fetch_ct_2++; + /* Update fetch_ct_2 */ + if (fsstate->fetch_ct_2 < 2) + fsstate->fetch_ct_2++; - /* Must be EOF if we didn't get as many tuples as we asked for. */ - fsstate->eof_reached = (numrows < fsstate->fetch_size); + /* Must be EOF if we didn't get as many tuples as we asked for. */ + fsstate->eof_reached = (numrows < fsstate->fetch_size); - PQclear(res); + PQclear(res); MemoryContextSwitchTo(oldcontext); } @@ -4322,20 +4322,20 @@ static void store_returning_result(PgFdwModifyState *fmstate, TupleTableSlot *slot, PGresult *res) { - HeapTuple newtup; + HeapTuple newtup; - newtup = make_tuple_from_result_row(res, 0, - fmstate->rel, - fmstate->attinmeta, - fmstate->retrieved_attrs, - NULL, - fmstate->temp_cxt); + newtup = make_tuple_from_result_row(res, 0, + fmstate->rel, + fmstate->attinmeta, + fmstate->retrieved_attrs, + NULL, + fmstate->temp_cxt); - /* - * The returning slot will not necessarily be suitable to store - * heaptuples directly, so allow for conversion. - */ - ExecForceStoreHeapTuple(newtup, slot, true); + /* + * The returning slot will not necessarily be suitable to store heaptuples + * directly, so allow for conversion. 
+ */ + ExecForceStoreHeapTuple(newtup, slot, true); } /* @@ -4921,14 +4921,14 @@ postgresAnalyzeForeignTable(Relation relation, initStringInfo(&sql); deparseAnalyzeSizeSql(&sql, relation); - res = pgfdw_exec_query(conn, sql.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); - if (PQntuples(res) != 1 || PQnfields(res) != 1) - elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); - *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10); - PQclear(res); + if (PQntuples(res) != 1 || PQnfields(res) != 1) + elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); + *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10); + PQclear(res); ReleaseConnection(conn); @@ -4970,15 +4970,15 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) initStringInfo(&sql); deparseAnalyzeInfoSql(&sql, relation); - res = pgfdw_exec_query(conn, sql.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); - if (PQntuples(res) != 1 || PQnfields(res) != 2) - elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); - reltuples = strtod(PQgetvalue(res, 0, 0), NULL); - relkind = *(PQgetvalue(res, 0, 1)); - PQclear(res); + if (PQntuples(res) != 1 || PQnfields(res) != 2) + elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); + reltuples = strtod(PQgetvalue(res, 0, 0), NULL); + relkind = *(PQgetvalue(res, 0, 1)); + PQclear(res); ReleaseConnection(conn); @@ -5200,76 +5200,76 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, deparseAnalyzeSql(&sql, relation, method, sample_frac, &astate.retrieved_attrs); - res = pgfdw_exec_query(conn, sql.data, NULL); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); - PQclear(res); + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); + PQclear(res); - /* - * Determine the fetch size. The default is arbitrary, but shouldn't - * be enormous. - */ - fetch_size = 100; - foreach(lc, server->options) - { - DefElem *def = (DefElem *) lfirst(lc); + /* + * Determine the fetch size. The default is arbitrary, but shouldn't be + * enormous. + */ + fetch_size = 100; + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); - if (strcmp(def->defname, "fetch_size") == 0) - { - (void) parse_int(defGetString(def), &fetch_size, 0, NULL); - break; - } - } - foreach(lc, table->options) + if (strcmp(def->defname, "fetch_size") == 0) { - DefElem *def = (DefElem *) lfirst(lc); + (void) parse_int(defGetString(def), &fetch_size, 0, NULL); + break; + } + } + foreach(lc, table->options) + { + DefElem *def = (DefElem *) lfirst(lc); - if (strcmp(def->defname, "fetch_size") == 0) - { - (void) parse_int(defGetString(def), &fetch_size, 0, NULL); - break; - } + if (strcmp(def->defname, "fetch_size") == 0) + { + (void) parse_int(defGetString(def), &fetch_size, 0, NULL); + break; } + } - /* Construct command to fetch rows from remote. */ - snprintf(fetch_sql, sizeof(fetch_sql), "FETCH %d FROM c%u", - fetch_size, cursor_number); + /* Construct command to fetch rows from remote. 
*/ + snprintf(fetch_sql, sizeof(fetch_sql), "FETCH %d FROM c%u", + fetch_size, cursor_number); - /* Retrieve and process rows a batch at a time. */ - for (;;) - { - int numrows; - int i; + /* Retrieve and process rows a batch at a time. */ + for (;;) + { + int numrows; + int i; - /* Allow users to cancel long query */ - CHECK_FOR_INTERRUPTS(); + /* Allow users to cancel long query */ + CHECK_FOR_INTERRUPTS(); - /* - * XXX possible future improvement: if rowstoskip is large, we - * could issue a MOVE rather than physically fetching the rows, - * then just adjust rowstoskip and samplerows appropriately. - */ + /* + * XXX possible future improvement: if rowstoskip is large, we could + * issue a MOVE rather than physically fetching the rows, then just + * adjust rowstoskip and samplerows appropriately. + */ - /* Fetch some rows */ - res = pgfdw_exec_query(conn, fetch_sql, NULL); - /* On error, report the original query, not the FETCH. */ - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + /* Fetch some rows */ + res = pgfdw_exec_query(conn, fetch_sql, NULL); + /* On error, report the original query, not the FETCH. */ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); - /* Process whatever we got. */ - numrows = PQntuples(res); - for (i = 0; i < numrows; i++) - analyze_row_processor(res, i, &astate); + /* Process whatever we got. */ + numrows = PQntuples(res); + for (i = 0; i < numrows; i++) + analyze_row_processor(res, i, &astate); - PQclear(res); + PQclear(res); - /* Must be EOF if we didn't get all the rows requested. */ - if (numrows < fetch_size) - break; - } + /* Must be EOF if we didn't get all the rows requested. */ + if (numrows < fetch_size) + break; + } - /* Close the cursor, just to be tidy. */ - close_cursor(conn, cursor_number, NULL); + /* Close the cursor, just to be tidy. */ + close_cursor(conn, cursor_number, NULL); ReleaseConnection(conn); @@ -5420,234 +5420,231 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Create workspace for strings */ initStringInfo(&buf); - /* Check that the schema really exists */ - appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = "); - deparseStringLiteral(&buf, stmt->remote_schema); + /* Check that the schema really exists */ + appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = "); + deparseStringLiteral(&buf, stmt->remote_schema); - res = pgfdw_exec_query(conn, buf.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + res = pgfdw_exec_query(conn, buf.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, buf.data); - if (PQntuples(res) != 1) - ereport(ERROR, - (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), - errmsg("schema \"%s\" is not present on foreign server \"%s\"", - stmt->remote_schema, server->servername))); + if (PQntuples(res) != 1) + ereport(ERROR, + (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), + errmsg("schema \"%s\" is not present on foreign server \"%s\"", + stmt->remote_schema, server->servername))); - PQclear(res); - resetStringInfo(&buf); + PQclear(res); + resetStringInfo(&buf); - /* - * Fetch all table data from this schema, possibly restricted by - * EXCEPT or LIMIT TO. (We don't actually need to pay any attention - * to EXCEPT/LIMIT TO here, because the core code will filter the - * statements we return according to those lists anyway. 
But it - * should save a few cycles to not process excluded tables in the - * first place.) - * - * Import table data for partitions only when they are explicitly - * specified in LIMIT TO clause. Otherwise ignore them and only - * include the definitions of the root partitioned tables to allow - * access to the complete remote data set locally in the schema - * imported. - * - * Note: because we run the connection with search_path restricted to - * pg_catalog, the format_type() and pg_get_expr() outputs will always - * include a schema name for types/functions in other schemas, which - * is what we want. - */ + /* + * Fetch all table data from this schema, possibly restricted by EXCEPT or + * LIMIT TO. (We don't actually need to pay any attention to EXCEPT/LIMIT + * TO here, because the core code will filter the statements we return + * according to those lists anyway. But it should save a few cycles to + * not process excluded tables in the first place.) + * + * Import table data for partitions only when they are explicitly + * specified in LIMIT TO clause. Otherwise ignore them and only include + * the definitions of the root partitioned tables to allow access to the + * complete remote data set locally in the schema imported. + * + * Note: because we run the connection with search_path restricted to + * pg_catalog, the format_type() and pg_get_expr() outputs will always + * include a schema name for types/functions in other schemas, which is + * what we want. + */ + appendStringInfoString(&buf, + "SELECT relname, " + " attname, " + " format_type(atttypid, atttypmod), " + " attnotnull, " + " pg_get_expr(adbin, adrelid), "); + + /* Generated columns are supported since Postgres 12 */ + if (PQserverVersion(conn) >= 120000) appendStringInfoString(&buf, - "SELECT relname, " - " attname, " - " format_type(atttypid, atttypmod), " - " attnotnull, " - " pg_get_expr(adbin, adrelid), "); - - /* Generated columns are supported since Postgres 12 */ - if (PQserverVersion(conn) >= 120000) - appendStringInfoString(&buf, - " attgenerated, "); - else - appendStringInfoString(&buf, - " NULL, "); - - if (import_collate) - appendStringInfoString(&buf, - " collname, " - " collnsp.nspname "); - else - appendStringInfoString(&buf, - " NULL, NULL "); - + " attgenerated, "); + else appendStringInfoString(&buf, - "FROM pg_class c " - " JOIN pg_namespace n ON " - " relnamespace = n.oid " - " LEFT JOIN pg_attribute a ON " - " attrelid = c.oid AND attnum > 0 " - " AND NOT attisdropped " - " LEFT JOIN pg_attrdef ad ON " - " adrelid = c.oid AND adnum = attnum "); - - if (import_collate) - appendStringInfoString(&buf, - " LEFT JOIN pg_collation coll ON " - " coll.oid = attcollation " - " LEFT JOIN pg_namespace collnsp ON " - " collnsp.oid = collnamespace "); + " NULL, "); + if (import_collate) appendStringInfoString(&buf, - "WHERE c.relkind IN (" - CppAsString2(RELKIND_RELATION) "," - CppAsString2(RELKIND_VIEW) "," - CppAsString2(RELKIND_FOREIGN_TABLE) "," - CppAsString2(RELKIND_MATVIEW) "," - CppAsString2(RELKIND_PARTITIONED_TABLE) ") " - " AND n.nspname = "); - deparseStringLiteral(&buf, stmt->remote_schema); + " collname, " + " collnsp.nspname "); + else + appendStringInfoString(&buf, + " NULL, NULL "); + + appendStringInfoString(&buf, + "FROM pg_class c " + " JOIN pg_namespace n ON " + " relnamespace = n.oid " + " LEFT JOIN pg_attribute a ON " + " attrelid = c.oid AND attnum > 0 " + " AND NOT attisdropped " + " LEFT JOIN pg_attrdef ad ON " + " adrelid = c.oid AND adnum = attnum "); + + if (import_collate) + 
appendStringInfoString(&buf, + " LEFT JOIN pg_collation coll ON " + " coll.oid = attcollation " + " LEFT JOIN pg_namespace collnsp ON " + " collnsp.oid = collnamespace "); + + appendStringInfoString(&buf, + "WHERE c.relkind IN (" + CppAsString2(RELKIND_RELATION) "," + CppAsString2(RELKIND_VIEW) "," + CppAsString2(RELKIND_FOREIGN_TABLE) "," + CppAsString2(RELKIND_MATVIEW) "," + CppAsString2(RELKIND_PARTITIONED_TABLE) ") " + " AND n.nspname = "); + deparseStringLiteral(&buf, stmt->remote_schema); + + /* Partitions are supported since Postgres 10 */ + if (PQserverVersion(conn) >= 100000 && + stmt->list_type != FDW_IMPORT_SCHEMA_LIMIT_TO) + appendStringInfoString(&buf, " AND NOT c.relispartition "); + + /* Apply restrictions for LIMIT TO and EXCEPT */ + if (stmt->list_type == FDW_IMPORT_SCHEMA_LIMIT_TO || + stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) + { + bool first_item = true; - /* Partitions are supported since Postgres 10 */ - if (PQserverVersion(conn) >= 100000 && - stmt->list_type != FDW_IMPORT_SCHEMA_LIMIT_TO) - appendStringInfoString(&buf, " AND NOT c.relispartition "); + appendStringInfoString(&buf, " AND c.relname "); + if (stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) + appendStringInfoString(&buf, "NOT "); + appendStringInfoString(&buf, "IN ("); - /* Apply restrictions for LIMIT TO and EXCEPT */ - if (stmt->list_type == FDW_IMPORT_SCHEMA_LIMIT_TO || - stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) + /* Append list of table names within IN clause */ + foreach(lc, stmt->table_list) { - bool first_item = true; + RangeVar *rv = (RangeVar *) lfirst(lc); - appendStringInfoString(&buf, " AND c.relname "); - if (stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) - appendStringInfoString(&buf, "NOT "); - appendStringInfoString(&buf, "IN ("); + if (first_item) + first_item = false; + else + appendStringInfoString(&buf, ", "); + deparseStringLiteral(&buf, rv->relname); + } + appendStringInfoChar(&buf, ')'); + } - /* Append list of table names within IN clause */ - foreach(lc, stmt->table_list) - { - RangeVar *rv = (RangeVar *) lfirst(lc); + /* Append ORDER BY at the end of query to ensure output ordering */ + appendStringInfoString(&buf, " ORDER BY c.relname, a.attnum"); - if (first_item) - first_item = false; - else - appendStringInfoString(&buf, ", "); - deparseStringLiteral(&buf, rv->relname); - } - appendStringInfoChar(&buf, ')'); - } + /* Fetch the data */ + res = pgfdw_exec_query(conn, buf.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, buf.data); - /* Append ORDER BY at the end of query to ensure output ordering */ - appendStringInfoString(&buf, " ORDER BY c.relname, a.attnum"); + /* Process results */ + numrows = PQntuples(res); + /* note: incrementation of i happens in inner loop's while() test */ + for (i = 0; i < numrows;) + { + char *tablename = PQgetvalue(res, i, 0); + bool first_item = true; - /* Fetch the data */ - res = pgfdw_exec_query(conn, buf.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + resetStringInfo(&buf); + appendStringInfo(&buf, "CREATE FOREIGN TABLE %s (\n", + quote_identifier(tablename)); - /* Process results */ - numrows = PQntuples(res); - /* note: incrementation of i happens in inner loop's while() test */ - for (i = 0; i < numrows;) + /* Scan all rows for this table */ + do { - char *tablename = PQgetvalue(res, i, 0); - bool first_item = true; + char *attname; + char *typename; + char *attnotnull; + char *attgenerated; + char *attdefault; + char 
*collname; + char *collnamespace; + + /* If table has no columns, we'll see nulls here */ + if (PQgetisnull(res, i, 1)) + continue; - resetStringInfo(&buf); - appendStringInfo(&buf, "CREATE FOREIGN TABLE %s (\n", - quote_identifier(tablename)); + attname = PQgetvalue(res, i, 1); + typename = PQgetvalue(res, i, 2); + attnotnull = PQgetvalue(res, i, 3); + attdefault = PQgetisnull(res, i, 4) ? NULL : + PQgetvalue(res, i, 4); + attgenerated = PQgetisnull(res, i, 5) ? NULL : + PQgetvalue(res, i, 5); + collname = PQgetisnull(res, i, 6) ? NULL : + PQgetvalue(res, i, 6); + collnamespace = PQgetisnull(res, i, 7) ? NULL : + PQgetvalue(res, i, 7); + + if (first_item) + first_item = false; + else + appendStringInfoString(&buf, ",\n"); - /* Scan all rows for this table */ - do - { - char *attname; - char *typename; - char *attnotnull; - char *attgenerated; - char *attdefault; - char *collname; - char *collnamespace; - - /* If table has no columns, we'll see nulls here */ - if (PQgetisnull(res, i, 1)) - continue; + /* Print column name and type */ + appendStringInfo(&buf, " %s %s", + quote_identifier(attname), + typename); - attname = PQgetvalue(res, i, 1); - typename = PQgetvalue(res, i, 2); - attnotnull = PQgetvalue(res, i, 3); - attdefault = PQgetisnull(res, i, 4) ? NULL : - PQgetvalue(res, i, 4); - attgenerated = PQgetisnull(res, i, 5) ? NULL : - PQgetvalue(res, i, 5); - collname = PQgetisnull(res, i, 6) ? NULL : - PQgetvalue(res, i, 6); - collnamespace = PQgetisnull(res, i, 7) ? NULL : - PQgetvalue(res, i, 7); - - if (first_item) - first_item = false; - else - appendStringInfoString(&buf, ",\n"); + /* + * Add column_name option so that renaming the foreign table's + * column doesn't break the association to the underlying column. + */ + appendStringInfoString(&buf, " OPTIONS (column_name "); + deparseStringLiteral(&buf, attname); + appendStringInfoChar(&buf, ')'); - /* Print column name and type */ - appendStringInfo(&buf, " %s %s", - quote_identifier(attname), - typename); + /* Add COLLATE if needed */ + if (import_collate && collname != NULL && collnamespace != NULL) + appendStringInfo(&buf, " COLLATE %s.%s", + quote_identifier(collnamespace), + quote_identifier(collname)); - /* - * Add column_name option so that renaming the foreign table's - * column doesn't break the association to the underlying - * column. 
- */ - appendStringInfoString(&buf, " OPTIONS (column_name "); - deparseStringLiteral(&buf, attname); - appendStringInfoChar(&buf, ')'); - - /* Add COLLATE if needed */ - if (import_collate && collname != NULL && collnamespace != NULL) - appendStringInfo(&buf, " COLLATE %s.%s", - quote_identifier(collnamespace), - quote_identifier(collname)); - - /* Add DEFAULT if needed */ - if (import_default && attdefault != NULL && - (!attgenerated || !attgenerated[0])) - appendStringInfo(&buf, " DEFAULT %s", attdefault); - - /* Add GENERATED if needed */ - if (import_generated && attgenerated != NULL && - attgenerated[0] == ATTRIBUTE_GENERATED_STORED) - { - Assert(attdefault != NULL); - appendStringInfo(&buf, - " GENERATED ALWAYS AS (%s) STORED", - attdefault); - } + /* Add DEFAULT if needed */ + if (import_default && attdefault != NULL && + (!attgenerated || !attgenerated[0])) + appendStringInfo(&buf, " DEFAULT %s", attdefault); - /* Add NOT NULL if needed */ - if (import_not_null && attnotnull[0] == 't') - appendStringInfoString(&buf, " NOT NULL"); + /* Add GENERATED if needed */ + if (import_generated && attgenerated != NULL && + attgenerated[0] == ATTRIBUTE_GENERATED_STORED) + { + Assert(attdefault != NULL); + appendStringInfo(&buf, + " GENERATED ALWAYS AS (%s) STORED", + attdefault); } - while (++i < numrows && - strcmp(PQgetvalue(res, i, 0), tablename) == 0); - /* - * Add server name and table-level options. We specify remote - * schema and table name as options (the latter to ensure that - * renaming the foreign table doesn't break the association). - */ - appendStringInfo(&buf, "\n) SERVER %s\nOPTIONS (", - quote_identifier(server->servername)); + /* Add NOT NULL if needed */ + if (import_not_null && attnotnull[0] == 't') + appendStringInfoString(&buf, " NOT NULL"); + } + while (++i < numrows && + strcmp(PQgetvalue(res, i, 0), tablename) == 0); - appendStringInfoString(&buf, "schema_name "); - deparseStringLiteral(&buf, stmt->remote_schema); - appendStringInfoString(&buf, ", table_name "); - deparseStringLiteral(&buf, tablename); + /* + * Add server name and table-level options. We specify remote schema + * and table name as options (the latter to ensure that renaming the + * foreign table doesn't break the association). + */ + appendStringInfo(&buf, "\n) SERVER %s\nOPTIONS (", + quote_identifier(server->servername)); - appendStringInfoString(&buf, ");"); + appendStringInfoString(&buf, "schema_name "); + deparseStringLiteral(&buf, stmt->remote_schema); + appendStringInfoString(&buf, ", table_name "); + deparseStringLiteral(&buf, tablename); - commands = lappend(commands, pstrdup(buf.data)); - } - PQclear(res); + appendStringInfoString(&buf, ");"); + + commands = lappend(commands, pstrdup(buf.data)); + } + PQclear(res); ReleaseConnection(conn); diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index 4ba635aa96f92..1c4a342090c33 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -281,28 +281,28 @@ libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) { PGresult *lastResult = NULL; - for (;;) - { - /* Wait for, and collect, the next PGresult. */ - PGresult *result; + for (;;) + { + /* Wait for, and collect, the next PGresult. 
*/ + PGresult *result; - result = libpqsrv_get_result(conn, wait_event_info); - if (result == NULL) - break; /* query is complete, or failure */ + result = libpqsrv_get_result(conn, wait_event_info); + if (result == NULL) + break; /* query is complete, or failure */ - /* - * Emulate PQexec()'s behavior of returning the last result when - * there are many. - */ - PQclear(lastResult); - lastResult = result; + /* + * Emulate PQexec()'s behavior of returning the last result when there + * are many. + */ + PQclear(lastResult); + lastResult = result; - if (PQresultStatus(lastResult) == PGRES_COPY_IN || - PQresultStatus(lastResult) == PGRES_COPY_OUT || - PQresultStatus(lastResult) == PGRES_COPY_BOTH || - PQstatus(conn) == CONNECTION_BAD) - break; - } + if (PQresultStatus(lastResult) == PGRES_COPY_IN || + PQresultStatus(lastResult) == PGRES_COPY_OUT || + PQresultStatus(lastResult) == PGRES_COPY_BOTH || + PQstatus(conn) == CONNECTION_BAD) + break; + } return lastResult; } From 0f9d4d7c12dcebe951061763ca23ee3b6477e7ca Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:37:29 -0400 Subject: [PATCH 375/602] Silence leakage complaint about postgres_fdw's InitPgFdwOptions. Valgrind complains that the PQconninfoOption array returned by libpq is leaked. We apparently believed that we could suppress that warning by storing that array's address in a static variable. However, modern C compilers are bright enough to optimize the static variable away. We could escalate that arms race by making the variable global. But on the whole it seems better to revise the code so that it can free libpq's result properly. The only thing that costs us is copying the parameter-name keywords, which seems like a pretty negligible cost in a function that runs at most once per process. Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/postgres_fdw/option.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index c2f936640bca8..d6fa89bad9399 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -21,6 +21,7 @@ #include "libpq/libpq-be.h" #include "postgres_fdw.h" #include "utils/guc.h" +#include "utils/memutils.h" #include "utils/varlena.h" /* @@ -39,12 +40,6 @@ typedef struct PgFdwOption */ static PgFdwOption *postgres_fdw_options; -/* - * Valid options for libpq. - * Allocated and filled in InitPgFdwOptions. - */ -static PQconninfoOption *libpq_options; - /* * GUC parameters */ @@ -239,6 +234,7 @@ static void InitPgFdwOptions(void) { int num_libpq_opts; + PQconninfoOption *libpq_options; PQconninfoOption *lopt; PgFdwOption *popt; @@ -307,8 +303,8 @@ InitPgFdwOptions(void) * Get list of valid libpq options. * * To avoid unnecessary work, we get the list once and use it throughout - * the lifetime of this backend process. We don't need to care about - * memory context issues, because PQconndefaults allocates with malloc. + * the lifetime of this backend process. Hence, we'll allocate it in + * TopMemoryContext. */ libpq_options = PQconndefaults(); if (!libpq_options) /* assume reason for failure is OOM */ @@ -325,19 +321,11 @@ InitPgFdwOptions(void) /* * Construct an array which consists of all valid options for * postgres_fdw, by appending FDW-specific options to libpq options. - * - * We use plain malloc here to allocate postgres_fdw_options because it - * lives as long as the backend process does.
Besides, keeping - * libpq_options in memory allows us to avoid copying every keyword - * string. */ postgres_fdw_options = (PgFdwOption *) - malloc(sizeof(PgFdwOption) * num_libpq_opts + - sizeof(non_libpq_options)); - if (postgres_fdw_options == NULL) - ereport(ERROR, - (errcode(ERRCODE_FDW_OUT_OF_MEMORY), - errmsg("out of memory"))); + MemoryContextAlloc(TopMemoryContext, + sizeof(PgFdwOption) * num_libpq_opts + + sizeof(non_libpq_options)); popt = postgres_fdw_options; for (lopt = libpq_options; lopt->keyword; lopt++) @@ -355,8 +343,8 @@ InitPgFdwOptions(void) if (strncmp(lopt->keyword, "oauth_", strlen("oauth_")) == 0) continue; - /* We don't have to copy keyword string, as described above. */ - popt->keyword = lopt->keyword; + popt->keyword = MemoryContextStrdup(TopMemoryContext, + lopt->keyword); /* * "user" and any secret options are allowed only on user mappings. @@ -371,6 +359,9 @@ InitPgFdwOptions(void) popt++; } + /* Done with libpq's output structure. */ + PQconninfoFree(libpq_options); + /* Append FDW-specific options and dummy terminator. */ memcpy(popt, non_libpq_options, sizeof(non_libpq_options)); } From db6461b1c9aae122b90bb52430f06efb306b371a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:45:57 -0400 Subject: [PATCH 376/602] Add commit 73873805f to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 1ee1dee011164..f8526d4d1a9c2 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,6 +14,9 @@ # # $ git log --pretty=format:"%H # %cd%n# %s" $PGINDENTGITHASH -1 --date=iso +73873805fb3627cb23937c750fa83ffd8f16fc6c # 2025-07-25 16:36:44 -0400 +# Run pgindent on the changes of the previous patch. + 9e345415bcd3c4358350b89edfd710469b8bfaf9 # 2025-07-01 15:23:07 +0200 # Fix indentation in pg_numa code From 6f22a82a401d267e4bf1fcbcff8d6adb24e14d58 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sun, 27 Jul 2025 17:48:47 +0900 Subject: [PATCH 377/602] Add assertions for all the required index AM callbacks Similar checks are done for the mandatory table AM callbacks. A portion of the index AM callbacks are optional and can be NULL; the rest is mandatory and is documented as such in the documentation and in amapi.h. These checks are useful to detect quickly if all the mandatory callbacks are defined when implementing a new index access method, as the assertions are run when loading the AM. Author: Japin Li Discussion: https://postgr.es/m/ME0P300MB0445795D31CEAB92C58B41FDB651A@ME0P300MB0445.AUSP300.PROD.OUTLOOK.COM --- src/backend/access/index/amapi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c index f0f4f974bcedb..60684c5342279 100644 --- a/src/backend/access/index/amapi.c +++ b/src/backend/access/index/amapi.c @@ -42,6 +42,19 @@ GetIndexAmRoutine(Oid amhandler) elog(ERROR, "index access method handler function %u did not return an IndexAmRoutine struct", amhandler); + /* Assert that all required callbacks are present. 
*/ + Assert(routine->ambuild != NULL); + Assert(routine->ambuildempty != NULL); + Assert(routine->aminsert != NULL); + Assert(routine->ambulkdelete != NULL); + Assert(routine->amvacuumcleanup != NULL); + Assert(routine->amcostestimate != NULL); + Assert(routine->amoptions != NULL); + Assert(routine->amvalidate != NULL); + Assert(routine->ambeginscan != NULL); + Assert(routine->amrescan != NULL); + Assert(routine->amendscan != NULL); + return routine; } From 258bf0a2ea8ff86257f750018bfd44397ce7e554 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 27 Jul 2025 15:07:47 +0300 Subject: [PATCH 378/602] Process sync requests incrementally in AbsorbSyncRequests If the number of sync requests is big enough, the palloc() call in AbsorbSyncRequests() will attempt to allocate more than 1 GB of memory, resulting in failure. This can lead to an infinite loop in the checkpointer process, as it repeatedly fails to absorb the pending requests. This commit introduces the following changes to cope with this problem: 1. Turn the pending checkpointer requests array in shared memory into a bounded ring buffer. 2. Limit the maximum ring buffer size to 10M items. 3. Make AbsorbSyncRequests() process requests incrementally in 10K batches. Even #2 makes the whole queue size fit the maximum palloc() size of 1 GB, while #3 limits the duration of continuous lock holding. This commit is for master only. A simpler fix, which just limits the request queue size to 10M, will be backpatched. Reported-by: Ekaterina Sokolova Discussion: https://postgr.es/m/db4534f83a22a29ab5ee2566ad86ca92%40postgrespro.ru Author: Maxim Orlov Co-authored-by: Xuneng Zhou Reviewed-by: Andres Freund Reviewed-by: Heikki Linnakangas Reviewed-by: Alexander Korotkov --- src/backend/postmaster/checkpointer.c | 155 +++++++++++++++++++------- 1 file changed, 114 insertions(+), 41 deletions(-) diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 2809e298a44fb..8490148a47d52 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -130,6 +130,13 @@ typedef struct int num_requests; /* current # of requests */ int max_requests; /* allocated array size */ + + int head; /* Index of the first request in the ring + * buffer */ + int tail; /* Index of the last request in the ring + * buffer */ + + /* The ring buffer of pending checkpointer requests */ CheckpointerRequest requests[FLEXIBLE_ARRAY_MEMBER]; } CheckpointerShmemStruct; @@ -138,6 +145,12 @@ static CheckpointerShmemStruct *CheckpointerShmem; /* interval for calling AbsorbSyncRequests in CheckpointWriteDelay */ #define WRITES_PER_ABSORB 1000 +/* Maximum number of checkpointer requests to process in one batch */ +#define CKPT_REQ_BATCH_SIZE 10000 + +/* Max number of requests the checkpointer request queue can hold */ +#define MAX_CHECKPOINT_REQUESTS 10000000 + /* * GUC parameters */ @@ -973,7 +986,8 @@ CheckpointerShmemInit(void) */ MemSet(CheckpointerShmem, 0, size); SpinLockInit(&CheckpointerShmem->ckpt_lck); - CheckpointerShmem->max_requests = NBuffers; + CheckpointerShmem->max_requests = Min(NBuffers, MAX_CHECKPOINT_REQUESTS); + CheckpointerShmem->head = CheckpointerShmem->tail = 0; ConditionVariableInit(&CheckpointerShmem->start_cv); ConditionVariableInit(&CheckpointerShmem->done_cv); } @@ -1201,6 +1215,7 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) { CheckpointerRequest *request; bool too_full; + int insert_pos; if (!IsUnderPostmaster) return false; /* probably shouldn't even get here */ @@ -1224,10 +1239,14 @@
ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) } /* OK, insert request */ - request = &CheckpointerShmem->requests[CheckpointerShmem->num_requests++]; + insert_pos = CheckpointerShmem->tail; + request = &CheckpointerShmem->requests[insert_pos]; request->ftag = *ftag; request->type = type; + CheckpointerShmem->tail = (CheckpointerShmem->tail + 1) % CheckpointerShmem->max_requests; + CheckpointerShmem->num_requests++; + /* If queue is more than half full, nudge the checkpointer to empty it */ too_full = (CheckpointerShmem->num_requests >= CheckpointerShmem->max_requests / 2); @@ -1269,12 +1288,16 @@ CompactCheckpointerRequestQueue(void) struct CheckpointerSlotMapping { CheckpointerRequest request; - int slot; + int ring_idx; }; - int n, - preserve_count; + int n; int num_skipped = 0; + int head; + int max_requests; + int num_requests; + int read_idx, + write_idx; HASHCTL ctl; HTAB *htab; bool *skip_slot; @@ -1286,8 +1309,13 @@ CompactCheckpointerRequestQueue(void) if (CritSectionCount > 0) return false; + max_requests = CheckpointerShmem->max_requests; + num_requests = CheckpointerShmem->num_requests; + /* Initialize skip_slot array */ - skip_slot = palloc0(sizeof(bool) * CheckpointerShmem->num_requests); + skip_slot = palloc0(sizeof(bool) * max_requests); + + head = CheckpointerShmem->head; /* Initialize temporary hash table */ ctl.keysize = sizeof(CheckpointerRequest); @@ -1311,7 +1339,8 @@ CompactCheckpointerRequestQueue(void) * away preceding entries that would end up being canceled anyhow), but * it's not clear that the extra complexity would buy us anything. */ - for (n = 0; n < CheckpointerShmem->num_requests; n++) + read_idx = head; + for (n = 0; n < num_requests; n++) { CheckpointerRequest *request; struct CheckpointerSlotMapping *slotmap; @@ -1324,16 +1353,19 @@ CompactCheckpointerRequestQueue(void) * CheckpointerShmemInit. Note also that RelFileLocator had better * contain no pad bytes. */ - request = &CheckpointerShmem->requests[n]; + request = &CheckpointerShmem->requests[read_idx]; slotmap = hash_search(htab, request, HASH_ENTER, &found); if (found) { /* Duplicate, so mark the previous occurrence as skippable */ - skip_slot[slotmap->slot] = true; + skip_slot[slotmap->ring_idx] = true; num_skipped++; } /* Remember slot containing latest occurrence of this request value */ - slotmap->slot = n; + slotmap->ring_idx = read_idx; + + /* Move to the next request in the ring buffer */ + read_idx = (read_idx + 1) % max_requests; } /* Done with the hash table. */ @@ -1347,17 +1379,34 @@ CompactCheckpointerRequestQueue(void) } /* We found some duplicates; remove them. 
*/ - preserve_count = 0; - for (n = 0; n < CheckpointerShmem->num_requests; n++) + read_idx = write_idx = head; + for (n = 0; n < num_requests; n++) { - if (skip_slot[n]) - continue; - CheckpointerShmem->requests[preserve_count++] = CheckpointerShmem->requests[n]; + /* If this slot is NOT skipped, keep it */ + if (!skip_slot[read_idx]) + { + /* If the read and write positions are different, copy the request */ + if (write_idx != read_idx) + CheckpointerShmem->requests[write_idx] = + CheckpointerShmem->requests[read_idx]; + + /* Advance the write position */ + write_idx = (write_idx + 1) % max_requests; + } + + read_idx = (read_idx + 1) % max_requests; } + + /* + * Update ring buffer state: head remains the same, tail moves, count + * decreases + */ + CheckpointerShmem->tail = write_idx; + CheckpointerShmem->num_requests -= num_skipped; + ereport(DEBUG1, (errmsg_internal("compacted fsync request queue from %d entries to %d entries", - CheckpointerShmem->num_requests, preserve_count))); - CheckpointerShmem->num_requests = preserve_count; + num_requests, CheckpointerShmem->num_requests))); /* Cleanup. */ pfree(skip_slot); @@ -1378,40 +1427,64 @@ AbsorbSyncRequests(void) { CheckpointerRequest *requests = NULL; CheckpointerRequest *request; - int n; + int n, + i; + bool loop; if (!AmCheckpointerProcess()) return; - LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); - - /* - * We try to avoid holding the lock for a long time by copying the request - * array, and processing the requests after releasing the lock. - * - * Once we have cleared the requests from shared memory, we have to PANIC - * if we then fail to absorb them (eg, because our hashtable runs out of - * memory). This is because the system cannot run safely if we are unable - * to fsync what we have been told to fsync. Fortunately, the hashtable - * is so small that the problem is quite unlikely to arise in practice. - */ - n = CheckpointerShmem->num_requests; - if (n > 0) + do { - requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest)); - memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest)); - } + LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); + + /*--- + * We try to avoid holding the lock for a long time by: + * 1. Copying the request array and processing the requests after + * releasing the lock; + * 2. Processing not the whole queue, but only batches of + * CKPT_REQ_BATCH_SIZE at once. + * + * Once we have cleared the requests from shared memory, we must + * PANIC if we then fail to absorb them (e.g., because our hashtable + * runs out of memory). This is because the system cannot run safely + * if we are unable to fsync what we have been told to fsync. + * Fortunately, the hashtable is so small that the problem is quite + * unlikely to arise in practice. + * + * Note: The maximum possible size of a ring buffer is + * MAX_CHECKPOINT_REQUESTS entries, which fit into a maximum palloc + * allocation size of 1Gb. Our maximum batch size, + * CKPT_REQ_BATCH_SIZE, is even smaller. 
+ */ + n = Min(CheckpointerShmem->num_requests, CKPT_REQ_BATCH_SIZE); + if (n > 0) + { + if (!requests) + requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest)); - START_CRIT_SECTION(); + for (i = 0; i < n; i++) + { + requests[i] = CheckpointerShmem->requests[CheckpointerShmem->head]; + CheckpointerShmem->head = (CheckpointerShmem->head + 1) % CheckpointerShmem->max_requests; + } - CheckpointerShmem->num_requests = 0; + CheckpointerShmem->num_requests -= n; - LWLockRelease(CheckpointerCommLock); + } + + START_CRIT_SECTION(); + + /* Are there any requests in the queue? If so, keep going. */ + loop = CheckpointerShmem->num_requests != 0; + + LWLockRelease(CheckpointerCommLock); - for (request = requests; n > 0; request++, n--) - RememberSyncRequest(&request->ftag, request->type); + for (request = requests; n > 0; request++, n--) + RememberSyncRequest(&request->ftag, request->type); - END_CRIT_SECTION(); + END_CRIT_SECTION(); + } while (loop); if (requests) pfree(requests); From 793928c2d5ac8e60e1e4054fa3b986369777896d Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 28 Jul 2025 08:15:11 +0900 Subject: [PATCH 379/602] Fix performance regression with flush of pending fixed-numbered stats The callback added in fc415edf8ca8, which checks whether there is any pending data to flush for fixed-numbered statistics by looping across all the builtin and custom stats kinds with a call to have_fixed_pending_cb, is proving expensive enough to show up in workloads that do not report any stats (read-only, no function calls, no WAL, no IO, etc). The code used in v17 was cheaper than what HEAD has introduced, relying on three boolean checks for WAL, SLRU and IO stats. This commit switches the code to use a more efficient approach than fc415edf8ca8, with a single boolean flag that can be switched to "true" by any fixed-numbered stats kind to force pgstat_report_stat() to go through one round of reports. The flag is reset by pgstat_report_stat() once a full round of reports is done. The flag being false means that fixed-numbered stats kinds saw no activity, and that there is no pending data to flush. ac000fca743e took one step in improving the performance by reducing the number of stats kinds that the backend can hold. This commit takes a more drastic step by bringing back the code efficiency to what it was before v18 with a cheap check at the beginning of pgstat_report_stat() for its fast-exit path. The callback have_static_pending_cb is removed as an effect of all that.
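The flag-based fast path is easy to see in miniature. Below is a standalone sketch of the same dirty-flag pattern, with hypothetical names rather than the actual pgstat code: any producer with pending data sets the flag, and only the reporting function clears it.

#include <stdbool.h>

/* Set by any producer that accumulates pending data; reset only by the
 * reporting function, mirroring how pgstat_report_fixed is handled. */
static bool report_pending = false;
static long pending_events = 0;

static void
count_event(void)
{
	pending_events++;
	report_pending = true;		/* mark that a flush round is needed */
}

static void
report_stats(void)
{
	if (!report_pending)
		return;					/* O(1) fast exit, no per-kind loop */

	/* ... flush pending_events to shared state here ... */
	pending_events = 0;
	report_pending = false;		/* only the flusher resets the flag */
}

int
main(void)
{
	count_event();
	report_stats();
	return 0;
}

The design point is that the idle path costs a single predictable branch, regardless of how many stats kinds exist.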
Reported-by: Andres Freund Reviewed-by: Bertrand Drouvot Discussion: https://postgr.es/m/eb224uegsga2hgq7dfq3ps5cduhpqej7ir2hjxzzozjthrekx5@dysei6buqthe Backpatch-through: 18 --- src/backend/access/transam/xlog.c | 10 ++++ src/backend/utils/activity/pgstat.c | 52 ++++++++------------- src/backend/utils/activity/pgstat_backend.c | 14 +----- src/backend/utils/activity/pgstat_io.c | 10 +--- src/backend/utils/activity/pgstat_slru.c | 10 +--- src/backend/utils/activity/pgstat_wal.c | 20 ++++---- src/include/utils/pgstat_internal.h | 34 ++++++++------ 7 files changed, 62 insertions(+), 88 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index eefffc4277a1a..b0891998b243f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -96,6 +96,7 @@ #include "utils/guc_hooks.h" #include "utils/guc_tables.h" #include "utils/injection_point.h" +#include "utils/pgstat_internal.h" #include "utils/ps_status.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" @@ -1091,6 +1092,9 @@ XLogInsertRecord(XLogRecData *rdata, pgWalUsage.wal_bytes += rechdr->xl_tot_len; pgWalUsage.wal_records++; pgWalUsage.wal_fpi += num_fpi; + + /* Required for the flush of pending stats WAL data */ + pgstat_report_fixed = true; } return EndPos; @@ -2108,6 +2112,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) LWLockRelease(WALWriteLock); pgWalUsage.wal_buffers_full++; TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE(); + + /* + * Required for the flush of pending stats WAL data, per + * update of pgWalUsage. + */ + pgstat_report_fixed = true; } } } diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 8b57845e8709f..6bc91ce0dadda 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -212,6 +212,11 @@ int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE; PgStat_LocalState pgStatLocal; +/* + * Track pending reports for fixed-numbered stats, used by + * pgstat_report_stat(). 
+ */ +bool pgstat_report_fixed = false; /* ---------- * Local data @@ -370,7 +375,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_off = offsetof(PgStatShared_Backend, stats), .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), - .have_static_pending_cb = pgstat_backend_have_pending_cb, .flush_static_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, }, @@ -437,7 +441,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats), .flush_static_cb = pgstat_io_flush_cb, - .have_static_pending_cb = pgstat_io_have_pending_cb, .init_shmem_cb = pgstat_io_init_shmem_cb, .reset_all_cb = pgstat_io_reset_all_cb, .snapshot_cb = pgstat_io_snapshot_cb, @@ -455,7 +458,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats), .flush_static_cb = pgstat_slru_flush_cb, - .have_static_pending_cb = pgstat_slru_have_pending_cb, .init_shmem_cb = pgstat_slru_init_shmem_cb, .reset_all_cb = pgstat_slru_reset_all_cb, .snapshot_cb = pgstat_slru_snapshot_cb, @@ -474,7 +476,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .init_backend_cb = pgstat_wal_init_backend_cb, .flush_static_cb = pgstat_wal_flush_cb, - .have_static_pending_cb = pgstat_wal_have_pending_cb, .init_shmem_cb = pgstat_wal_init_shmem_cb, .reset_all_cb = pgstat_wal_reset_all_cb, .snapshot_cb = pgstat_wal_snapshot_cb, @@ -708,29 +709,10 @@ pgstat_report_stat(bool force) } /* Don't expend a clock check if nothing to do */ - if (dlist_is_empty(&pgStatPending)) + if (dlist_is_empty(&pgStatPending) && + !pgstat_report_fixed) { - bool do_flush = false; - - /* Check for pending stats */ - for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) - { - const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); - - if (!kind_info) - continue; - if (!kind_info->have_static_pending_cb) - continue; - - if (kind_info->have_static_pending_cb()) - { - do_flush = true; - break; - } - } - - if (!do_flush) - return 0; + return 0; } /* @@ -784,16 +766,19 @@ pgstat_report_stat(bool force) partial_flush |= pgstat_flush_pending_entries(nowait); /* flush of other stats kinds */ - for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) + if (pgstat_report_fixed) { - const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) + { + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); - if (!kind_info) - continue; - if (!kind_info->flush_static_cb) - continue; + if (!kind_info) + continue; + if (!kind_info->flush_static_cb) + continue; - partial_flush |= kind_info->flush_static_cb(nowait); + partial_flush |= kind_info->flush_static_cb(nowait); + } } last_flush = now; @@ -815,6 +800,7 @@ pgstat_report_stat(bool force) } pending_since = 0; + pgstat_report_fixed = false; return 0; } diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 51256277e8d37..8714a85e2d936 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -66,6 +66,7 @@ pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context, io_time); backend_has_iostats = true; + pgstat_report_fixed = true; } void @@ -81,6 +82,7 @@ pgstat_count_backend_io_op(IOObject io_object, 
IOContext io_context, PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes; backend_has_iostats = true; + pgstat_report_fixed = true; } /* @@ -301,18 +303,6 @@ pgstat_flush_backend(bool nowait, bits32 flags) return false; } -/* - * Check if there are any backend stats waiting for flush. - */ -bool -pgstat_backend_have_pending_cb(void) -{ - if (!pgstat_tracks_backend_bktype(MyBackendType)) - return false; - - return (backend_has_iostats || pgstat_backend_wal_have_pending()); -} - /* * Callback to flush out locally pending backend statistics. * diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index d8d26379a571e..13ae57ed6498d 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -80,6 +80,7 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, pgstat_count_backend_io_op(io_object, io_context, io_op, cnt, bytes); have_iostats = true; + pgstat_report_fixed = true; } /* @@ -167,15 +168,6 @@ pgstat_fetch_stat_io(void) return &pgStatLocal.snapshot.io; } -/* - * Check if there any IO stats waiting for flush. - */ -bool -pgstat_io_have_pending_cb(void) -{ - return have_iostats; -} - /* * Simpler wrapper of pgstat_io_flush_cb() */ diff --git a/src/backend/utils/activity/pgstat_slru.c b/src/backend/utils/activity/pgstat_slru.c index b9e940dde45b6..7bd8744accb0e 100644 --- a/src/backend/utils/activity/pgstat_slru.c +++ b/src/backend/utils/activity/pgstat_slru.c @@ -143,15 +143,6 @@ pgstat_get_slru_index(const char *name) return (SLRU_NUM_ELEMENTS - 1); } -/* - * Check if there are any SLRU stats entries waiting for flush. - */ -bool -pgstat_slru_have_pending_cb(void) -{ - return have_slrustats; -} - /* * Flush out locally pending SLRU stats entries * @@ -247,6 +238,7 @@ get_slru_entry(int slru_idx) Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_ELEMENTS)); have_slrustats = true; + pgstat_report_fixed = true; return &pending_SLRUStats[slru_idx]; } diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index 16a1ecb4d90d2..0d04480d2f6d0 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -71,6 +71,15 @@ pgstat_fetch_stat_wal(void) return &pgStatLocal.snapshot.wal; } +/* + * To determine whether WAL usage happened. + */ +static inline bool +pgstat_wal_have_pending(void) +{ + return pgWalUsage.wal_records != prevWalUsage.wal_records; +} + /* * Calculate how much WAL usage counters have increased by subtracting the * previous counters from the current ones. @@ -92,7 +101,7 @@ pgstat_wal_flush_cb(bool nowait) * This function can be called even if nothing at all has happened. Avoid * taking lock for nothing in that case. */ - if (!pgstat_wal_have_pending_cb()) + if (!pgstat_wal_have_pending()) return false; /* @@ -136,15 +145,6 @@ pgstat_wal_init_backend_cb(void) prevWalUsage = pgWalUsage; } -/* - * To determine whether WAL usage happened. - */ -bool -pgstat_wal_have_pending_cb(void) -{ - return pgWalUsage.wal_records != prevWalUsage.wal_records; -} - void pgstat_wal_init_shmem_cb(void *stats) { diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index d5557e6e998cd..6cf00008f6333 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -295,18 +295,11 @@ typedef struct PgStat_KindInfo * * Returns true if some of the stats could not be flushed, due to lock * contention for example. Optional. 
- */ - bool (*flush_static_cb) (bool nowait); - - /* - * For fixed-numbered or variable-numbered statistics: Check for pending - * stats in need of flush with flush_static_cb, when these do not use - * PgStat_EntryRef->pending. * - * Returns true if there are any stats pending for flush, triggering - * flush_static_cb. Optional. + * "pgstat_report_fixed" needs to be set to trigger the flush of pending + * stats. */ - bool (*have_static_pending_cb) (void); + bool (*flush_static_cb) (bool nowait); /* * For fixed-numbered statistics: Reset All. @@ -627,7 +620,6 @@ extern void pgstat_archiver_snapshot_cb(void); extern bool pgstat_flush_backend(bool nowait, bits32 flags); extern bool pgstat_backend_flush_cb(bool nowait); -extern bool pgstat_backend_have_pending_cb(void); extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); @@ -676,7 +668,6 @@ extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); extern void pgstat_flush_io(bool nowait); -extern bool pgstat_io_have_pending_cb(void); extern bool pgstat_io_flush_cb(bool nowait); extern void pgstat_io_init_shmem_cb(void *stats); extern void pgstat_io_reset_all_cb(TimestampTz ts); @@ -738,7 +729,6 @@ extern PgStatShared_Common *pgstat_init_entry(PgStat_Kind kind, * Functions in pgstat_slru.c */ -extern bool pgstat_slru_have_pending_cb(void); extern bool pgstat_slru_flush_cb(bool nowait); extern void pgstat_slru_init_shmem_cb(void *stats); extern void pgstat_slru_reset_all_cb(TimestampTz ts); @@ -750,7 +740,6 @@ extern void pgstat_slru_snapshot_cb(void); */ extern void pgstat_wal_init_backend_cb(void); -extern bool pgstat_wal_have_pending_cb(void); extern bool pgstat_wal_flush_cb(bool nowait); extern void pgstat_wal_init_shmem_cb(void *stats); extern void pgstat_wal_reset_all_cb(TimestampTz ts); @@ -778,8 +767,23 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, uint64 obji * Variables in pgstat.c */ -extern PGDLLIMPORT PgStat_LocalState pgStatLocal; +/* + * Track if *any* pending fixed-numbered statistics should be flushed to + * shared memory. + * + * This flag can be switched to true by fixed-numbered statistics to let + * pgstat_report_stat() know if it needs to go through one round of + * reports, calling flush_static_cb for each fixed-numbered statistics + * kind. When this flag is not set, pgstat_report_stat() is able to do + * a fast exit, knowing that there are no pending fixed-numbered statistics. + * + * Statistics callbacks should never reset this flag; pgstat_report_stat() + * is in charge of doing that. + */ +extern PGDLLIMPORT bool pgstat_report_fixed; +/* Backend-local stats state */ +extern PGDLLIMPORT PgStat_LocalState pgStatLocal; /* * Implementation of inline functions declared above. From 3151c264d460c0be09131ce90529073631d70ae8 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 28 Jul 2025 08:38:24 +0900 Subject: [PATCH 380/602] ecpg: Fix memory leaks in ecpg_auto_prepare() This routine includes three code paths that can fail, with the allocated prepared statement name going out of scope. Per report from Coverity. Oversight in commit a6eabec6808c, which changed the order of some ecpg_strdup() calls in this code path.
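The pattern behind the fix is worth spelling out: once the statement name has been allocated, every later failure path must release it before returning. A minimal standalone sketch of one way to keep that invariant, using hypothetical helper names rather than the ecpg code itself:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static bool do_prepare(const char *query) { return query[0] != '\0'; }	/* stub */
static bool do_cache(const char *query)   { return query[0] != '\0'; }	/* stub */

static bool
prepare_with_cleanup(const char *query, char **name)
{
	*name = strdup(query);		/* stands in for ecpg_strdup() */
	if (*name == NULL)
		return false;

	/* A single exit label makes the free hard to forget. */
	if (!do_prepare(query))
		goto fail;
	if (!do_cache(query))
		goto fail;
	return true;

fail:
	free(*name);
	*name = NULL;
	return false;
}

int
main(void)
{
	char	   *name = NULL;

	if (prepare_with_cleanup("SELECT 1", &name))
		printf("prepared as %s\n", name);
	free(name);					/* NULL on failure, so always safe */
	return 0;
}

The committed fix instead repeats ecpg_free(*name) on each return path, which stays closer to the existing control flow of the function.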
--- src/interfaces/ecpg/ecpglib/prepare.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/interfaces/ecpg/ecpglib/prepare.c b/src/interfaces/ecpg/ecpglib/prepare.c index dd6fd1fe7f407..06f0135813b37 100644 --- a/src/interfaces/ecpg/ecpglib/prepare.c +++ b/src/interfaces/ecpg/ecpglib/prepare.c @@ -603,7 +603,10 @@ ecpg_auto_prepare(int lineno, const char *connection_name, const int compat, cha prep = ecpg_find_prepared_statement(stmtID, con, NULL); /* This prepared name doesn't exist on this connection. */ if (!prep && !prepare_common(lineno, con, stmtID, query)) + { + ecpg_free(*name); return false; + } } else @@ -619,11 +622,17 @@ ecpg_auto_prepare(int lineno, const char *connection_name, const int compat, cha return false; if (!ECPGprepare(lineno, connection_name, 0, stmtID, query)) + { + ecpg_free(*name); return false; + } entNo = AddStmtToCache(lineno, stmtID, connection_name, compat, query); if (entNo < 0) + { + ecpg_free(*name); return false; + } } /* increase usage counter */ From dcc9820a3526eb8d89c5da75dad32b8ef4bf8545 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 24 Jul 2025 13:30:43 -0400 Subject: [PATCH 381/602] Avoid throwing away the error message in syncrep_yyerror. Commit 473a575e05979b4dbb28b3f2544f4ec8f184ce65 purported to make this function stash the error message in *syncrep_parse_result_p, but it didn't actually. As a result, an attempt to set synchronous_standby_names to any value that does not parse resulted in a generic "parser failed." message rather than anything more specific. This fixes that. Discussion: http://postgr.es/m/CA+TgmoYF9wPNZ-Q_EMfib_espgHycY-eX__6Tzo2GpYpVXqCdQ@mail.gmail.com Backpatch-through: 18 --- src/backend/replication/syncrep_scanner.l | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/backend/replication/syncrep_scanner.l b/src/backend/replication/syncrep_scanner.l index 7dec1f869c745..02004d621e73d 100644 --- a/src/backend/replication/syncrep_scanner.l +++ b/src/backend/replication/syncrep_scanner.l @@ -157,17 +157,16 @@ syncrep_yyerror(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse { struct yyguts_t *yyg = (struct yyguts_t *) yyscanner; /* needed for yytext * macro */ - char *syncrep_parse_error_msg = *syncrep_parse_error_msg_p; /* report only the first error in a parse operation */ - if (syncrep_parse_error_msg) + if (*syncrep_parse_error_msg_p) return; if (yytext[0]) - syncrep_parse_error_msg = psprintf("%s at or near \"%s\"", - message, yytext); + *syncrep_parse_error_msg_p = psprintf("%s at or near \"%s\"", + message, yytext); else - syncrep_parse_error_msg = psprintf("%s at end of input", - message); + *syncrep_parse_error_msg_p = psprintf("%s at end of input", + message); } void From d5b9b2d40262f57f58322ad49f8928fd4a492adb Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 28 Jul 2025 11:15:47 -0400 Subject: [PATCH 382/602] Remove misleading hint for "unexpected data beyond EOF" error. Commit ffae5cc5a6024b4e25ec920ed5c4dfac649605f8 added this hint in 2006, but it's now obsolete and doesn't reflect what users should really check in this situation. We were not able to agree on a new hint, so just delete the existing one and update the comments to mention one possibility that is known to cause problems of this kind: something other than PostgreSQL is modifying files in the PostgreSQL data directory. 
Author: Jakub Wartak Reviewed-by: Robert Haas Reviewed-by: Andres Freund Reviewed-by: Christoph Berg Discussion: https://postgr.es/m/CAKZiRmxNbcaL76x=09Sxf7aUmrRQJBf8drzDdUHo+j9_eM+VMg@mail.gmail.com --- src/backend/storage/buffer/bufmgr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6afdd28dba6f2..9c6fe587ec940 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2743,11 +2743,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * because mdread doesn't complain about reads beyond EOF (when * zero_damaged_pages is ON) and so a previous attempt to read a block * beyond EOF could have left a "valid" zero-filled buffer. - * Unfortunately, we have also seen this case occurring because of - * buggy Linux kernels that sometimes return an lseek(SEEK_END) result - * that doesn't account for a recent write. In that situation, the - * pre-existing buffer would contain valid data that we don't want to - * overwrite. Since the legitimate cases should always have left a + * + * This has also been observed when relation was overwritten by external + * process. Since the legitimate cases should always have left a * zero-filled buffer, complain if not PageIsNew. */ if (existing_id >= 0) @@ -2778,8 +2776,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, ereport(ERROR, (errmsg("unexpected data beyond EOF in block %u of relation %s", existing_hdr->tag.blockNum, - relpath(bmr.smgr->smgr_rlocator, fork).str), - errhint("This has been seen to occur with buggy kernels; consider updating your system."))); + relpath(bmr.smgr->smgr_rlocator, fork).str))); /* * We *must* do smgr[zero]extend before succeeding, else the page From 71c0921b649d7a800eb2d6f93539890eaa14d979 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 28 Jul 2025 16:50:41 -0400 Subject: [PATCH 383/602] Avoid regression in the size of XML input that we will accept. This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory". It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way. Reported-by: Michael Paquier Author: Michael Paquier Co-authored-by: Erik Wienhold Reviewed-by: Tom Lane Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz Backpatch-through: 13 --- src/backend/utils/adt/xml.c | 68 ++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index f7b731825fca0..3379d3922606a 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1769,7 +1769,7 @@ xml_doctype_in_content(const xmlChar *str) * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode). * * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list - * of parsed nodes from the xmlParseInNodeContext call will be returned + * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned * to *parsed_nodes. (It is caller's responsibility to free that.) 
* * Errors normally result in ereport(ERROR), but if escontext is an @@ -1795,6 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, PgXmlErrorContext *xmlerrcxt; volatile xmlParserCtxtPtr ctxt = NULL; volatile xmlDocPtr doc = NULL; + volatile int save_keep_blanks = -1; /* * This step looks annoyingly redundant, but we must do it to have a @@ -1822,7 +1823,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, PG_TRY(); { bool parse_as_document = false; - int options; int res_code; size_t count = 0; xmlChar *version = NULL; @@ -1853,18 +1853,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, parse_as_document = true; } - /* - * Select parse options. - * - * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) - * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by - * internal DTD are applied'. As for external DTDs, we try to support - * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really - * happen because xmlPgEntityLoader prevents it. - */ - options = XML_PARSE_NOENT | XML_PARSE_DTDATTR - | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); - /* initialize output parameters */ if (parsed_xmloptiontype != NULL) *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT : @@ -1874,11 +1862,26 @@ xml_parse(text *data, XmlOptionType xmloption_arg, if (parse_as_document) { + int options; + + /* set up parser context used by xmlCtxtReadDoc */ ctxt = xmlNewParserCtxt(); if (ctxt == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); + /* + * Select parse options. + * + * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) + * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined + * by internal DTD are applied'. As for external DTDs, we try to + * support them too (see SQL/XML:2008 GR 10.16.7.e), but that + * doesn't really happen because xmlPgEntityLoader prevents it. + */ + options = XML_PARSE_NOENT | XML_PARSE_DTDATTR + | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); + doc = xmlCtxtReadDoc(ctxt, utf8string, NULL, /* no URL */ "UTF-8", @@ -1900,10 +1903,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, } else { - xmlNodePtr root; - xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY; - - /* set up document with empty root node to be the context node */ + /* set up document that xmlParseBalancedChunkMemory will add to */ doc = xmlNewDoc(version); if (doc == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, @@ -1916,36 +1916,23 @@ xml_parse(text *data, XmlOptionType xmloption_arg, "could not allocate XML document"); doc->standalone = standalone; - root = xmlNewNode(NULL, (const xmlChar *) "content-root"); - if (root == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xml node"); - - /* - * This attaches root to doc, so we need not free it separately; - * and there can't yet be any old root to free. - */ - oldroot = xmlDocSetRootElement(doc, root); - Assert(oldroot == NULL); + /* set parse options --- have to do this the ugly way */ + save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 
1 : 0); /* allow empty content */ if (*(utf8string + count)) { xmlNodePtr node_list = NULL; - xmlParserErrors res; - - res = xmlParseInNodeContext(root, - (char *) utf8string + count, - strlen((char *) utf8string + count), - options, - &node_list); - if (res != XML_ERR_OK || xmlerrcxt->err_occurred) + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, + utf8string + count, + &node_list); + if (res_code != 0 || xmlerrcxt->err_occurred) { - xmlFreeNodeList(node_list); xml_errsave(escontext, xmlerrcxt, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); + xmlFreeNodeList(node_list); goto fail; } @@ -1961,6 +1948,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, } PG_CATCH(); { + if (save_keep_blanks != -1) + xmlKeepBlanksDefault(save_keep_blanks); if (doc != NULL) xmlFreeDoc(doc); if (ctxt != NULL) @@ -1972,6 +1961,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, } PG_END_TRY(); + if (save_keep_blanks != -1) + xmlKeepBlanksDefault(save_keep_blanks); + if (ctxt != NULL) xmlFreeParserCtxt(ctxt); From 4bc62b86849065939a6b85273fece6b92d6e97bf Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 29 Jul 2025 15:18:01 +1200 Subject: [PATCH 384/602] Display Memoize planner estimates in EXPLAIN There've been a few complaints that it can be overly difficult to figure out why the planner picked a Memoize plan. To help address that, here we adjust the EXPLAIN output to display the following additional details: 1) The estimated number of cache entries that can be stored at once 2) The estimated number of unique lookup keys that we expect to see 3) The number of lookups we expect 4) The estimated hit ratio Technically #4 can be calculated using #1, #2 and #3, but it's not a particularly obvious calculation, so we opt to display it explicitly. The original patch by Lukas Fittl only displayed the hit ratio, but there was a fear that might lead to more questions about how that was calculated. The idea with displaying all 4 is to be transparent which may allow queries to be tuned more easily. For example, if #2 isn't correct then maybe extended statistics or a manual n_distinct estimate can be used to help fix poor plan choices. 
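Since the message notes that the hit-percent figure is not an obvious calculation, here is the estimate as the patch's cost_memoize_rescan() computes it, reduced to a small standalone function; the function name and the input numbers are made up for illustration, the formula is the one in the diff.

#include <stdio.h>

/* Mirrors the formula in cost_memoize_rescan(): only lookups beyond the
 * first for each key can hit, scaled by the fraction of distinct keys
 * that fit in the cache. */
static double
est_hit_ratio(double est_calls, double ndistinct, double est_cache_entries)
{
	double		fits = est_cache_entries /
		(ndistinct > est_cache_entries ? ndistinct : est_cache_entries);

	return ((est_calls - ndistinct) / est_calls) * fits;
}

int
main(void)
{
	/* e.g. 1000 expected lookups, 100 distinct keys, room for 80 entries:
	 * (900/1000) * (80/100) = 0.72, shown as "hit percent=72.00%" */
	printf("%.2f%%\n", est_hit_ratio(1000.0, 100.0, 80.0) * 100.0);
	return 0;
}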
Author: Ilia Evdokimov Author: Lukas Fittl Reviewed-by: David Rowley Reviewed-by: Andrei Lepikhov Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CAP53Pky29GWAVVk3oBgKBDqhND0BRBN6yTPeguV_qSivFL5N_g%40mail.gmail.com --- src/backend/commands/explain.c | 21 ++++++++++++++++++++- src/backend/optimizer/path/costsize.c | 18 ++++++++++++------ src/backend/optimizer/plan/createplan.c | 15 ++++++++++++--- src/backend/optimizer/util/pathnode.c | 11 ++++++++--- src/include/nodes/pathnodes.h | 4 +++- src/include/nodes/plannodes.h | 10 ++++++++++ src/include/optimizer/pathnode.h | 2 +- 7 files changed, 66 insertions(+), 15 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 7e2792ead715b..8345bc0264b23 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3582,6 +3582,7 @@ static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) { Plan *plan = ((PlanState *) mstate)->plan; + Memoize *mplan = (Memoize *) plan; ListCell *lc; List *context; StringInfoData keystr; @@ -3602,7 +3603,7 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) plan, ancestors); - foreach(lc, ((Memoize *) plan)->param_exprs) + foreach(lc, mplan->param_exprs) { Node *expr = (Node *) lfirst(lc); @@ -3618,6 +3619,24 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) pfree(keystr.data); + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Estimates: capacity=%u distinct keys=%.0f lookups=%.0f hit percent=%.2f%%\n", + mplan->est_entries, mplan->est_unique_keys, + mplan->est_calls, mplan->est_hit_ratio * 100.0); + } + else + { + ExplainPropertyUInteger("Estimated Capacity", NULL, mplan->est_entries, es); + ExplainPropertyFloat("Estimated Distinct Lookup Keys", NULL, mplan->est_unique_keys, 0, es); + ExplainPropertyFloat("Estimated Lookups", NULL, mplan->est_calls, 0, es); + ExplainPropertyFloat("Estimated Hit Percent", NULL, mplan->est_hit_ratio * 100.0, 2, es); + } + } + if (!es->analyze) return; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1f04a2c182ca9..344a3188317b1 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2572,13 +2572,13 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, Cost input_startup_cost = mpath->subpath->startup_cost; Cost input_total_cost = mpath->subpath->total_cost; double tuples = mpath->subpath->rows; - double calls = mpath->calls; + Cardinality est_calls = mpath->est_calls; int width = mpath->subpath->pathtarget->width; double hash_mem_bytes; double est_entry_bytes; - double est_cache_entries; - double ndistinct; + Cardinality est_cache_entries; + Cardinality ndistinct; double evict_ratio; double hit_ratio; Cost startup_cost; @@ -2604,7 +2604,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, est_cache_entries = floor(hash_mem_bytes / est_entry_bytes); /* estimate on the distinct number of parameter values */ - ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL, + ndistinct = estimate_num_groups(root, mpath->param_exprs, est_calls, NULL, &estinfo); /* @@ -2616,7 +2616,10 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, * certainly mean a MemoizePath will never survive add_path(). 
*/ if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0) - ndistinct = calls; + ndistinct = est_calls; + + /* Remember the ndistinct estimate for EXPLAIN */ + mpath->est_unique_keys = ndistinct; /* * Since we've already estimated the maximum number of entries we can @@ -2644,9 +2647,12 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, * must look at how many scans are estimated in total for this node and * how many of those scans we expect to get a cache hit. */ - hit_ratio = ((calls - ndistinct) / calls) * + hit_ratio = ((est_calls - ndistinct) / est_calls) * (est_cache_entries / Max(ndistinct, est_cache_entries)); + /* Remember the hit ratio estimate for EXPLAIN */ + mpath->est_hit_ratio = hit_ratio; + Assert(hit_ratio >= 0 && hit_ratio <= 1.0); /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8a9f1d7a943a8..bfefc7dbea106 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -284,7 +284,10 @@ static Material *make_material(Plan *lefttree); static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, List *param_exprs, bool singlerow, bool binary_mode, - uint32 est_entries, Bitmapset *keyparamids); + uint32 est_entries, Bitmapset *keyparamids, + Cardinality est_calls, + Cardinality est_unique_keys, + double est_hit_ratio); static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, @@ -1753,7 +1756,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags) plan = make_memoize(subplan, operators, collations, param_exprs, best_path->singlerow, best_path->binary_mode, - best_path->est_entries, keyparamids); + best_path->est_entries, keyparamids, best_path->est_calls, + best_path->est_unique_keys, best_path->est_hit_ratio); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6749,7 +6753,9 @@ materialize_finished_plan(Plan *subplan) static Memoize * make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, List *param_exprs, bool singlerow, bool binary_mode, - uint32 est_entries, Bitmapset *keyparamids) + uint32 est_entries, Bitmapset *keyparamids, + Cardinality est_calls, Cardinality est_unique_keys, + double est_hit_ratio) { Memoize *node = makeNode(Memoize); Plan *plan = &node->plan; @@ -6767,6 +6773,9 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, node->binary_mode = binary_mode; node->est_entries = est_entries; node->keyparamids = keyparamids; + node->est_calls = est_calls; + node->est_unique_keys = est_unique_keys; + node->est_hit_ratio = est_hit_ratio; return node; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 9cc602788eaae..a4c5867cdcb84 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1689,7 +1689,7 @@ create_material_path(RelOptInfo *rel, Path *subpath) MemoizePath * create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *param_exprs, List *hash_operators, - bool singlerow, bool binary_mode, double calls) + bool singlerow, bool binary_mode, Cardinality est_calls) { MemoizePath *pathnode = makeNode(MemoizePath); @@ -1710,7 +1710,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->param_exprs = param_exprs; pathnode->singlerow = singlerow; pathnode->binary_mode = binary_mode; - 
pathnode->calls = clamp_row_est(calls); /* * For now we set est_entries to 0. cost_memoize_rescan() does all the @@ -1720,6 +1719,12 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, */ pathnode->est_entries = 0; + pathnode->est_calls = clamp_row_est(est_calls); + + /* These will also be set later in cost_memoize_rescan() */ + pathnode->est_unique_keys = 0.0; + pathnode->est_hit_ratio = 0.0; + /* we should not generate this path type when enable_memoize=false */ Assert(enable_memoize); pathnode->path.disabled_nodes = subpath->disabled_nodes; @@ -4259,7 +4264,7 @@ reparameterize_path(PlannerInfo *root, Path *path, mpath->hash_operators, mpath->singlerow, mpath->binary_mode, - mpath->calls); + mpath->est_calls); } default: break; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index e5dd15098f635..ad2726f026f7d 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2133,10 +2133,12 @@ typedef struct MemoizePath * complete after caching the first record. */ bool binary_mode; /* true when cache key should be compared bit * by bit, false when using hash equality ops */ - Cardinality calls; /* expected number of rescans */ uint32 est_entries; /* The maximum number of entries that the * planner expects will fit in the cache, or 0 * if unknown */ + Cardinality est_calls; /* expected number of rescans */ + Cardinality est_unique_keys; /* estimated unique keys, for EXPLAIN */ + double est_hit_ratio; /* estimated cache hit ratio, for EXPLAIN */ } MemoizePath; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 46e2e09ea35be..6d8e1e99db3bd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1073,6 +1073,16 @@ typedef struct Memoize /* paramids from param_exprs */ Bitmapset *keyparamids; + + /* Estimated number of rescans, for EXPLAIN */ + Cardinality est_calls; + + /* Estimated number of distinct lookup keys, for EXPLAIN */ + Cardinality est_unique_keys; + + /* Estimated cache hit ratio, for EXPLAIN */ + double est_hit_ratio; + } Memoize; /* ---------------- diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 60dcdb77e41be..58936e963cb6b 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -90,7 +90,7 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root, List *hash_operators, bool singlerow, bool binary_mode, - double calls); + Cardinality est_calls); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); extern GatherPath *create_gather_path(PlannerInfo *root, From c2c2c7e225669e81f83a5db3f0f57131cdaa4a2d Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 29 Jul 2025 10:41:13 +0300 Subject: [PATCH 385/602] Clarify documentation for the initcap function This commit documents differences in the definition of word separators for the initcap function between libc and ICU locale providers. Backpatch to all supported branches. 
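To make the documented libc-provider rule concrete, here is a minimal C sketch of that behavior; this is hypothetical illustration code, not PostgreSQL's actual implementation (str_initcap() in formatting.c), and it ignores multibyte encodings entirely:

#include <ctype.h>
#include <stdio.h>

/*
 * Sketch of libc-style initcap semantics: a "word" is a run of
 * alphanumeric characters; any other character ends the word.  The ICU
 * provider instead segments words per Unicode Standard Annex #29, so
 * results can differ around characters such as apostrophes.
 */
static void
initcap_libc_style(char *s)
{
    int         in_word = 0;

    for (; *s; s++)
    {
        if (isalnum((unsigned char) *s))
        {
            *s = (char) (in_word ? tolower((unsigned char) *s)
                         : toupper((unsigned char) *s));
            in_word = 1;
        }
        else
            in_word = 0;        /* separator: next alnum starts a word */
    }
}

int
main(void)
{
    char        buf[] = "hi THOMAS";

    initcap_libc_style(buf);
    printf("%s\n", buf);        /* prints "Hi Thomas" */
    return 0;
}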
Discussion: https://postgr.es/m/804cc10ef95d4d3b298e76b181fd9437%40postgrespro.ru Author: Oleg Tselebrovskiy Backpatch-through: 13 --- doc/src/sgml/func.sgml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index de5b5929ee078..74a16af04ad3b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3148,8 +3148,11 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in Converts the first letter of each word to upper case and the - rest to lower case. Words are sequences of alphanumeric - characters separated by non-alphanumeric characters. + rest to lower case. When using the libc locale + provider, words are sequences of alphanumeric characters separated + by non-alphanumeric characters; when using the ICU locale provider, + words are separated according to + Unicode Standard Annex #29. initcap('hi THOMAS') From cb833c1b6d19507b13a1a852feea4dbe5d6f0c20 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 29 Jul 2025 17:03:07 +0900 Subject: [PATCH 386/602] Handle timeout in PostgreSQL::Test::Cluster::is_alive() This commit adds an extra --timeout=PG_TEST_TIMEOUT_DEFAULT to the call of pg_isready done in is_alive(), so that it is possible to give the call more leeway on resource-constrained machines. By default the timeout is 180s, and it can be changed depending on the environment where the tests are run. Per buildfarm member mamba, where the default timeout of 3s used by pg_isready has proved not to be enough, as the postmaster may not have the time it needs to reply to a ping request. Reported-by: Alexander Lakhin Reviewed-by: Nazir Bilal Yavuz Discussion: https://postgr.es/m/29b637df-f818-4b52-986a-f11ba28300e9@gmail.com --- src/test/perl/PostgreSQL/Test/Cluster.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 61f68e0cc2e51..35413f140198b 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -304,6 +304,7 @@ sub is_alive my $ret = PostgreSQL::Test::Utils::system_log( 'pg_isready', + '--timeout' => $PostgreSQL::Test::Utils::timeout_default, '--host' => $self->host, '--port' => $self->port); From cc321b1d1c55fe208a394b0f8e0e99c5fb91742c Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Tue, 29 Jul 2025 19:43:10 +0900 Subject: [PATCH 387/602] Add regression test for background worker restart after crash. Previously, if a background worker crashed and the server restarted with restart_after_crash enabled, the worker was not restarted as expected. This issue was fixed by commit b5d084c5353, which ensures that background workers without the never-restart flag are correctly restarted after a crash-and-restart cycle. To guard against regressions, this commit adds a test that verifies a background worker successfully restarts in such a scenario.
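The "never-restart flag" mentioned above is the bgw_restart_time field of a worker's registration. The following is a hedged, extension-style sketch of that contract with hypothetical names ("my_extension", "my_worker_main"); the in-core logical replication launcher that the test exercises is registered internally but follows the same rule:

#include "postgres.h"

#include "postmaster/bgworker.h"

/*
 * Sketch of registering a background worker from an extension's
 * _PG_init().  A nonnegative bgw_restart_time asks the postmaster to
 * restart the worker that many seconds after a crash; setting it to
 * BGW_NEVER_RESTART opts out of restarting.
 */
void
register_sketch_worker(void)
{
    BackgroundWorker worker;

    memset(&worker, 0, sizeof(worker));
    worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
    worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
    worker.bgw_restart_time = 5;    /* seconds; or BGW_NEVER_RESTART */
    snprintf(worker.bgw_name, BGW_MAXLEN, "sketch worker");
    snprintf(worker.bgw_type, BGW_MAXLEN, "sketch worker");
    snprintf(worker.bgw_library_name, sizeof(worker.bgw_library_name),
             "my_extension");
    snprintf(worker.bgw_function_name, sizeof(worker.bgw_function_name),
             "my_worker_main");
    RegisterBackgroundWorker(&worker);
}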
Author: Fujii Masao Reviewed-by: ChangAo Chen Discussion: https://postgr.es/m/CAHGQGwHF-PdUOgiXCH_8K5qBm8b13h0Qt=dSoFXZybXQdbf-tw@mail.gmail.com --- src/test/recovery/t/013_crash_restart.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl index debfa635c36fe..4c5af018ee44e 100644 --- a/src/test/recovery/t/013_crash_restart.pl +++ b/src/test/recovery/t/013_crash_restart.pl @@ -228,6 +228,13 @@ 'before-orderly-restart', 'can still write after crash restart'); +# Confirm that the logical replication launcher, a background worker +# without the never-restart flag, has also restarted successfully. +is($node->poll_query_until('postgres', + "SELECT count(*) = 1 FROM pg_stat_activity WHERE backend_type = 'logical replication launcher'"), + '1', + 'logical replication launcher restarted after crash'); + # Just to be sure, check that an orderly restart now still works $node->restart(); From 1d1612aec7688139e1a5506df1366b4b6a69605d Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 29 Jul 2025 09:09:42 -0400 Subject: [PATCH 388/602] Run pgindent. Per buildfarm member koel, Nathan Bossart, and David Rowley. --- src/backend/storage/buffer/bufmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 9c6fe587ec940..67431208e7f5f 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2744,9 +2744,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * zero_damaged_pages is ON) and so a previous attempt to read a block * beyond EOF could have left a "valid" zero-filled buffer. * - * This has also been observed when relation was overwritten by external - * process. Since the legitimate cases should always have left a - * zero-filled buffer, complain if not PageIsNew. + * This has also been observed when relation was overwritten by + * external process. Since the legitimate cases should always have + * left a zero-filled buffer, complain if not PageIsNew. */ if (existing_id >= 0) { From b9ebb92bcb6e0db111deacfbc14f470ce1b3ed8d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 09:42:22 -0400 Subject: [PATCH 389/602] Suppress uninitialized-variable warning. In the wake of commit 80aa9848b, a few compilers think that postgresAcquireSampleRowsFunc's "reltuples" might be used uninitialized. The logic is visibly correct, both before and after that change; presumably what happened here is that the previous presence of a setjmp() in the function stopped them from attempting any flow analysis at all. Add a dummy initialization to silence the warning. 
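The pattern is easy to reproduce outside the tree. In this hypothetical, self-contained example the variable is assigned and read under the same condition, which is visibly safe, yet a compiler that cannot correlate the two tests may still warn; the dummy initialization is the conventional silencer:

#include <stdio.h>

/*
 * "reltuples" is written only when use_sampling is true and read only
 * when use_sampling is true, so it is never used uninitialized.  Some
 * compilers cannot prove that, hence the dummy initialization.
 */
static double
estimate_rows(int use_sampling, double sample_frac)
{
    double      reltuples = -1.0;   /* dummy init silences the warning */
    double      rows = 100.0;

    if (use_sampling)
        reltuples = rows / sample_frac;

    if (use_sampling)
        rows = reltuples * sample_frac;

    return rows;
}

int
main(void)
{
    printf("%.1f\n", estimate_rows(1, 0.5));
    return 0;
}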
Reported-by: Ashutosh Bapat Author: Tom Lane Discussion: https://postgr.es/m/CAExHW5tkerCufA_F6oct5dMJ61N+yVrVgYXL7M8dD-5_zXjrDw@mail.gmail.com --- contrib/postgres_fdw/postgres_fdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 25b287be069fa..9d266b3e2b120 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -5018,7 +5018,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, int server_version_num; PgFdwSamplingMethod method = ANALYZE_SAMPLE_AUTO; /* auto is default */ double sample_frac = -1.0; - double reltuples; + double reltuples = -1.0; unsigned int cursor_number; StringInfoData sql; PGresult *res; From 74e121c8dc5184318478dee587cf7d8303ab1357 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 10:35:01 -0400 Subject: [PATCH 390/602] Split up pgfdw_report_error so that we can mark it pg_noreturn. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pgfdw_report_error has the same design fault as elog/ereport do, namely that it might or might not return depending on elevel. While those functions are too widely used to redesign, there are only about 30 call sites for pgfdw_report_error, and it's not exposed for extension use. So let's rethink it. Split it into pgfdw_report_error() which hard-wires ERROR elevel and is marked pg_noreturn, and pgfdw_report() which allows only elevels less than ERROR. (Thanks to Álvaro Herrera for suggesting this naming.) The motivation for doing this now is that in the wake of commit 80aa9848b, which removed a bunch of PG_TRYs from postgres_fdw, we're seeing more thorough flow analysis there from C compilers and Coverity. Marking pgfdw_report_error as noreturn where appropriate should help prevent false-positive complaints. We could alternatively have invented a macro wrapper similar to what we use for elog/ereport, but that code is sufficiently fragile that I didn't find it appetizing to make another copy. Since 80aa9848b already changed pgfdw_report_error's signature, this won't make back-patching any harder than it was already. Author: Tom Lane Discussion: https://postgr.es/m/420221.1753714491@sss.pgh.pa.us --- contrib/postgres_fdw/connection.c | 39 ++++++++++++++++++------- contrib/postgres_fdw/postgres_fdw.c | 44 ++++++++++++++--------------- contrib/postgres_fdw/postgres_fdw.h | 6 ++-- 3 files changed, 55 insertions(+), 34 deletions(-) diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index a33843fcf8531..e8148f2c5a223 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -142,6 +142,8 @@ static void do_sql_command_begin(PGconn *conn, const char *sql); static void do_sql_command_end(PGconn *conn, const char *sql, bool consume_input); static void begin_remote_xact(ConnCacheEntry *entry); +static void pgfdw_report_internal(int elevel, PGresult *res, PGconn *conn, + const char *sql); static void pgfdw_xact_callback(XactEvent event, void *arg); static void pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid, @@ -815,7 +817,7 @@ static void do_sql_command_begin(PGconn *conn, const char *sql) { if (!PQsendQuery(conn, sql)) - pgfdw_report_error(ERROR, NULL, conn, sql); + pgfdw_report_error(NULL, conn, sql); } static void @@ -830,10 +832,10 @@ do_sql_command_end(PGconn *conn, const char *sql, bool consume_input) * would be large compared to the overhead of PQconsumeInput.) 
*/ if (consume_input && !PQconsumeInput(conn)) - pgfdw_report_error(ERROR, NULL, conn, sql); + pgfdw_report_error(NULL, conn, sql); res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql); + pgfdw_report_error(res, conn, sql); PQclear(res); } @@ -966,7 +968,10 @@ pgfdw_get_result(PGconn *conn) /* * Report an error we got from the remote server. * - * elevel: error level to use (typically ERROR, but might be less) + * Callers should use pgfdw_report_error() to throw an error, or use + * pgfdw_report() for lesser message levels. (We make this distinction + * so that pgfdw_report_error() can be marked noreturn.) + * * res: PGresult containing the error (might be NULL) * conn: connection we did the query on * sql: NULL, or text of remote command we tried to execute @@ -979,8 +984,22 @@ pgfdw_get_result(PGconn *conn) * marked with have_error = true. */ void -pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - const char *sql) +pgfdw_report_error(PGresult *res, PGconn *conn, const char *sql) +{ + pgfdw_report_internal(ERROR, res, conn, sql); + pg_unreachable(); +} + +void +pgfdw_report(int elevel, PGresult *res, PGconn *conn, const char *sql) +{ + Assert(elevel < ERROR); /* use pgfdw_report_error for that */ + pgfdw_report_internal(elevel, res, conn, sql); +} + +static void +pgfdw_report_internal(int elevel, PGresult *res, PGconn *conn, + const char *sql) { char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); @@ -1538,7 +1557,7 @@ pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query) */ if (!PQsendQuery(conn, query)) { - pgfdw_report_error(WARNING, NULL, conn, query); + pgfdw_report(WARNING, NULL, conn, query); return false; } @@ -1563,7 +1582,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, */ if (consume_input && !PQconsumeInput(conn)) { - pgfdw_report_error(WARNING, NULL, conn, query); + pgfdw_report(WARNING, NULL, conn, query); return false; } @@ -1575,7 +1594,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, (errmsg("could not get query result due to timeout"), errcontext("remote SQL command: %s", query))); else - pgfdw_report_error(WARNING, NULL, conn, query); + pgfdw_report(WARNING, NULL, conn, query); return false; } @@ -1583,7 +1602,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, /* Issue a warning if not successful. */ if (PQresultStatus(result) != PGRES_COMMAND_OK) { - pgfdw_report_error(WARNING, result, conn, query); + pgfdw_report(WARNING, result, conn, query); return ignore_errors; } PQclear(result); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 9d266b3e2b120..456b267f70b5b 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -1704,7 +1704,7 @@ postgresReScanForeignScan(ForeignScanState *node) res = pgfdw_exec_query(fsstate->conn, sql, fsstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fsstate->conn, sql); + pgfdw_report_error(res, fsstate->conn, sql); PQclear(res); /* Now force a fresh FETCH. */ @@ -3614,7 +3614,7 @@ get_remote_estimate(const char *sql, PGconn *conn, */ res = pgfdw_exec_query(conn, sql, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql); + pgfdw_report_error(res, conn, sql); /* * Extract cost numbers for topmost plan node. 
Note we search for a left @@ -3769,14 +3769,14 @@ create_cursor(ForeignScanState *node) */ if (!PQsendQueryParams(conn, buf.data, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, conn, buf.data); + pgfdw_report_error(NULL, conn, buf.data); /* * Get the result, and check for success. */ res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + pgfdw_report_error(res, conn, fsstate->query); PQclear(res); /* Mark the cursor as created, and show no tuples have been retrieved */ @@ -3823,7 +3823,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_get_result(conn); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + pgfdw_report_error(res, conn, fsstate->query); /* Reset per-connection state */ fsstate->conn_state->pendingAreq = NULL; @@ -3839,7 +3839,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_exec_query(conn, sql, fsstate->conn_state); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + pgfdw_report_error(res, conn, fsstate->query); } /* Convert the data into HeapTuples */ @@ -3944,7 +3944,7 @@ close_cursor(PGconn *conn, unsigned int cursor_number, snprintf(sql, sizeof(sql), "CLOSE c%u", cursor_number); res = pgfdw_exec_query(conn, sql, conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql); + pgfdw_report_error(res, conn, sql); PQclear(res); } @@ -4152,7 +4152,7 @@ execute_foreign_modify(EState *estate, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); + pgfdw_report_error(NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. @@ -4160,7 +4160,7 @@ execute_foreign_modify(EState *estate, res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); + pgfdw_report_error(res, fmstate->conn, fmstate->query); /* Check number of rows affected, and fetch RETURNING tuple if any */ if (fmstate->has_returning) @@ -4219,14 +4219,14 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) fmstate->query, 0, NULL)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); + pgfdw_report_error(NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. */ res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); + pgfdw_report_error(res, fmstate->conn, fmstate->query); PQclear(res); /* This action shows that the prepare has been done. 
*/ @@ -4373,7 +4373,7 @@ deallocate_query(PgFdwModifyState *fmstate) snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name); res = pgfdw_exec_query(fmstate->conn, sql, fmstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, sql); + pgfdw_report_error(res, fmstate->conn, sql); PQclear(res); pfree(fmstate->p_name); fmstate->p_name = NULL; @@ -4541,7 +4541,7 @@ execute_dml_stmt(ForeignScanState *node) */ if (!PQsendQueryParams(dmstate->conn, dmstate->query, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, dmstate->conn, dmstate->query); + pgfdw_report_error(NULL, dmstate->conn, dmstate->query); /* * Get the result, and check for success. @@ -4549,7 +4549,7 @@ execute_dml_stmt(ForeignScanState *node) dmstate->result = pgfdw_get_result(dmstate->conn); if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, + pgfdw_report_error(dmstate->result, dmstate->conn, dmstate->query); /* @@ -4923,7 +4923,7 @@ postgresAnalyzeForeignTable(Relation relation, res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 1) elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); @@ -4972,7 +4972,7 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 2) elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); @@ -5202,7 +5202,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); PQclear(res); /* @@ -5254,7 +5254,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, res = pgfdw_exec_query(conn, fetch_sql, NULL); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); /* Process whatever we got. */ numrows = PQntuples(res); @@ -5426,7 +5426,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + pgfdw_report_error(res, conn, buf.data); if (PQntuples(res) != 1) ereport(ERROR, @@ -5540,7 +5540,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Fetch the data */ res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + pgfdw_report_error(res, conn, buf.data); /* Process results */ numrows = PQntuples(res); @@ -7312,7 +7312,7 @@ postgresForeignAsyncNotify(AsyncRequest *areq) /* On error, report the original query, not the FETCH. 
*/ if (!PQconsumeInput(fsstate->conn)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); + pgfdw_report_error(NULL, fsstate->conn, fsstate->query); fetch_more_data(node); @@ -7411,7 +7411,7 @@ fetch_more_data_begin(AsyncRequest *areq) fsstate->fetch_size, fsstate->cursor_number); if (!PQsendQuery(fsstate->conn, sql)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); + pgfdw_report_error(NULL, fsstate->conn, fsstate->query); /* Remember that the request is in process */ fsstate->conn_state->pendingAreq = areq; diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 38e1a88594131..e69735298d78f 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -166,8 +166,10 @@ extern void do_sql_command(PGconn *conn, const char *sql); extern PGresult *pgfdw_get_result(PGconn *conn); extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state); -extern void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - const char *sql); +pg_noreturn extern void pgfdw_report_error(PGresult *res, PGconn *conn, + const char *sql); +extern void pgfdw_report(int elevel, PGresult *res, PGconn *conn, + const char *sql); /* in option.c */ extern int ExtractConnectionOptions(List *defelems, From 0f3a26feddae7ae403c90742095ff4626d7e5617 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 29 Jul 2025 10:32:53 -0500 Subject: [PATCH 391/602] Add commit 1d1612aec7 to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index f8526d4d1a9c2..f83e2fc658664 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,6 +14,9 @@ # # $ git log --pretty=format:"%H # %cd%n# %s" $PGINDENTGITHASH -1 --date=iso +1d1612aec7688139e1a5506df1366b4b6a69605d # 2025-07-29 09:10:41 -0400 +# Run pgindent. + 73873805fb3627cb23937c750fa83ffd8f16fc6c # 2025-07-25 16:36:44 -0400 # Run pgindent on the changes of the previous patch. From 902f922218894dd69df1874f9f130dbbafff0499 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 12:47:19 -0400 Subject: [PATCH 392/602] Remove unnecessary complication around xmlParseBalancedChunkMemory. When I prepared 71c0921b6 et al yesterday, I was thinking that the logic involving explicitly freeing the node_list output was still needed to dodge leakage bugs in libxml2. But I was misremembering: we introduced that only because with early 2.13.x releases we could not trust xmlParseBalancedChunkMemory's result code, so we had to look to see if a node list was returned or not. There's no reason to believe that xmlParseBalancedChunkMemory will fail to clean up the node list when required, so simplify. (This essentially completes reverting all the non-cosmetic changes in 6082b3d5d.) 
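For readers unfamiliar with the libxml2 call, here is a hedged, standalone sketch of the simplified contract (illustration only, not the xml.c code itself; build with xml2-config --cflags --libs): on success the caller receives and owns the node list, while on failure the function is trusted to have freed any partial list, so no extra xmlFreeNodeList() call is needed:

#include <stdio.h>

#include <libxml/parser.h>

int
main(void)
{
    xmlDocPtr   doc = xmlNewDoc((const xmlChar *) "1.0");
    xmlNodePtr  list = NULL;
    int         rc;

    rc = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
                                     (const xmlChar *) "<a>x</a><b/>",
                                     &list);
    if (rc == 0)
    {
        /* success: the caller owns the node list and must free it */
        printf("first node: %s\n",
               list ? (const char *) list->name : "(none)");
        xmlFreeNodeList(list);
    }
    else
    {
        /* failure: libxml2 already cleaned up any partial node list */
        fprintf(stderr, "parse failed: %d\n", rc);
    }

    xmlFreeDoc(doc);
    return rc == 0 ? 0 : 1;
}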
Reported-by: Jim Jones Author: Tom Lane Discussion: https://postgr.es/m/997668.1753802857@sss.pgh.pa.us Backpatch-through: 13 --- src/backend/utils/adt/xml.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 3379d3922606a..182e8f75db75c 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1922,24 +1922,16 @@ xml_parse(text *data, XmlOptionType xmloption_arg, /* allow empty content */ if (*(utf8string + count)) { - xmlNodePtr node_list = NULL; - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, - &node_list); + parsed_nodes); if (res_code != 0 || xmlerrcxt->err_occurred) { xml_errsave(escontext, xmlerrcxt, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); - xmlFreeNodeList(node_list); goto fail; } - - if (parsed_nodes != NULL) - *parsed_nodes = node_list; - else - xmlFreeNodeList(node_list); } } From c3019bb778b99f2541779ed23402a8f825a0000b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 29 Jul 2025 18:56:00 +0200 Subject: [PATCH 393/602] Update comment The code being referred to was moved to a different function in commit eb8312a22a8, so update the comment accordingly. --- src/backend/utils/adt/tid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 1b0df1117171a..39dab3e42df58 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -84,7 +84,7 @@ tidin(PG_FUNCTION_ARGS) /* * Cope with possibility that unsigned long is wider than BlockNumber, in * which case strtoul will not raise an error for some values that are out - * of the range of BlockNumber. (See similar code in oidin().) + * of the range of BlockNumber. (See similar code in uint32in_subr().) */ #if SIZEOF_LONG > 4 if (cvt != (unsigned long) blockNumber && From 4300d8b6a79d61abb5ca9f901df7bde7a49322b6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 15:17:40 -0400 Subject: [PATCH 394/602] Don't put library-supplied -L/-I switches before user-supplied ones. For many optional libraries, we extract the -L and -l switches needed to link the library from a helper program such as llvm-config. In some cases we put the resulting -L switches into LDFLAGS ahead of -L switches specified via --with-libraries. That risks breaking the user's intention for --with-libraries. It's not such a problem if the library's -L switch points to a directory containing only that library, but on some platforms a library helper may "helpfully" offer a switch such as -L/usr/lib that points to a directory holding all standard libraries. If the user specified --with-libraries in hopes of overriding the standard build of some library, the -L/usr/lib switch prevents that from happening since it will come before the user-specified directory. To fix, avoid inserting these switches directly into LDFLAGS during configure, instead adding them to LIBDIRS or SHLIB_LINK. They will still eventually get added to LDFLAGS, but only after the switches coming from --with-libraries. The same problem exists for -I switches: those coming from --with-includes should appear before any coming from helper programs such as llvm-config. We have not heard field complaints about this case, but it seems certain that a user attempting to override a standard library could have issues. 
The changes for this go well beyond configure itself, however, because many Makefiles have occasion to manipulate CPPFLAGS to insert locally-desirable -I switches, and some of them got it wrong. The correct ordering is any -I switches pointing at within-the-source-tree-or-build-tree directories, then those from the tree-wide CPPFLAGS, then those from helper programs. There were several places that risked pulling in a system-supplied copy of libpq headers, for example, instead of the in-tree files. (Commit cb36f8ec2 fixed one instance of that a few months ago, but this exercise found more.) The Meson build scripts may or may not have any comparable problems, but I'll leave it to someone else to investigate that. Reported-by: Charles Samborski Author: Tom Lane Discussion: https://postgr.es/m/70f2155f-27ca-4534-b33d-7750e20633d7@demurgos.net Backpatch-through: 13 --- config/llvm.m4 | 4 ++-- config/programs.m4 | 4 ++-- configure | 24 ++++++++++++------------ configure.ac | 18 +++++++++--------- src/Makefile.global.in | 2 +- src/backend/jit/llvm/Makefile | 2 +- src/bin/initdb/Makefile | 2 +- src/common/Makefile | 2 +- src/interfaces/libpq-oauth/Makefile | 2 +- src/interfaces/libpq/Makefile | 2 +- src/pl/plpython/Makefile | 2 +- src/pl/tcl/Makefile | 2 +- 12 files changed, 33 insertions(+), 33 deletions(-) diff --git a/config/llvm.m4 b/config/llvm.m4 index fa4bedd9370fc..9d6fe8199e364 100644 --- a/config/llvm.m4 +++ b/config/llvm.m4 @@ -4,7 +4,7 @@ # ----------------- # # Look for the LLVM installation, check that it's new enough, set the -# corresponding LLVM_{CFLAGS,CXXFLAGS,BINPATH} and LDFLAGS +# corresponding LLVM_{CFLAGS,CXXFLAGS,BINPATH,LIBS} # variables. Also verify that CLANG is available, to transform C # into bitcode. # @@ -55,7 +55,7 @@ AC_DEFUN([PGAC_LLVM_SUPPORT], for pgac_option in `$LLVM_CONFIG --ldflags`; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LLVM_LIBS="$LLVM_LIBS $pgac_option";; esac done diff --git a/config/programs.m4 b/config/programs.m4 index c73d9307ea8a9..e57fe4907b844 100644 --- a/config/programs.m4 +++ b/config/programs.m4 @@ -290,8 +290,8 @@ AC_DEFUN([PGAC_CHECK_LIBCURL], pgac_save_LDFLAGS=$LDFLAGS pgac_save_LIBS=$LIBS - CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" - LDFLAGS="$LIBCURL_LDFLAGS $LDFLAGS" + CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" + LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS" AC_CHECK_HEADER(curl/curl.h, [], [AC_MSG_ERROR([header file is required for --with-libcurl])]) diff --git a/configure b/configure index 6d7c22e153fea..8a535da6b7a98 100755 --- a/configure +++ b/configure @@ -5194,7 +5194,7 @@ fi for pgac_option in `$LLVM_CONFIG --ldflags`; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LLVM_LIBS="$LLVM_LIBS $pgac_option";; esac done
for pgac_option in $LZ4_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $LZ4_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -9807,12 +9807,12 @@ fi # note that -lzstd will be added by AC_CHECK_LIB below. for pgac_option in $ZSTD_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $ZSTD_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -12723,8 +12723,8 @@ if test "$with_libcurl" = yes ; then pgac_save_LDFLAGS=$LDFLAGS pgac_save_LIBS=$LIBS - CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" - LDFLAGS="$LIBCURL_LDFLAGS $LDFLAGS" + CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" + LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS" ac_fn_c_check_header_mongrel "$LINENO" "curl/curl.h" "ac_cv_header_curl_curl_h" "$ac_includes_default" if test "x$ac_cv_header_curl_curl_h" = xyes; then : @@ -16658,7 +16658,7 @@ fi if test "$with_icu" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$ICU_CFLAGS $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $ICU_CFLAGS" # Verify we have ICU's header files ac_fn_c_check_header_mongrel "$LINENO" "unicode/ucol.h" "ac_cv_header_unicode_ucol_h" "$ac_includes_default" @@ -18876,7 +18876,7 @@ Use --without-tcl to disable building PL/Tcl." "$LINENO" 5 fi # now that we have TCL_INCLUDE_SPEC, we can check for ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$TCL_INCLUDE_SPEC $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $TCL_INCLUDE_SPEC" ac_fn_c_check_header_mongrel "$LINENO" "tcl.h" "ac_cv_header_tcl_h" "$ac_includes_default" if test "x$ac_cv_header_tcl_h" = xyes; then : @@ -18945,7 +18945,7 @@ fi # check for if test "$with_python" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$python_includespec $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $python_includespec" ac_fn_c_check_header_mongrel "$LINENO" "Python.h" "ac_cv_header_Python_h" "$ac_includes_default" if test "x$ac_cv_header_Python_h" = xyes; then : diff --git a/configure.ac b/configure.ac index c2877e369350e..e72201e679b56 100644 --- a/configure.ac +++ b/configure.ac @@ -1103,12 +1103,12 @@ if test "$with_libxml" = yes ; then # Note the user could also set XML2_CFLAGS/XML2_LIBS directly for pgac_option in $XML2_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $XML2_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -1152,12 +1152,12 @@ if test "$with_lz4" = yes; then # note that -llz4 will be added by AC_CHECK_LIB below. for pgac_option in $LZ4_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $LZ4_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -1177,12 +1177,12 @@ if test "$with_zstd" = yes; then # note that -lzstd will be added by AC_CHECK_LIB below. 
for pgac_option in $ZSTD_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $ZSTD_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -1944,7 +1944,7 @@ fi if test "$with_icu" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$ICU_CFLAGS $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $ICU_CFLAGS" # Verify we have ICU's header files AC_CHECK_HEADER(unicode/ucol.h, [], @@ -2344,7 +2344,7 @@ Use --without-tcl to disable building PL/Tcl.]) fi # now that we have TCL_INCLUDE_SPEC, we can check for ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$TCL_INCLUDE_SPEC $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $TCL_INCLUDE_SPEC" AC_CHECK_HEADER(tcl.h, [], [AC_MSG_ERROR([header file is required for Tcl])]) CPPFLAGS=$ac_save_CPPFLAGS fi @@ -2381,7 +2381,7 @@ fi # check for if test "$with_python" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$python_includespec $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $python_includespec" AC_CHECK_HEADER(Python.h, [], [AC_MSG_ERROR([header file is required for Python])]) CPPFLAGS=$ac_save_CPPFLAGS fi diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 04952b533ded9..8b1b357beaa04 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -254,7 +254,7 @@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ PG_SYSROOT = @PG_SYSROOT@ -override CPPFLAGS := $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS) +override CPPFLAGS += $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) ifdef PGXS override CPPFLAGS := -I$(includedir_server) -I$(includedir_internal) $(CPPFLAGS) diff --git a/src/backend/jit/llvm/Makefile b/src/backend/jit/llvm/Makefile index e8c12060b93df..68677ba42e189 100644 --- a/src/backend/jit/llvm/Makefile +++ b/src/backend/jit/llvm/Makefile @@ -31,7 +31,7 @@ endif # All files in this directory use LLVM. CFLAGS += $(LLVM_CFLAGS) CXXFLAGS += $(LLVM_CXXFLAGS) -override CPPFLAGS := $(LLVM_CPPFLAGS) $(CPPFLAGS) +override CPPFLAGS += $(LLVM_CPPFLAGS) SHLIB_LINK += $(LLVM_LIBS) # Because this module includes C++ files, we need to use a C++ diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index 997e0a013e956..c0470efda92a3 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -20,7 +20,7 @@ include $(top_builddir)/src/Makefile.global # from libpq, else we have risks of version skew if we run with a libpq # shared library from a different PG version. Define # USE_PRIVATE_ENCODING_FUNCS to ensure that that happens. -override CPPFLAGS := -DUSE_PRIVATE_ENCODING_FUNCS -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS) +override CPPFLAGS := -DUSE_PRIVATE_ENCODING_FUNCS -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS) $(ICU_CFLAGS) # We need libpq only because fe_utils does. LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS) diff --git a/src/common/Makefile b/src/common/Makefile index 1e2b91c83c4c4..2c720caa50972 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -163,7 +163,7 @@ libpgcommon_shlib.a: $(OBJS_SHLIB) # The JSON API normally exits on out-of-memory; disable that behavior for shared # library builds. This requires libpq's pqexpbuffer.h. 
jsonapi_shlib.o: override CPPFLAGS += -DJSONAPI_USE_PQEXPBUFFER -jsonapi_shlib.o: override CPPFLAGS += -I$(libpq_srcdir) +jsonapi_shlib.o: override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) # Because this uses its own compilation rule, it doesn't use the # dependency tracking logic from Makefile.global. To make sure that diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index 270fc0cf2d9d9..682f17413b3a4 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -24,7 +24,7 @@ NAME = pq-oauth-$(MAJORVERSION) override shlib := lib$(NAME)$(DLSUFFIX) override stlib := libpq-oauth.a -override CPPFLAGS := -I$(libpq_srcdir) -I$(top_builddir)/src/port $(LIBCURL_CPPFLAGS) $(CPPFLAGS) +override CPPFLAGS := -I$(libpq_srcdir) -I$(top_builddir)/src/port $(CPPFLAGS) $(LIBCURL_CPPFLAGS) OBJS = \ $(WIN32RES) diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index 47d6781150944..da6650066d46e 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -24,7 +24,7 @@ NAME= pq SO_MAJOR_VERSION= 5 SO_MINOR_VERSION= $(MAJORVERSION) -override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port +override CPPFLAGS := -I$(srcdir) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port $(CPPFLAGS) ifneq ($(PORTNAME), win32) override CFLAGS += $(PTHREAD_CFLAGS) endif diff --git a/src/pl/plpython/Makefile b/src/pl/plpython/Makefile index f959083a0bdec..25f295c3709e2 100644 --- a/src/pl/plpython/Makefile +++ b/src/pl/plpython/Makefile @@ -11,7 +11,7 @@ ifeq ($(PORTNAME), win32) override python_libspec = endif -override CPPFLAGS := -I. -I$(srcdir) $(python_includespec) $(CPPFLAGS) +override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS) $(python_includespec) rpathdir = $(python_libdir) diff --git a/src/pl/tcl/Makefile b/src/pl/tcl/Makefile index ea52a2efc229d..dd57f7d694c82 100644 --- a/src/pl/tcl/Makefile +++ b/src/pl/tcl/Makefile @@ -11,7 +11,7 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -I. -I$(srcdir) $(TCL_INCLUDE_SPEC) $(CPPFLAGS) +override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS) $(TCL_INCLUDE_SPEC) # On Windows, we don't link directly with the Tcl library; see below ifneq ($(PORTNAME), win32) From 613f64712257d4b94e068e77fb0593e0a71d8df1 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 30 Jul 2025 00:39:49 +0300 Subject: [PATCH 395/602] Handle cancel requests with PID 0 gracefully If the client sent a query cancel request with backend PID 0, it tripped an assertion. With assertions disabled, you got this in the log instead: LOG: invalid cancel request with PID 0 LOG: wrong key in cancel request for process 0 Query cancellations don't even require authentication, so we better tolerate bogus requests. Fix by turning the assertion into a regular runtime check. Spotted while testing libpq behavior with a modified server that didn't send BackendKeyData to the client. 
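For context, a cancel request is a tiny pre-authentication packet, so its PID field is entirely client-controlled; a client that never received BackendKeyData has nothing better than zero to send. The following is a hedged, hypothetical sketch of how such a packet is laid out on the wire (the variable-length key mirrors the cancel_key_len parameter above; older protocol versions use a fixed 4-byte key):

#include <stdint.h>
#include <string.h>
#include <stddef.h>
#include <arpa/inet.h>

#define CANCEL_REQUEST_CODE 80877102    /* (1234 << 16) | 5678 */

/*
 * Fill buf with a CancelRequest packet: Int32 length (including
 * itself), Int32 request code, Int32 backend PID, then the key bytes.
 */
static size_t
build_cancel_request(uint8_t *buf, uint32_t pid,
                     const uint8_t *key, uint32_t keylen)
{
    uint32_t    v;

    v = htonl(12 + keylen);
    memcpy(buf, &v, 4);
    v = htonl(CANCEL_REQUEST_CODE);
    memcpy(buf + 4, &v, 4);
    v = htonl(pid);             /* stays 0 if BackendKeyData never arrived */
    memcpy(buf + 8, &v, 4);
    memcpy(buf + 12, key, keylen);
    return 12 + keylen;
}

int
main(void)
{
    uint8_t     packet[16];
    const uint8_t key[4] = {0, 0, 0, 0};

    return build_cancel_request(packet, 0, key, 4) == 16 ? 0 : 1;
}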
Backpatch-through: 18 --- src/backend/storage/ipc/procsignal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index a9bb540b55ac2..087821311cceb 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -728,7 +728,11 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) void SendCancelRequest(int backendPID, const uint8 *cancel_key, int cancel_key_len) { - Assert(backendPID != 0); + if (backendPID == 0) + { + ereport(LOG, (errmsg("invalid cancel request with PID 0"))); + return; + } /* * See if we have a matching backend. Reading the pss_pid and From 1a5212775e46fd573e74c9213392177920f0efd6 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 30 Jul 2025 11:55:42 +0900 Subject: [PATCH 396/602] Fix ./configure checks with __cpuidex() and __cpuid() The configure checks used two incorrect functions when checking the presence of some routines in an environment: - __get_cpuidex() for the check of __cpuidex(). - __get_cpuid() for the check of __cpuid(). This means that Postgres has never been able to detect the presence of these functions, impacting environments where these exist, like Windows. Simply fixing the function name does not work. For example, using configure with MinGW on Windows causes the checks to detect all four of __get_cpuid(), __get_cpuid_count(), __cpuidex() and __cpuid() to be available, causing a compilation failure as this messes up with the MinGW headers, since we would include both <cpuid.h> and <intrin.h>. The Postgres code expects only one of { __get_cpuid(), __cpuid() } and one of { __get_cpuid_count(), __cpuidex() } to exist. This commit reshapes the configure checks to do exactly what meson is doing, which has been working well for us: check one, then the other, but never allow both to be detected in a given build. The logic has been wrong since 3dc2d62d0486 and 792752af4eb5, where these checks were introduced (the second case is most likely a copy-pasto coming from the first case), with meson documenting that the configure checks were broken. As far as I can see, they are not, once applied consistently with what the code expects, but let's see if the buildfarm has something different to say. The comment in meson.build is adjusted as well, to reflect the new reality. Author: Lukas Fittl Co-authored-by: Michael Paquier Discussion: https://postgr.es/m/aIgwNYGVt5aRAqTJ@paquier.xyz Backpatch-through: 13 --- configure | 63 +++++++++++++++++++++++++++------------------------- configure.ac | 49 +++++++++++++++++++++------------------- meson.build | 5 +---- 3 files changed, 60 insertions(+), 57 deletions(-) diff --git a/configure b/configure index 8a535da6b7a98..507a2437c3308 100755 --- a/configure +++ b/configure @@ -17565,7 +17565,7 @@ $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h fi -# Check for x86 cpuid instruction +# Check for __get_cpuid() and __cpuid() { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid" >&5 $as_echo_n "checking for __get_cpuid... " >&6; } if ${pgac_cv__get_cpuid+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { unsigned int exx[4] = {0, 0, 0, 0}; __get_cpuid(exx[0], 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : pgac_cv__get_cpuid="yes" else pgac_cv__get_cpuid="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid" >&5 $as_echo "$pgac_cv__get_cpuid" >&6; } if test x"$pgac_cv__get_cpuid" = x"yes"; then $as_echo "#define HAVE__GET_CPUID 1" >>confdefs.h -fi - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid_count" >&5 -$as_echo_n "checking for __get_cpuid_count... " >&6; } -if ${pgac_cv__get_cpuid_count+:} false; then : +else + # __cpuid() + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5 +$as_echo_n "checking for __cpuid... 
" >&6; } +if ${pgac_cv__cpuid+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include +#include int main () { unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); + __cpuid(exx, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : - pgac_cv__get_cpuid_count="yes" + pgac_cv__cpuid="yes" else - pgac_cv__get_cpuid_count="no" + pgac_cv__cpuid="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid_count" >&5 -$as_echo "$pgac_cv__get_cpuid_count" >&6; } -if test x"$pgac_cv__get_cpuid_count" = x"yes"; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5 +$as_echo "$pgac_cv__cpuid" >&6; } + if test x"$pgac_cv__cpuid" = x"yes"; then -$as_echo "#define HAVE__GET_CPUID_COUNT 1" >>confdefs.h +$as_echo "#define HAVE__CPUID 1" >>confdefs.h + fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5 -$as_echo_n "checking for __cpuid... " >&6; } -if ${pgac_cv__cpuid+:} false; then : +# Check for __get_cpuid_count() and __cpuidex() in a similar fashion. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid_count" >&5 +$as_echo_n "checking for __get_cpuid_count... " >&6; } +if ${pgac_cv__get_cpuid_count+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include +#include int main () { unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuid(exx[0], 1); + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : - pgac_cv__cpuid="yes" + pgac_cv__get_cpuid_count="yes" else - pgac_cv__cpuid="no" + pgac_cv__get_cpuid_count="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5 -$as_echo "$pgac_cv__cpuid" >&6; } -if test x"$pgac_cv__cpuid" = x"yes"; then - -$as_echo "#define HAVE__CPUID 1" >>confdefs.h +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid_count" >&5 +$as_echo "$pgac_cv__get_cpuid_count" >&6; } +if test x"$pgac_cv__get_cpuid_count" = x"yes"; then -fi +$as_echo "#define HAVE__GET_CPUID_COUNT 1" >>confdefs.h -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuidex" >&5 +else + # __cpuidex() + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuidex" >&5 $as_echo_n "checking for __cpuidex... 
" >&6; } if ${pgac_cv__cpuidex+:} false; then : $as_echo_n "(cached) " >&6 @@ -17680,7 +17682,7 @@ int main () { unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuidex(exx[0], 7, 0); + __cpuidex(exx, 7, 0); ; return 0; @@ -17696,10 +17698,11 @@ rm -f core conftest.err conftest.$ac_objext \ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuidex" >&5 $as_echo "$pgac_cv__cpuidex" >&6; } -if test x"$pgac_cv__cpuidex" = x"yes"; then + if test x"$pgac_cv__cpuidex" = x"yes"; then $as_echo "#define HAVE__CPUIDEX 1" >>confdefs.h + fi fi # Check for XSAVE intrinsics diff --git a/configure.ac b/configure.ac index e72201e679b56..5f4548adc5cd1 100644 --- a/configure.ac +++ b/configure.ac @@ -2044,7 +2044,7 @@ PGAC_HAVE_GCC__ATOMIC_INT32_CAS PGAC_HAVE_GCC__ATOMIC_INT64_CAS -# Check for x86 cpuid instruction +# Check for __get_cpuid() and __cpuid() AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid], [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], [[unsigned int exx[4] = {0, 0, 0, 0}; @@ -2054,8 +2054,21 @@ AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid], [pgac_cv__get_cpuid="no"])]) if test x"$pgac_cv__get_cpuid" = x"yes"; then AC_DEFINE(HAVE__GET_CPUID, 1, [Define to 1 if you have __get_cpuid.]) +else + # __cpuid() + AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [[unsigned int exx[4] = {0, 0, 0, 0}; + __cpuid(exx, 1); + ]])], + [pgac_cv__cpuid="yes"], + [pgac_cv__cpuid="no"])]) + if test x"$pgac_cv__cpuid" = x"yes"; then + AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.]) + fi fi +# Check for __get_cpuid_count() and __cpuidex() in a similar fashion. AC_CACHE_CHECK([for __get_cpuid_count], [pgac_cv__get_cpuid_count], [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], [[unsigned int exx[4] = {0, 0, 0, 0}; @@ -2065,28 +2078,18 @@ AC_CACHE_CHECK([for __get_cpuid_count], [pgac_cv__get_cpuid_count], [pgac_cv__get_cpuid_count="no"])]) if test x"$pgac_cv__get_cpuid_count" = x"yes"; then AC_DEFINE(HAVE__GET_CPUID_COUNT, 1, [Define to 1 if you have __get_cpuid_count.]) -fi - -AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid], -[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], - [[unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuid(exx[0], 1); - ]])], - [pgac_cv__cpuid="yes"], - [pgac_cv__cpuid="no"])]) -if test x"$pgac_cv__cpuid" = x"yes"; then - AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.]) -fi - -AC_CACHE_CHECK([for __cpuidex], [pgac_cv__cpuidex], -[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], - [[unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuidex(exx[0], 7, 0); - ]])], - [pgac_cv__cpuidex="yes"], - [pgac_cv__cpuidex="no"])]) -if test x"$pgac_cv__cpuidex" = x"yes"; then - AC_DEFINE(HAVE__CPUIDEX, 1, [Define to 1 if you have __cpuidex.]) +else + # __cpuidex() + AC_CACHE_CHECK([for __cpuidex], [pgac_cv__cpuidex], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [[unsigned int exx[4] = {0, 0, 0, 0}; + __cpuidex(exx, 7, 0); + ]])], + [pgac_cv__cpuidex="yes"], + [pgac_cv__cpuidex="no"])]) + if test x"$pgac_cv__cpuidex" = x"yes"; then + AC_DEFINE(HAVE__CPUIDEX, 1, [Define to 1 if you have __cpuidex.]) + fi fi # Check for XSAVE intrinsics diff --git a/meson.build b/meson.build index 5365aaf95e64b..ca423dc8e12f3 100644 --- a/meson.build +++ b/meson.build @@ -1996,10 +1996,7 @@ if cc.links(''' cdata.set('HAVE__BUILTIN_OP_OVERFLOW', 1) endif - -# XXX: The configure.ac check for __cpuid() is broken, we don't copy that -# here. To prevent problems due to two detection methods working, stop -# checking after one. 
+# Check for __get_cpuid() and __cpuid(). if cc.links(''' #include int main(int arg, char **argv) From 00c977177956c4b4d12f8c6518d4269b086deca8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 30 Jul 2025 09:51:45 +0200 Subject: [PATCH 397/602] Fix whitespace --- doc/src/sgml/pageinspect.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml index 1292933366555..f5014787c783b 100644 --- a/doc/src/sgml/pageinspect.sgml +++ b/doc/src/sgml/pageinspect.sgml @@ -741,7 +741,7 @@ test=# SELECT first_tid, nbytes, tids[0:5] AS some_tids For example: test=# SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); - lsn | nsn | rightlink | flags + lsn | nsn | rightlink | flags ------------+------------+-----------+-------- 0/0B5FE088 | 0/00000000 | 1 | {leaf} (1 row) From ce9a6244b5b4ce1df71611512a757353803404a5 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Wed, 30 Jul 2025 11:04:05 -0400 Subject: [PATCH 398/602] Revert Non text modes for pg_dumpall, and pg_restore support Recent discussions of the mechanisms used to manage global data have raised concerns about their robustness and security. Rather than try to deal with those concerns at a very late stage of the release cycle, the conclusion is to revert these features and work on them for the next release. This reverts parts or all of the following commits: 1495eff7bdb Non text modes for pg_dumpall, correspondingly change pg_restore 5db3bf7391d Clean up from commit 1495eff7bdb 289f74d0cb2 Add more TAP tests for pg_dumpall 2ef57908067 Fix a couple of error messages and tests for them b52a4a5f285 Clean up error messages from 1495eff7bdb 4170298b6ec Further cleanup for directory creation on pg_dump/pg_dumpall 22cb6d28950 Fix memory leak in pg_restore.c 928394b664b Improve various new-to-v18 appendStringInfo calls 39729ec01d2 Fix fat fingering in 22cb6d28950 5822bf21d50 Add missing space in pg_restore documentation. f09088a01d3 Free memory properly in pg_restore.c 40b9c27014d pg_restore cleanups 4aad2cb7707 Portability fix: isdigit() must be passed an unsigned char. 88e947136b4 Fix typos and grammar in the code f60420cff66 doc: Alphabetize long options for pg_dump[all]. 
bc35adee8d7 doc: Put new options in consistent order on man pages a876464abc7 Message style improvements dec6643487b Improve pg_dump/pg_dumpall help synopses and terminology 0ebd2425558 Run pgperltidy Discussion: https://postgr.es/m/20250708212819.09.nmisch@google.com Backpatch-to: 18 Reviewed-by: Noah Misch --- doc/src/sgml/ref/pg_dumpall.sgml | 89 +-- doc/src/sgml/ref/pg_restore.sgml | 66 +-- src/bin/pg_dump/meson.build | 1 - src/bin/pg_dump/parallel.c | 10 - src/bin/pg_dump/pg_backup.h | 2 +- src/bin/pg_dump/pg_backup_archiver.c | 20 +- src/bin/pg_dump/pg_backup_archiver.h | 1 - src/bin/pg_dump/pg_backup_tar.c | 2 +- src/bin/pg_dump/pg_dump.c | 2 +- src/bin/pg_dump/pg_dumpall.c | 238 ++------- src/bin/pg_dump/pg_restore.c | 772 +-------------------------- src/bin/pg_dump/t/001_basic.pl | 22 - src/bin/pg_dump/t/006_pg_dumpall.pl | 400 -------------- 13 files changed, 85 insertions(+), 1540 deletions(-) delete mode 100644 src/bin/pg_dump/t/006_pg_dumpall.pl diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 8ca68da5a5560..f4cbc8288e3ad 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -16,10 +16,7 @@ PostgreSQL documentation pg_dumpall - - - export a PostgreSQL database cluster as an SQL script or to other formats - + extract a PostgreSQL database cluster into a script file @@ -36,7 +33,7 @@ PostgreSQL documentation pg_dumpall is a utility for writing out (dumping) all PostgreSQL databases - of a cluster into an SQL script file or an archive. The output contains + of a cluster into one script file. The script file contains SQL commands that can be used as input to to restore the databases. It does this by calling for each database in the cluster. @@ -55,16 +52,11 @@ PostgreSQL documentation - Plain text SQL scripts will be written to the standard output. Use the + The SQL script will be written to the standard output. Use the / option or shell operators to redirect it into a file. - - Archives in other formats will be placed in a directory named using the - /, which is required in this case. - - pg_dumpall needs to connect several times to the PostgreSQL server (once per @@ -129,85 +121,10 @@ PostgreSQL documentation Send output to the specified file. If this is omitted, the standard output is used. - Note: This option can only be omitted when is plain - - - - - - Specify the format of dump files. In plain format, all the dump data is - sent in a single text stream. This is the default. - - In all other modes, pg_dumpall first creates two files: - global.dat and map.dat, in the directory - specified by . - The first file contains global data, such as roles and tablespaces. The second - contains a mapping between database oids and names. These files are used by - pg_restore. Data for individual databases is placed in - databases subdirectory, named using the database's oid. - - - - d - directory - - - Output directory-format archives for each database, - suitable for input into pg_restore. The directory - will have database oid as its name. - - - - - - p - plain - - - Output a plain-text SQL script file (the default). - - - - - - c - custom - - - Output a custom-format archive for each database, - suitable for input into pg_restore. The archive - will be named dboid.dmp where dboid is the - oid of the database. - - - - - - t - tar - - - Output a tar-format archive for each database, - suitable for input into pg_restore. The archive - will be named dboid.tar where dboid is the - oid of the database. 
- - - - - - - Note: see for details - of how the various non plain text archives work. - - - - - diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index b649bd3a5ae0f..2abe05d47e936 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -18,9 +18,8 @@ PostgreSQL documentation pg_restore - restore PostgreSQL databases from archives - created by pg_dump or - pg_dumpall + restore a PostgreSQL database from an + archive file created by pg_dump @@ -39,14 +38,13 @@ PostgreSQL documentation pg_restore is a utility for restoring a - PostgreSQL database or cluster from an archive - created by or - in one of the non-plain-text + PostgreSQL database from an archive + created by in one of the non-plain-text formats. It will issue the commands necessary to reconstruct the - database or cluster to the state it was in at the time it was saved. The - archives also allow pg_restore to + database to the state it was in at the time it was saved. The + archive files also allow pg_restore to be selective about what is restored, or even to reorder the items - prior to being restored. The archive formats are designed to be + prior to being restored. The archive files are designed to be portable across architectures. @@ -54,17 +52,10 @@ PostgreSQL documentation pg_restore can operate in two modes. If a database name is specified, pg_restore connects to that database and restores archive contents directly into - the database. - When restoring from a dump made by pg_dumpall, - each database will be created and then the restoration will be run in that - database. - - Otherwise, when a database name is not specified, a script containing the SQL - commands necessary to rebuild the database or cluster is created and written + the database. Otherwise, a script containing the SQL + commands necessary to rebuild the database is created and written to a file or standard output. This script output is equivalent to - the plain text output format of pg_dump or - pg_dumpall. - + the plain text output format of pg_dump. Some of the options controlling the output are therefore analogous to pg_dump options. @@ -149,8 +140,6 @@ PostgreSQL documentation commands that mention this database. Access privileges for the database itself are also restored, unless is specified. - is required when restoring multiple databases - from an archive created by pg_dumpall. @@ -246,19 +235,6 @@ PostgreSQL documentation - - - - - - Restore only global objects (roles and tablespaces), no databases. - - - This option is only relevant when restoring from an archive made using pg_dumpall. - - - - @@ -603,28 +579,6 @@ PostgreSQL documentation - - - - - Do not restore databases whose name matches - pattern. - Multiple patterns can be excluded by writing multiple - switches. The - pattern parameter is - interpreted as a pattern according to the same rules used by - psql's \d - commands (see ), - so multiple databases can also be excluded by writing wildcard - characters in the pattern. When using wildcards, be careful to - quote the pattern if needed to prevent shell wildcard expansion. - - - This option is only relevant when restoring from an archive made using pg_dumpall. 
- - - - diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index 4a4ebbd8ec94f..a2233b0a1b431 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -102,7 +102,6 @@ tests += { 't/003_pg_dump_with_server.pl', 't/004_pg_dump_parallel.pl', 't/005_pg_dump_filterfile.pl', - 't/006_pg_dumpall.pl', 't/010_dump_connstr.pl', ], }, diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c index 5974d6706fd57..086adcdc50295 100644 --- a/src/bin/pg_dump/parallel.c +++ b/src/bin/pg_dump/parallel.c @@ -333,16 +333,6 @@ on_exit_close_archive(Archive *AHX) on_exit_nicely(archive_close_connection, &shutdown_info); } -/* - * When pg_restore restores multiple databases, then update already added entry - * into array for cleanup. - */ -void -replace_on_exit_close_archive(Archive *AHX) -{ - shutdown_info.AHX = AHX; -} - /* * on_exit_nicely handler for shutting down database connections and * worker processes cleanly. diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index af0007fb6d2f1..4ebef1e864451 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -308,7 +308,7 @@ extern void SetArchiveOptions(Archive *AH, DumpOptions *dopt, RestoreOptions *ro extern void ProcessArchiveRestoreOptions(Archive *AHX); -extern void RestoreArchive(Archive *AHX, bool append_data); +extern void RestoreArchive(Archive *AHX); /* Open an existing archive */ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 30e0da31aa340..dce88f040ace3 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -87,7 +87,7 @@ static int RestoringToDB(ArchiveHandle *AH); static void dump_lo_buf(ArchiveHandle *AH); static void dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim); static void SetOutput(ArchiveHandle *AH, const char *filename, - const pg_compress_specification compression_spec, bool append_data); + const pg_compress_specification compression_spec); static CompressFileHandle *SaveOutput(ArchiveHandle *AH); static void RestoreOutput(ArchiveHandle *AH, CompressFileHandle *savedOutput); @@ -339,14 +339,9 @@ ProcessArchiveRestoreOptions(Archive *AHX) StrictNamesCheck(ropt); } -/* - * RestoreArchive - * - * If append_data is set, then append data into file as we are restoring dump - * of multiple databases which was taken by pg_dumpall. - */ +/* Public */ void -RestoreArchive(Archive *AHX, bool append_data) +RestoreArchive(Archive *AHX) { ArchiveHandle *AH = (ArchiveHandle *) AHX; RestoreOptions *ropt = AH->public.ropt; @@ -463,7 +458,7 @@ RestoreArchive(Archive *AHX, bool append_data) */ sav = SaveOutput(AH); if (ropt->filename || ropt->compression_spec.algorithm != PG_COMPRESSION_NONE) - SetOutput(AH, ropt->filename, ropt->compression_spec, append_data); + SetOutput(AH, ropt->filename, ropt->compression_spec); ahprintf(AH, "--\n-- PostgreSQL database dump\n--\n\n"); @@ -1302,7 +1297,7 @@ PrintTOCSummary(Archive *AHX) sav = SaveOutput(AH); if (ropt->filename) - SetOutput(AH, ropt->filename, out_compression_spec, false); + SetOutput(AH, ropt->filename, out_compression_spec); if (strftime(stamp_str, sizeof(stamp_str), PGDUMP_STRFTIME_FMT, localtime(&AH->createDate)) == 0) @@ -1681,8 +1676,7 @@ archprintf(Archive *AH, const char *fmt,...) 
static void SetOutput(ArchiveHandle *AH, const char *filename, - const pg_compress_specification compression_spec, - bool append_data) + const pg_compress_specification compression_spec) { CompressFileHandle *CFH; const char *mode; @@ -1702,7 +1696,7 @@ SetOutput(ArchiveHandle *AH, const char *filename, else fn = fileno(stdout); - if (append_data || AH->mode == archModeAppend) + if (AH->mode == archModeAppend) mode = PG_BINARY_A; else mode = PG_BINARY_W; diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 365073b3eae45..325b53fc9bd4b 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -394,7 +394,6 @@ struct _tocEntry extern int parallel_restore(ArchiveHandle *AH, TocEntry *te); extern void on_exit_close_archive(Archive *AHX); -extern void replace_on_exit_close_archive(Archive *AHX); extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3); diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index d94d0de2a5d17..b5ba3b46dd999 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -826,7 +826,7 @@ _CloseArchive(ArchiveHandle *AH) savVerbose = AH->public.verbose; AH->public.verbose = 0; - RestoreArchive((Archive *) AH, false); + RestoreArchive((Archive *) AH); SetArchiveOptions((Archive *) AH, savDopt, savRopt); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 6298edb26b5df..1da6bd7d9726c 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -1265,7 +1265,7 @@ main(int argc, char **argv) * right now. */ if (plainText) - RestoreArchive(fout, false); + RestoreArchive(fout); CloseArchive(fout); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 100317b1aa949..87d10df07c411 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -65,10 +65,9 @@ static void dropTablespaces(PGconn *conn); static void dumpTablespaces(PGconn *conn); static void dropDBs(PGconn *conn); static void dumpUserConfig(PGconn *conn, const char *username); -static void dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat); +static void dumpDatabases(PGconn *conn); static void dumpTimestamp(const char *msg); -static int runPgDump(const char *dbname, const char *create_opts, - char *dbfile, ArchiveFormat archDumpFormat); +static int runPgDump(const char *dbname, const char *create_opts); static void buildShSecLabels(PGconn *conn, const char *catalog_name, Oid objectId, const char *objtype, const char *objname, @@ -77,7 +76,6 @@ static void executeCommand(PGconn *conn, const char *query); static void expand_dbname_patterns(PGconn *conn, SimpleStringList *patterns, SimpleStringList *names); static void read_dumpall_filters(const char *filename, SimpleStringList *pattern); -static ArchiveFormat parseDumpFormat(const char *format); static char pg_dump_bin[MAXPGPATH]; static PQExpBuffer pgdumpopts; @@ -150,7 +148,6 @@ main(int argc, char *argv[]) {"password", no_argument, NULL, 'W'}, {"no-privileges", no_argument, NULL, 'x'}, {"no-acl", no_argument, NULL, 'x'}, - {"format", required_argument, NULL, 'F'}, /* * the following options don't have an equivalent short option letter @@ -201,8 +198,6 @@ main(int argc, char *argv[]) char *pgdb = NULL; char *use_role = NULL; const char *dumpencoding = NULL; - ArchiveFormat archDumpFormat = archNull; - const char *formatName = "p"; trivalue prompt_password = TRI_DEFAULT; bool data_only = false; bool globals_only 
= false; @@ -252,7 +247,7 @@ main(int argc, char *argv[]) pgdumpopts = createPQExpBuffer(); - while ((c = getopt_long(argc, argv, "acd:E:f:F:gh:l:Op:rsS:tU:vwWx", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "acd:E:f:gh:l:Op:rsS:tU:vwWx", long_options, &optindex)) != -1) { switch (c) { @@ -280,9 +275,7 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " -f "); appendShellString(pgdumpopts, filename); break; - case 'F': - formatName = pg_strdup(optarg); - break; + case 'g': globals_only = true; break; @@ -431,21 +424,6 @@ main(int argc, char *argv[]) exit_nicely(1); } - /* Get format for dump. */ - archDumpFormat = parseDumpFormat(formatName); - - /* - * If a non-plain format is specified, a file name is also required as the - * path to the main directory. - */ - if (archDumpFormat != archNull && - (!filename || strcmp(filename, "") == 0)) - { - pg_log_error("option -F/--format=d|c|t requires option -f/--file"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit_nicely(1); - } - /* * If password values are not required in the dump, switch to using * pg_roles which is equally useful, just more likely to have unrestricted @@ -510,33 +488,6 @@ main(int argc, char *argv[]) if (sequence_data) appendPQExpBufferStr(pgdumpopts, " --sequence-data"); - /* - * Open the output file if required, otherwise use stdout. If required, - * then create new directory and global.dat file. - */ - if (archDumpFormat != archNull) - { - char global_path[MAXPGPATH]; - - /* Create new directory or accept the empty existing directory. */ - create_or_open_dir(filename); - - snprintf(global_path, MAXPGPATH, "%s/global.dat", filename); - - OPF = fopen(global_path, PG_BINARY_W); - if (!OPF) - pg_fatal("could not open file \"%s\": %m", global_path); - } - else if (filename) - { - OPF = fopen(filename, PG_BINARY_W); - if (!OPF) - pg_fatal("could not open output file \"%s\": %m", - filename); - } - else - OPF = stdout; - /* * If there was a database specified on the command line, use that, * otherwise try to connect to database "postgres", and failing that @@ -576,6 +527,19 @@ main(int argc, char *argv[]) expand_dbname_patterns(conn, &database_exclude_patterns, &database_exclude_names); + /* + * Open the output file if required, otherwise use stdout + */ + if (filename) + { + OPF = fopen(filename, PG_BINARY_W); + if (!OPF) + pg_fatal("could not open output file \"%s\": %m", + filename); + } + else + OPF = stdout; + /* * Set the client encoding if requested. 
*/ @@ -675,7 +639,7 @@ main(int argc, char *argv[]) } if (!globals_only && !roles_only && !tablespaces_only) - dumpDatabases(conn, archDumpFormat); + dumpDatabases(conn); PQfinish(conn); @@ -688,7 +652,7 @@ main(int argc, char *argv[]) fclose(OPF); /* sync the resulting file, errors are not fatal */ - if (dosync && (archDumpFormat == archNull)) + if (dosync) (void) fsync_fname(filename, false); } @@ -699,14 +663,12 @@ main(int argc, char *argv[]) static void help(void) { - printf(_("%s exports a PostgreSQL database cluster as an SQL script or to other formats.\n\n"), progname); + printf(_("%s exports a PostgreSQL database cluster as an SQL script.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]...\n"), progname); printf(_("\nGeneral options:\n")); printf(_(" -f, --file=FILENAME output file name\n")); - printf(_(" -F, --format=c|d|t|p output file format (custom, directory, tar,\n" - " plain text (default))\n")); printf(_(" -v, --verbose verbose mode\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); @@ -1013,6 +975,9 @@ dumpRoles(PGconn *conn) * We do it this way because config settings for roles could mention the * names of other roles. */ + if (PQntuples(res) > 0) + fprintf(OPF, "\n--\n-- User Configurations\n--\n"); + for (i = 0; i < PQntuples(res); i++) dumpUserConfig(conn, PQgetvalue(res, i, i_rolname)); @@ -1526,7 +1491,6 @@ dumpUserConfig(PGconn *conn, const char *username) { PQExpBuffer buf = createPQExpBuffer(); PGresult *res; - static bool header_done = false; printfPQExpBuffer(buf, "SELECT unnest(setconfig) FROM pg_db_role_setting " "WHERE setdatabase = 0 AND setrole = " @@ -1538,13 +1502,7 @@ dumpUserConfig(PGconn *conn, const char *username) res = executeQuery(conn, buf->data); if (PQntuples(res) > 0) - { - if (!header_done) - fprintf(OPF, "\n--\n-- User Configurations\n--\n"); - header_done = true; - fprintf(OPF, "\n--\n-- User Config \"%s\"\n--\n\n", username); - } for (int i = 0; i < PQntuples(res); i++) { @@ -1618,13 +1576,10 @@ expand_dbname_patterns(PGconn *conn, * Dump contents of databases. */ static void -dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) +dumpDatabases(PGconn *conn) { PGresult *res; int i; - char db_subdir[MAXPGPATH]; - char dbfilepath[MAXPGPATH]; - FILE *map_file = NULL; /* * Skip databases marked not datallowconn, since we'd be unable to connect @@ -1638,42 +1593,18 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) * doesn't have some failure mode with --clean. */ res = executeQuery(conn, - "SELECT datname, oid " + "SELECT datname " "FROM pg_database d " "WHERE datallowconn AND datconnlimit != -2 " "ORDER BY (datname <> 'template1'), datname"); - if (archDumpFormat == archNull && PQntuples(res) > 0) + if (PQntuples(res) > 0) fprintf(OPF, "--\n-- Databases\n--\n\n"); - /* - * If directory/tar/custom format is specified, create a subdirectory - * under the main directory and each database dump file or subdirectory - * will be created in that subdirectory by pg_dump. - */ - if (archDumpFormat != archNull) - { - char map_file_path[MAXPGPATH]; - - snprintf(db_subdir, MAXPGPATH, "%s/databases", filename); - - /* Create a subdirectory with 'databases' name under main directory. 
*/ - if (mkdir(db_subdir, pg_dir_create_mode) != 0) - pg_fatal("could not create directory \"%s\": %m", db_subdir); - - snprintf(map_file_path, MAXPGPATH, "%s/map.dat", filename); - - /* Create a map file (to store dboid and dbname) */ - map_file = fopen(map_file_path, PG_BINARY_W); - if (!map_file) - pg_fatal("could not open file \"%s\": %m", map_file_path); - } - for (i = 0; i < PQntuples(res); i++) { char *dbname = PQgetvalue(res, i, 0); - char *oid = PQgetvalue(res, i, 1); - const char *create_opts = ""; + const char *create_opts; int ret; /* Skip template0, even if it's not marked !datallowconn. */ @@ -1687,27 +1618,9 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) continue; } - /* - * If this is not a plain format dump, then append dboid and dbname to - * the map.dat file. - */ - if (archDumpFormat != archNull) - { - if (archDumpFormat == archCustom) - snprintf(dbfilepath, MAXPGPATH, "\"%s\"/\"%s\".dmp", db_subdir, oid); - else if (archDumpFormat == archTar) - snprintf(dbfilepath, MAXPGPATH, "\"%s\"/\"%s\".tar", db_subdir, oid); - else - snprintf(dbfilepath, MAXPGPATH, "\"%s\"/\"%s\"", db_subdir, oid); - - /* Put one line entry for dboid and dbname in map file. */ - fprintf(map_file, "%s %s\n", oid, dbname); - } - pg_log_info("dumping database \"%s\"", dbname); - if (archDumpFormat == archNull) - fprintf(OPF, "--\n-- Database \"%s\" dump\n--\n\n", dbname); + fprintf(OPF, "--\n-- Database \"%s\" dump\n--\n\n", dbname); /* * We assume that "template1" and "postgres" already exist in the @@ -1721,9 +1634,12 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) { if (output_clean) create_opts = "--clean --create"; - /* Since pg_dump won't emit a \connect command, we must */ - else if (archDumpFormat == archNull) + else + { + create_opts = ""; + /* Since pg_dump won't emit a \connect command, we must */ fprintf(OPF, "\\connect %s\n\n", dbname); + } } else create_opts = "--create"; @@ -1731,30 +1647,19 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) if (filename) fclose(OPF); - ret = runPgDump(dbname, create_opts, dbfilepath, archDumpFormat); + ret = runPgDump(dbname, create_opts); if (ret != 0) pg_fatal("pg_dump failed on database \"%s\", exiting", dbname); if (filename) { - char global_path[MAXPGPATH]; - - if (archDumpFormat != archNull) - snprintf(global_path, MAXPGPATH, "%s/global.dat", filename); - else - snprintf(global_path, MAXPGPATH, "%s", filename); - - OPF = fopen(global_path, PG_BINARY_A); + OPF = fopen(filename, PG_BINARY_A); if (!OPF) pg_fatal("could not re-open the output file \"%s\": %m", - global_path); + filename); } } - /* Close map file */ - if (archDumpFormat != archNull) - fclose(map_file); - PQclear(res); } @@ -1764,8 +1669,7 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) * Run pg_dump on dbname, with specified options. */ static int -runPgDump(const char *dbname, const char *create_opts, char *dbfile, - ArchiveFormat archDumpFormat) +runPgDump(const char *dbname, const char *create_opts) { PQExpBufferData connstrbuf; PQExpBufferData cmd; @@ -1774,36 +1678,17 @@ runPgDump(const char *dbname, const char *create_opts, char *dbfile, initPQExpBuffer(&connstrbuf); initPQExpBuffer(&cmd); + printfPQExpBuffer(&cmd, "\"%s\" %s %s", pg_dump_bin, + pgdumpopts->data, create_opts); + /* - * If this is not a plain format dump, then append file name and dump - * format to the pg_dump command to get archive dump. + * If we have a filename, use the undocumented plain-append pg_dump + * format. 
*/ - if (archDumpFormat != archNull) - { - printfPQExpBuffer(&cmd, "\"%s\" -f %s %s", pg_dump_bin, - dbfile, create_opts); - - if (archDumpFormat == archDirectory) - appendPQExpBufferStr(&cmd, " --format=directory "); - else if (archDumpFormat == archCustom) - appendPQExpBufferStr(&cmd, " --format=custom "); - else if (archDumpFormat == archTar) - appendPQExpBufferStr(&cmd, " --format=tar "); - } + if (filename) + appendPQExpBufferStr(&cmd, " -Fa "); else - { - printfPQExpBuffer(&cmd, "\"%s\" %s %s", pg_dump_bin, - pgdumpopts->data, create_opts); - - /* - * If we have a filename, use the undocumented plain-append pg_dump - * format. - */ - if (filename) - appendPQExpBufferStr(&cmd, " -Fa "); - else - appendPQExpBufferStr(&cmd, " -Fp "); - } + appendPQExpBufferStr(&cmd, " -Fp "); /* * Append the database name to the already-constructed stem of connection @@ -1948,36 +1833,3 @@ read_dumpall_filters(const char *filename, SimpleStringList *pattern) filter_free(&fstate); } - -/* - * parseDumpFormat - * - * This will validate dump formats. - */ -static ArchiveFormat -parseDumpFormat(const char *format) -{ - ArchiveFormat archDumpFormat; - - if (pg_strcasecmp(format, "c") == 0) - archDumpFormat = archCustom; - else if (pg_strcasecmp(format, "custom") == 0) - archDumpFormat = archCustom; - else if (pg_strcasecmp(format, "d") == 0) - archDumpFormat = archDirectory; - else if (pg_strcasecmp(format, "directory") == 0) - archDumpFormat = archDirectory; - else if (pg_strcasecmp(format, "p") == 0) - archDumpFormat = archNull; - else if (pg_strcasecmp(format, "plain") == 0) - archDumpFormat = archNull; - else if (pg_strcasecmp(format, "t") == 0) - archDumpFormat = archTar; - else if (pg_strcasecmp(format, "tar") == 0) - archDumpFormat = archTar; - else - pg_fatal("unrecognized output format \"%s\"; please specify \"c\", \"d\", \"p\", or \"t\"", - format); - - return archDumpFormat; -} diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 6ef789cb06d63..b4e1acdb63fbb 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -2,7 +2,7 @@ * * pg_restore.c * pg_restore is an utility extracting postgres database definitions - * from a backup archive created by pg_dump/pg_dumpall using the archiver + * from a backup archive created by pg_dump using the archiver * interface. 
* * pg_restore will read the backup archive and @@ -41,15 +41,11 @@ #include "postgres_fe.h" #include -#include #ifdef HAVE_TERMIOS_H #include #endif -#include "common/string.h" -#include "connectdb.h" #include "fe_utils/option_utils.h" -#include "fe_utils/string_utils.h" #include "filter.h" #include "getopt_long.h" #include "parallel.h" @@ -57,43 +53,18 @@ static void usage(const char *progname); static void read_restore_filters(const char *filename, RestoreOptions *opts); -static bool file_exists_in_directory(const char *dir, const char *filename); -static int restore_one_database(const char *inputFileSpec, RestoreOptions *opts, - int numWorkers, bool append_data, int num); -static int read_one_statement(StringInfo inBuf, FILE *pfile); -static int restore_all_databases(PGconn *conn, const char *dumpdirpath, - SimpleStringList db_exclude_patterns, RestoreOptions *opts, int numWorkers); -static int process_global_sql_commands(PGconn *conn, const char *dumpdirpath, - const char *outfile); -static void copy_or_print_global_file(const char *outfile, FILE *pfile); -static int get_dbnames_list_to_restore(PGconn *conn, - SimplePtrList *dbname_oid_list, - SimpleStringList db_exclude_patterns); -static int get_dbname_oid_list_from_mfile(const char *dumpdirpath, - SimplePtrList *dbname_oid_list); - -/* - * Stores a database OID and the corresponding name. - */ -typedef struct DbOidName -{ - Oid oid; - char str[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated string here */ -} DbOidName; - int main(int argc, char **argv) { RestoreOptions *opts; int c; + int exit_code; int numWorkers = 1; + Archive *AH; char *inputFileSpec; bool data_only = false; bool schema_only = false; - int n_errors = 0; - bool globals_only = false; - SimpleStringList db_exclude_patterns = {NULL, NULL}; static int disable_triggers = 0; static int enable_row_security = 0; static int if_exists = 0; @@ -119,7 +90,6 @@ main(int argc, char **argv) {"clean", 0, NULL, 'c'}, {"create", 0, NULL, 'C'}, {"data-only", 0, NULL, 'a'}, - {"globals-only", 0, NULL, 'g'}, {"dbname", 1, NULL, 'd'}, {"exit-on-error", 0, NULL, 'e'}, {"exclude-schema", 1, NULL, 'N'}, @@ -174,7 +144,6 @@ main(int argc, char **argv) {"with-statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 4}, - {"exclude-database", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; @@ -203,7 +172,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "acCd:ef:F:gh:I:j:lL:n:N:Op:P:RsS:t:T:U:vwWx1", + while ((c = getopt_long(argc, argv, "acCd:ef:F:h:I:j:lL:n:N:Op:P:RsS:t:T:U:vwWx1", cmdopts, NULL)) != -1) { switch (c) @@ -230,14 +199,11 @@ main(int argc, char **argv) if (strlen(optarg) != 0) opts->formatName = pg_strdup(optarg); break; - case 'g': - /* restore only global.dat file from directory */ - globals_only = true; - break; case 'h': if (strlen(optarg) != 0) opts->cparams.pghost = pg_strdup(optarg); break; + case 'j': /* number of restore jobs */ if (!option_parse_int(optarg, "-j/--jobs", 1, PG_MAX_JOBS, @@ -352,9 +318,6 @@ main(int argc, char **argv) exit(1); opts->exit_on_error = true; break; - case 6: /* database patterns to skip */ - simple_string_list_append(&db_exclude_patterns, optarg); - break; default: /* getopt_long already emitted a complaint */ @@ -382,13 +345,6 @@ main(int argc, char **argv) if (!opts->cparams.dbname && !opts->filename && !opts->tocSummary) pg_fatal("one of -d/--dbname and -f/--file must be specified"); - if (db_exclude_patterns.head != NULL && 
globals_only) - { - pg_log_error("option --exclude-database cannot be used together with -g/--globals-only"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit_nicely(1); - } - /* Should get at most one of -d and -f, else user is confused */ if (opts->cparams.dbname) { @@ -496,114 +452,6 @@ main(int argc, char **argv) opts->formatName); } - /* - * If toc.dat file is not present in the current path, then check for - * global.dat. If global.dat file is present, then restore all the - * databases from map.dat (if it exists), but skip restoring those - * matching --exclude-database patterns. - */ - if (inputFileSpec != NULL && !file_exists_in_directory(inputFileSpec, "toc.dat") && - file_exists_in_directory(inputFileSpec, "global.dat")) - { - PGconn *conn = NULL; /* Connection to restore global sql - * commands. */ - - /* - * Can only use --list or --use-list options with a single database - * dump. - */ - if (opts->tocSummary) - pg_fatal("option -l/--list cannot be used when restoring an archive created by pg_dumpall"); - else if (opts->tocFile) - pg_fatal("option -L/--use-list cannot be used when restoring an archive created by pg_dumpall"); - - /* - * To restore from a pg_dumpall archive, -C (create database) option - * must be specified unless we are only restoring globals. - */ - if (!globals_only && opts->createDB != 1) - { - pg_log_error("option -C/--create must be specified when restoring an archive created by pg_dumpall"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - pg_log_error_hint("Individual databases can be restored using their specific archives."); - exit_nicely(1); - } - - /* - * Connect to the database to execute global sql commands from - * global.dat file. - */ - if (opts->cparams.dbname) - { - conn = ConnectDatabase(opts->cparams.dbname, NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - - - if (!conn) - pg_fatal("could not connect to database \"%s\"", opts->cparams.dbname); - } - - /* If globals-only, then return from here. */ - if (globals_only) - { - /* - * Open global.dat file and execute/append all the global sql - * commands. - */ - n_errors = process_global_sql_commands(conn, inputFileSpec, - opts->filename); - - if (conn) - PQfinish(conn); - - pg_log_info("database restoring skipped because option -g/--globals-only was specified"); - } - else - { - /* Now restore all the databases from map.dat */ - n_errors = restore_all_databases(conn, inputFileSpec, db_exclude_patterns, - opts, numWorkers); - } - - /* Free db pattern list. */ - simple_string_list_destroy(&db_exclude_patterns); - } - else /* process if global.dat file does not exist. */ - { - if (db_exclude_patterns.head != NULL) - pg_fatal("option --exclude-database can be used only when restoring an archive created by pg_dumpall"); - - if (globals_only) - pg_fatal("option -g/--globals-only can be used only when restoring an archive created by pg_dumpall"); - - n_errors = restore_one_database(inputFileSpec, opts, numWorkers, false, 0); - } - - /* Done, print a summary of ignored errors during restore. */ - if (n_errors) - { - pg_log_warning("errors ignored on restore: %d", n_errors); - return 1; - } - - return 0; -} - -/* - * restore_one_database - * - * This will restore one database using toc.dat file. - * - * returns the number of errors while doing restore. 
- */ -static int -restore_one_database(const char *inputFileSpec, RestoreOptions *opts, - int numWorkers, bool append_data, int num) -{ - Archive *AH; - int n_errors; - AH = OpenArchive(inputFileSpec, opts->format); SetArchiveOptions(AH, NULL, opts); @@ -611,15 +459,9 @@ restore_one_database(const char *inputFileSpec, RestoreOptions *opts, /* * We don't have a connection yet but that doesn't matter. The connection * is initialized to NULL and if we terminate through exit_nicely() while - * it's still NULL, the cleanup function will just be a no-op. If we are - * restoring multiple databases, then only update AX handle for cleanup as - * the previous entry was already in the array and we had closed previous - * connection, so we can use the same array slot. + * it's still NULL, the cleanup function will just be a no-op. */ - if (!append_data || num == 0) - on_exit_close_archive(AH); - else - replace_on_exit_close_archive(AH); + on_exit_close_archive(AH); /* Let the archiver know how noisy to be */ AH->verbose = opts->verbose; @@ -639,21 +481,25 @@ restore_one_database(const char *inputFileSpec, RestoreOptions *opts, else { ProcessArchiveRestoreOptions(AH); - RestoreArchive(AH, append_data); + RestoreArchive(AH); } - n_errors = AH->n_errors; + /* done, print a summary of ignored errors */ + if (AH->n_errors) + pg_log_warning("errors ignored on restore: %d", AH->n_errors); /* AH may be freed in CloseArchive? */ + exit_code = AH->n_errors ? 1 : 0; + CloseArchive(AH); - return n_errors; + return exit_code; } static void usage(const char *progname) { - printf(_("%s restores PostgreSQL databases from archives created by pg_dump or pg_dumpall.\n\n"), progname); + printf(_("%s restores a PostgreSQL database from an archive created by pg_dump.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]... 
[FILE]\n"), progname); @@ -671,7 +517,6 @@ usage(const char *progname) printf(_(" -c, --clean clean (drop) database objects before recreating\n")); printf(_(" -C, --create create the target database\n")); printf(_(" -e, --exit-on-error exit on error, default is to continue\n")); - printf(_(" -g, --globals-only restore only global objects, no databases\n")); printf(_(" -I, --index=NAME restore named index\n")); printf(_(" -j, --jobs=NUM use this many parallel jobs to restore\n")); printf(_(" -L, --use-list=FILENAME use table of contents from this file for\n" @@ -688,7 +533,6 @@ usage(const char *progname) printf(_(" -1, --single-transaction restore as a single transaction\n")); printf(_(" --disable-triggers disable triggers during data-only restore\n")); printf(_(" --enable-row-security enable row security\n")); - printf(_(" --exclude-database=PATTERN do not restore the specified database(s)\n")); printf(_(" --filter=FILENAME restore or skip objects based on expressions\n" " in FILENAME\n")); printf(_(" --if-exists use IF EXISTS when dropping objects\n")); @@ -725,8 +569,8 @@ usage(const char *progname) printf(_(" --role=ROLENAME do SET ROLE before restore\n")); printf(_("\n" - "The options -I, -n, -N, -P, -t, -T, --section, and --exclude-database can be\n" - "combined and specified multiple times to select multiple objects.\n")); + "The options -I, -n, -N, -P, -t, -T, and --section can be combined and specified\n" + "multiple times to select multiple objects.\n")); printf(_("\nIf no input file name is supplied, then standard input is used.\n\n")); printf(_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); @@ -831,585 +675,3 @@ read_restore_filters(const char *filename, RestoreOptions *opts) filter_free(&fstate); } - -/* - * file_exists_in_directory - * - * Returns true if the file exists in the given directory. - */ -static bool -file_exists_in_directory(const char *dir, const char *filename) -{ - struct stat st; - char buf[MAXPGPATH]; - - if (snprintf(buf, MAXPGPATH, "%s/%s", dir, filename) >= MAXPGPATH) - pg_fatal("directory name too long: \"%s\"", dir); - - return (stat(buf, &st) == 0 && S_ISREG(st.st_mode)); -} - -/* - * read_one_statement - * - * This will start reading from passed file pointer using fgetc and read till - * semicolon(sql statement terminator for global.dat file) - * - * EOF is returned if end-of-file input is seen; time to shut down. - */ - -static int -read_one_statement(StringInfo inBuf, FILE *pfile) -{ - int c; /* character read from getc() */ - int m; - - StringInfoData q; - - initStringInfo(&q); - - resetStringInfo(inBuf); - - /* - * Read characters until EOF or the appropriate delimiter is seen. - */ - while ((c = fgetc(pfile)) != EOF) - { - if (c != '\'' && c != '"' && c != '\n' && c != ';') - { - appendStringInfoChar(inBuf, (char) c); - while ((c = fgetc(pfile)) != EOF) - { - if (c != '\'' && c != '"' && c != ';' && c != '\n') - appendStringInfoChar(inBuf, (char) c); - else - break; - } - } - - if (c == '\'' || c == '"') - { - appendStringInfoChar(&q, (char) c); - m = c; - - while ((c = fgetc(pfile)) != EOF) - { - appendStringInfoChar(&q, (char) c); - - if (c == m) - { - appendStringInfoString(inBuf, q.data); - resetStringInfo(&q); - break; - } - } - } - - if (c == ';') - { - appendStringInfoChar(inBuf, (char) ';'); - break; - } - - if (c == '\n') - appendStringInfoChar(inBuf, (char) '\n'); - } - - pg_free(q.data); - - /* No input before EOF signal means time to quit. 
*/ - if (c == EOF && inBuf->len == 0) - return EOF; - - /* return something that's not EOF */ - return 'Q'; -} - -/* - * get_dbnames_list_to_restore - * - * This will mark for skipping any entries from dbname_oid_list that pattern match an - * entry in the db_exclude_patterns list. - * - * Returns the number of database to be restored. - * - */ -static int -get_dbnames_list_to_restore(PGconn *conn, - SimplePtrList *dbname_oid_list, - SimpleStringList db_exclude_patterns) -{ - int count_db = 0; - PQExpBuffer query; - PGresult *res; - - query = createPQExpBuffer(); - - if (!conn) - pg_log_info("considering PATTERN as NAME for --exclude-database option as no database connection while doing pg_restore"); - - /* - * Process one by one all dbnames and if specified to skip restoring, then - * remove dbname from list. - */ - for (SimplePtrListCell *db_cell = dbname_oid_list->head; - db_cell; db_cell = db_cell->next) - { - DbOidName *dbidname = (DbOidName *) db_cell->ptr; - bool skip_db_restore = false; - PQExpBuffer db_lit = createPQExpBuffer(); - - appendStringLiteralConn(db_lit, dbidname->str, conn); - - for (SimpleStringListCell *pat_cell = db_exclude_patterns.head; pat_cell; pat_cell = pat_cell->next) - { - /* - * If there is an exact match then we don't need to try a pattern - * match - */ - if (pg_strcasecmp(dbidname->str, pat_cell->val) == 0) - skip_db_restore = true; - /* Otherwise, try a pattern match if there is a connection */ - else if (conn) - { - int dotcnt; - - appendPQExpBufferStr(query, "SELECT 1 "); - processSQLNamePattern(conn, query, pat_cell->val, false, - false, NULL, db_lit->data, - NULL, NULL, NULL, &dotcnt); - - if (dotcnt > 0) - { - pg_log_error("improper qualified name (too many dotted names): %s", - dbidname->str); - PQfinish(conn); - exit_nicely(1); - } - - res = executeQuery(conn, query->data); - - if ((PQresultStatus(res) == PGRES_TUPLES_OK) && PQntuples(res)) - { - skip_db_restore = true; - pg_log_info("database name \"%s\" matches exclude pattern \"%s\"", dbidname->str, pat_cell->val); - } - - PQclear(res); - resetPQExpBuffer(query); - } - - if (skip_db_restore) - break; - } - - destroyPQExpBuffer(db_lit); - - /* - * Mark db to be skipped or increment the counter of dbs to be - * restored - */ - if (skip_db_restore) - { - pg_log_info("excluding database \"%s\"", dbidname->str); - dbidname->oid = InvalidOid; - } - else - { - count_db++; - } - } - - destroyPQExpBuffer(query); - - return count_db; -} - -/* - * get_dbname_oid_list_from_mfile - * - * Open map.dat file and read line by line and then prepare a list of database - * names and corresponding db_oid. - * - * Returns, total number of database names in map.dat file. - */ -static int -get_dbname_oid_list_from_mfile(const char *dumpdirpath, SimplePtrList *dbname_oid_list) -{ - StringInfoData linebuf; - FILE *pfile; - char map_file_path[MAXPGPATH]; - int count = 0; - - - /* - * If there is only global.dat file in dump, then return from here as - * there is no database to restore. - */ - if (!file_exists_in_directory(dumpdirpath, "map.dat")) - { - pg_log_info("database restoring is skipped because file \"%s\" does not exist in directory \"%s\"", "map.dat", dumpdirpath); - return 0; - } - - snprintf(map_file_path, MAXPGPATH, "%s/map.dat", dumpdirpath); - - /* Open map.dat file. */ - pfile = fopen(map_file_path, PG_BINARY_R); - - if (pfile == NULL) - pg_fatal("could not open file \"%s\": %m", map_file_path); - - initStringInfo(&linebuf); - - /* Append all the dbname/db_oid combinations to the list. 
*/ - while (pg_get_line_buf(pfile, &linebuf)) - { - Oid db_oid = InvalidOid; - char *dbname; - DbOidName *dbidname; - int namelen; - char *p = linebuf.data; - - /* Extract dboid. */ - while (isdigit((unsigned char) *p)) - p++; - if (p > linebuf.data && *p == ' ') - { - sscanf(linebuf.data, "%u", &db_oid); - p++; - } - - /* dbname is the rest of the line */ - dbname = p; - namelen = strlen(dbname); - - /* Report error and exit if the file has any corrupted data. */ - if (!OidIsValid(db_oid) || namelen <= 1) - pg_fatal("invalid entry in file \"%s\" on line %d", map_file_path, - count + 1); - - pg_log_info("found database \"%s\" (OID: %u) in file \"%s\"", - dbname, db_oid, map_file_path); - - dbidname = pg_malloc(offsetof(DbOidName, str) + namelen + 1); - dbidname->oid = db_oid; - strlcpy(dbidname->str, dbname, namelen); - - simple_ptr_list_append(dbname_oid_list, dbidname); - count++; - } - - /* Close map.dat file. */ - fclose(pfile); - - return count; -} - -/* - * restore_all_databases - * - * This will restore databases those dumps are present in - * directory based on map.dat file mapping. - * - * This will skip restoring for databases that are specified with - * exclude-database option. - * - * returns, number of errors while doing restore. - */ -static int -restore_all_databases(PGconn *conn, const char *dumpdirpath, - SimpleStringList db_exclude_patterns, RestoreOptions *opts, - int numWorkers) -{ - SimplePtrList dbname_oid_list = {NULL, NULL}; - int num_db_restore = 0; - int num_total_db; - int n_errors_total; - int count = 0; - char *connected_db = NULL; - bool dumpData = opts->dumpData; - bool dumpSchema = opts->dumpSchema; - bool dumpStatistics = opts->dumpSchema; - - /* Save db name to reuse it for all the database. */ - if (opts->cparams.dbname) - connected_db = opts->cparams.dbname; - - num_total_db = get_dbname_oid_list_from_mfile(dumpdirpath, &dbname_oid_list); - - /* If map.dat has no entries, return after processing global.dat */ - if (dbname_oid_list.head == NULL) - return process_global_sql_commands(conn, dumpdirpath, opts->filename); - - pg_log_info(ngettext("found %d database name in \"%s\"", - "found %d database names in \"%s\"", - num_total_db), - num_total_db, "map.dat"); - - if (!conn) - { - pg_log_info("trying to connect to database \"%s\"", "postgres"); - - conn = ConnectDatabase("postgres", NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - - /* Try with template1. */ - if (!conn) - { - pg_log_info("trying to connect to database \"%s\"", "template1"); - - conn = ConnectDatabase("template1", NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - } - } - - /* - * filter the db list according to the exclude patterns - */ - num_db_restore = get_dbnames_list_to_restore(conn, &dbname_oid_list, - db_exclude_patterns); - - /* Open global.dat file and execute/append all the global sql commands. */ - n_errors_total = process_global_sql_commands(conn, dumpdirpath, opts->filename); - - /* Close the db connection as we are done with globals and patterns. */ - if (conn) - PQfinish(conn); - - /* Exit if no db needs to be restored. 
*/ - if (dbname_oid_list.head == NULL || num_db_restore == 0) - { - pg_log_info(ngettext("no database needs restoring out of %d database", - "no database needs restoring out of %d databases", num_total_db), - num_total_db); - return n_errors_total; - } - - pg_log_info("need to restore %d databases out of %d databases", num_db_restore, num_total_db); - - /* - * We have a list of databases to restore after processing the - * exclude-database switch(es). Now we can restore them one by one. - */ - for (SimplePtrListCell *db_cell = dbname_oid_list.head; - db_cell; db_cell = db_cell->next) - { - DbOidName *dbidname = (DbOidName *) db_cell->ptr; - char subdirpath[MAXPGPATH]; - char subdirdbpath[MAXPGPATH]; - char dbfilename[MAXPGPATH]; - int n_errors; - - /* ignore dbs marked for skipping */ - if (dbidname->oid == InvalidOid) - continue; - - /* - * We need to reset override_dbname so that objects can be restored - * into an already created database. (used with -d/--dbname option) - */ - if (opts->cparams.override_dbname) - { - pfree(opts->cparams.override_dbname); - opts->cparams.override_dbname = NULL; - } - - snprintf(subdirdbpath, MAXPGPATH, "%s/databases", dumpdirpath); - - /* - * Look for the database dump file/dir. If there is an {oid}.tar or - * {oid}.dmp file, use it. Otherwise try to use a directory called - * {oid} - */ - snprintf(dbfilename, MAXPGPATH, "%u.tar", dbidname->oid); - if (file_exists_in_directory(subdirdbpath, dbfilename)) - snprintf(subdirpath, MAXPGPATH, "%s/databases/%u.tar", dumpdirpath, dbidname->oid); - else - { - snprintf(dbfilename, MAXPGPATH, "%u.dmp", dbidname->oid); - - if (file_exists_in_directory(subdirdbpath, dbfilename)) - snprintf(subdirpath, MAXPGPATH, "%s/databases/%u.dmp", dumpdirpath, dbidname->oid); - else - snprintf(subdirpath, MAXPGPATH, "%s/databases/%u", dumpdirpath, dbidname->oid); - } - - pg_log_info("restoring database \"%s\"", dbidname->str); - - /* If database is already created, then don't set createDB flag. */ - if (opts->cparams.dbname) - { - PGconn *test_conn; - - test_conn = ConnectDatabase(dbidname->str, NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - if (test_conn) - { - PQfinish(test_conn); - - /* Use already created database for connection. */ - opts->createDB = 0; - opts->cparams.dbname = dbidname->str; - } - else - { - /* we'll have to create it */ - opts->createDB = 1; - opts->cparams.dbname = connected_db; - } - } - - /* - * Reset flags - might have been reset in pg_backup_archiver.c by the - * previous restore. - */ - opts->dumpData = dumpData; - opts->dumpSchema = dumpSchema; - opts->dumpStatistics = dumpStatistics; - - /* Restore the single database. */ - n_errors = restore_one_database(subdirpath, opts, numWorkers, true, count); - - /* Print a summary of ignored errors during single database restore. */ - if (n_errors) - { - n_errors_total += n_errors; - pg_log_warning("errors ignored on database \"%s\" restore: %d", dbidname->str, n_errors); - } - - count++; - } - - /* Log number of processed databases. */ - pg_log_info("number of restored databases is %d", num_db_restore); - - /* Free dbname and dboid list. */ - simple_ptr_list_destroy(&dbname_oid_list); - - return n_errors_total; -} - -/* - * process_global_sql_commands - * - * Open global.dat and execute or copy the sql commands one by one. - * - * If outfile is not NULL, copy all sql commands into outfile rather than - * executing them. 
- * - * Returns the number of errors while processing global.dat - */ -static int -process_global_sql_commands(PGconn *conn, const char *dumpdirpath, const char *outfile) -{ - char global_file_path[MAXPGPATH]; - PGresult *result; - StringInfoData sqlstatement, - user_create; - FILE *pfile; - int n_errors = 0; - - snprintf(global_file_path, MAXPGPATH, "%s/global.dat", dumpdirpath); - - /* Open global.dat file. */ - pfile = fopen(global_file_path, PG_BINARY_R); - - if (pfile == NULL) - pg_fatal("could not open file \"%s\": %m", global_file_path); - - /* - * If outfile is given, then just copy all global.dat file data into - * outfile. - */ - if (outfile) - { - copy_or_print_global_file(outfile, pfile); - return 0; - } - - /* Init sqlstatement to append commands. */ - initStringInfo(&sqlstatement); - - /* creation statement for our current role */ - initStringInfo(&user_create); - appendStringInfoString(&user_create, "CREATE ROLE "); - /* should use fmtId here, but we don't know the encoding */ - appendStringInfoString(&user_create, PQuser(conn)); - appendStringInfoChar(&user_create, ';'); - - /* Process file till EOF and execute sql statements. */ - while (read_one_statement(&sqlstatement, pfile) != EOF) - { - /* don't try to create the role we are connected as */ - if (strstr(sqlstatement.data, user_create.data)) - continue; - - pg_log_info("executing query: %s", sqlstatement.data); - result = PQexec(conn, sqlstatement.data); - - switch (PQresultStatus(result)) - { - case PGRES_COMMAND_OK: - case PGRES_TUPLES_OK: - case PGRES_EMPTY_QUERY: - break; - default: - n_errors++; - pg_log_error("could not execute query: %s", PQerrorMessage(conn)); - pg_log_error_detail("Command was: %s", sqlstatement.data); - } - PQclear(result); - } - - /* Print a summary of ignored errors during global.dat. */ - if (n_errors) - pg_log_warning(ngettext("ignored %d error in file \"%s\"", - "ignored %d errors in file \"%s\"", n_errors), - n_errors, global_file_path); - fclose(pfile); - - return n_errors; -} - -/* - * copy_or_print_global_file - * - * Copy global.dat into the output file. If "-" is used as outfile, - * then print commands to stdout. - */ -static void -copy_or_print_global_file(const char *outfile, FILE *pfile) -{ - char out_file_path[MAXPGPATH]; - FILE *OPF; - int c; - - /* "-" is used for stdout. */ - if (strcmp(outfile, "-") == 0) - OPF = stdout; - else - { - snprintf(out_file_path, MAXPGPATH, "%s", outfile); - OPF = fopen(out_file_path, PG_BINARY_W); - - if (OPF == NULL) - { - fclose(pfile); - pg_fatal("could not open file: \"%s\"", outfile); - } - } - - /* Append global.dat into output file or print to stdout. */ - while ((c = fgetc(pfile)) != EOF) - fputc(c, OPF); - - fclose(pfile); - - /* Close output file. 
*/ - if (strcmp(outfile, "-") != 0) - fclose(OPF); -} diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl index c3c5fae11eaaf..37d893d5e6a5f 100644 --- a/src/bin/pg_dump/t/001_basic.pl +++ b/src/bin/pg_dump/t/001_basic.pl @@ -237,24 +237,6 @@ 'pg_restore: options -C\/--create and -1\/--single-transaction cannot be used together' ); -command_fails_like( - [ 'pg_restore', '--exclude-database=foo', '--globals-only', '-d', 'xxx' ], - qr/\Qpg_restore: error: option --exclude-database cannot be used together with -g\/--globals-only\E/, - 'pg_restore: option --exclude-database cannot be used together with -g/--globals-only' -); - -command_fails_like( - [ 'pg_restore', '--exclude-database=foo', '-d', 'xxx', 'dumpdir' ], - qr/\Qpg_restore: error: option --exclude-database can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --exclude-database is used in pg_restore with dump of pg_dump' -); - -command_fails_like( - [ 'pg_restore', '--globals-only', '-d', 'xxx', 'dumpdir' ], - qr/\Qpg_restore: error: option -g\/--globals-only can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --globals-only is not used in pg_restore with dump of pg_dump' -); - # also fails for -r and -t, but it seems pointless to add more tests for those. command_fails_like( [ 'pg_dumpall', '--exclude-database=foo', '--globals-only' ], @@ -262,8 +244,4 @@ 'pg_dumpall: option --exclude-database cannot be used together with -g/--globals-only' ); -command_fails_like( - [ 'pg_dumpall', '--format', 'x' ], - qr/\Qpg_dumpall: error: unrecognized output format "x";\E/, - 'pg_dumpall: unrecognized output format'); done_testing(); diff --git a/src/bin/pg_dump/t/006_pg_dumpall.pl b/src/bin/pg_dump/t/006_pg_dumpall.pl deleted file mode 100644 index c274b777586ad..0000000000000 --- a/src/bin/pg_dump/t/006_pg_dumpall.pl +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (c) 2021-2025, PostgreSQL Global Development Group - -use strict; -use warnings FATAL => 'all'; - -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; -use Test::More; - -my $tempdir = PostgreSQL::Test::Utils::tempdir; -my $run_db = 'postgres'; -my $sep = $windows_os ? "\\" : "/"; - -# Tablespace locations used by "restore_tablespace" test case. -my $tablespace1 = "${tempdir}${sep}tbl1"; -my $tablespace2 = "${tempdir}${sep}tbl2"; -mkdir($tablespace1) || die "mkdir $tablespace1 $!"; -mkdir($tablespace2) || die "mkdir $tablespace2 $!"; - -# Scape tablespace locations on Windows. -$tablespace1 = $windows_os ? ($tablespace1 =~ s/\\/\\\\/gr) : $tablespace1; -$tablespace2 = $windows_os ? ($tablespace2 =~ s/\\/\\\\/gr) : $tablespace2; - -# Where pg_dumpall will be executed. -my $node = PostgreSQL::Test::Cluster->new('node'); -$node->init; -$node->start; - - -############################################################### -# Definition of the pg_dumpall test cases to run. -# -# Each of these test cases are named and those names are used for fail -# reporting and also to save the dump and restore information needed for the -# test to assert. -# -# The "setup_sql" is a psql valid script that contains SQL commands to execute -# before of actually execute the tests. The setups are all executed before of -# any test execution. -# -# The "dump_cmd" and "restore_cmd" are the commands that will be executed. The -# "restore_cmd" must have the --file flag to save the restore output so that we -# can assert on it. -# -# The "like" and "unlike" is a regexp that is used to match the pg_restore -# output. 
It must have at least one of then filled per test cases but it also -# can have both. See "excluding_databases" test case for example. -my %pgdumpall_runs = ( - restore_roles => { - setup_sql => ' - CREATE ROLE dumpall WITH ENCRYPTED PASSWORD \'admin\' SUPERUSER; - CREATE ROLE dumpall2 WITH REPLICATION CONNECTION LIMIT 10;', - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/restore_roles", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/restore_roles.sql", - "$tempdir/restore_roles", - ], - like => qr/ - ^\s*\QCREATE ROLE dumpall;\E\s*\n - \s*\QALTER ROLE dumpall WITH SUPERUSER INHERIT NOCREATEROLE NOCREATEDB NOLOGIN NOREPLICATION NOBYPASSRLS PASSWORD 'SCRAM-SHA-256\E - [^']+';\s*\n - \s*\QCREATE ROLE dumpall2;\E - \s*\QALTER ROLE dumpall2 WITH NOSUPERUSER INHERIT NOCREATEROLE NOCREATEDB NOLOGIN REPLICATION NOBYPASSRLS CONNECTION LIMIT 10;\E - /xm - }, - - restore_tablespace => { - setup_sql => " - CREATE ROLE tap; - CREATE TABLESPACE tbl1 OWNER tap LOCATION '$tablespace1'; - CREATE TABLESPACE tbl2 OWNER tap LOCATION '$tablespace2' WITH (seq_page_cost=1.0);", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/restore_tablespace", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/restore_tablespace.sql", - "$tempdir/restore_tablespace", - ], - # Match "E" as optional since it is added on LOCATION when running on - # Windows. - like => qr/^ - \n\QCREATE TABLESPACE tbl1 OWNER tap LOCATION \E(?:E)?\Q'$tablespace1';\E - \n\QCREATE TABLESPACE tbl2 OWNER tap LOCATION \E(?:E)?\Q'$tablespace2';\E - \n\QALTER TABLESPACE tbl2 SET (seq_page_cost=1.0);\E - /xm, - }, - - restore_grants => { - setup_sql => " - CREATE DATABASE tapgrantsdb; - CREATE SCHEMA private; - CREATE SEQUENCE serial START 101; - CREATE FUNCTION fn() RETURNS void AS \$\$ - BEGIN - END; - \$\$ LANGUAGE plpgsql; - CREATE ROLE super; - CREATE ROLE grant1; - CREATE ROLE grant2; - CREATE ROLE grant3; - CREATE ROLE grant4; - CREATE ROLE grant5; - CREATE ROLE grant6; - CREATE ROLE grant7; - CREATE ROLE grant8; - - CREATE TABLE t (id int); - INSERT INTO t VALUES (1), (2), (3), (4); - - GRANT SELECT ON TABLE t TO grant1; - GRANT INSERT ON TABLE t TO grant2; - GRANT ALL PRIVILEGES ON TABLE t to grant3; - GRANT CONNECT, CREATE ON DATABASE tapgrantsdb TO grant4; - GRANT USAGE, CREATE ON SCHEMA private TO grant5; - GRANT USAGE, SELECT, UPDATE ON SEQUENCE serial TO grant6; - GRANT super TO grant7; - GRANT EXECUTE ON FUNCTION fn() TO grant8; - ", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/restore_grants", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/restore_grants.sql", - "$tempdir/restore_grants", - ], - like => qr/^ - \n\QGRANT super TO grant7 WITH INHERIT TRUE GRANTED BY\E - (.*\n)* - \n\QGRANT ALL ON SCHEMA private TO grant5;\E - (.*\n)* - \n\QGRANT ALL ON FUNCTION public.fn() TO grant8;\E - (.*\n)* - \n\QGRANT ALL ON SEQUENCE public.serial TO grant6;\E - (.*\n)* - \n\QGRANT SELECT ON TABLE public.t TO grant1;\E - \n\QGRANT INSERT ON TABLE public.t TO grant2;\E - \n\QGRANT ALL ON TABLE public.t TO grant3;\E - (.*\n)* - \n\QGRANT CREATE,CONNECT ON DATABASE tapgrantsdb TO grant4;\E - /xm, - }, - - excluding_databases => { - setup_sql => 'CREATE DATABASE db1; - \c db1 - CREATE TABLE t1 (id int); - INSERT INTO t1 VALUES (1), (2), (3), (4); - CREATE TABLE t2 (id int); - INSERT INTO t2 
VALUES (1), (2), (3), (4); - - CREATE DATABASE db2; - \c db2 - CREATE TABLE t3 (id int); - INSERT INTO t3 VALUES (1), (2), (3), (4); - CREATE TABLE t4 (id int); - INSERT INTO t4 VALUES (1), (2), (3), (4); - - CREATE DATABASE dbex3; - \c dbex3 - CREATE TABLE t5 (id int); - INSERT INTO t5 VALUES (1), (2), (3), (4); - CREATE TABLE t6 (id int); - INSERT INTO t6 VALUES (1), (2), (3), (4); - - CREATE DATABASE dbex4; - \c dbex4 - CREATE TABLE t7 (id int); - INSERT INTO t7 VALUES (1), (2), (3), (4); - CREATE TABLE t8 (id int); - INSERT INTO t8 VALUES (1), (2), (3), (4); - - CREATE DATABASE db5; - \c db5 - CREATE TABLE t9 (id int); - INSERT INTO t9 VALUES (1), (2), (3), (4); - CREATE TABLE t10 (id int); - INSERT INTO t10 VALUES (1), (2), (3), (4); - ', - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/excluding_databases", - '--exclude-database' => 'dbex*', - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/excluding_databases.sql", - '--exclude-database' => 'db5', - "$tempdir/excluding_databases", - ], - like => qr/^ - \n\QCREATE DATABASE db1\E - (.*\n)* - \n\QCREATE TABLE public.t1 (\E - (.*\n)* - \n\QCREATE TABLE public.t2 (\E - (.*\n)* - \n\QCREATE DATABASE db2\E - (.*\n)* - \n\QCREATE TABLE public.t3 (\E - (.*\n)* - \n\QCREATE TABLE public.t4 (/xm, - unlike => qr/^ - \n\QCREATE DATABASE db3\E - (.*\n)* - \n\QCREATE TABLE public.t5 (\E - (.*\n)* - \n\QCREATE TABLE public.t6 (\E - (.*\n)* - \n\QCREATE DATABASE db4\E - (.*\n)* - \n\QCREATE TABLE public.t7 (\E - (.*\n)* - \n\QCREATE TABLE public.t8 (\E - \n\QCREATE DATABASE db5\E - (.*\n)* - \n\QCREATE TABLE public.t9 (\E - (.*\n)* - \n\QCREATE TABLE public.t10 (\E - /xm, - }, - - format_directory => { - setup_sql => "CREATE TABLE format_directory(a int, b boolean, c text); - INSERT INTO format_directory VALUES (1, true, 'name1'), (2, false, 'name2');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/format_directory", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/format_directory.sql", - "$tempdir/format_directory", - ], - like => qr/^\n\QCOPY public.format_directory (a, b, c) FROM stdin;/xm - }, - - format_tar => { - setup_sql => "CREATE TABLE format_tar(a int, b boolean, c text); - INSERT INTO format_tar VALUES (1, false, 'name3'), (2, true, 'name4');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'tar', - '--file' => "$tempdir/format_tar", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'tar', - '--file' => "$tempdir/format_tar.sql", - "$tempdir/format_tar", - ], - like => qr/^\n\QCOPY public.format_tar (a, b, c) FROM stdin;/xm - }, - - format_custom => { - setup_sql => "CREATE TABLE format_custom(a int, b boolean, c text); - INSERT INTO format_custom VALUES (1, false, 'name5'), (2, true, 'name6');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'custom', - '--file' => "$tempdir/format_custom", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'custom', - '--file' => "$tempdir/format_custom.sql", - "$tempdir/format_custom", - ], - like => qr/^ \n\QCOPY public.format_custom (a, b, c) FROM stdin;/xm - }, - - dump_globals_only => { - setup_sql => "CREATE TABLE format_dir(a int, b boolean, c text); - INSERT INTO format_dir VALUES (1, false, 'name5'), (2, true, 'name6');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--globals-only', - '--file' => "$tempdir/dump_globals_only", - ], - restore_cmd => [ - 'pg_restore', '-C', 
'--globals-only', - '--format' => 'directory', - '--file' => "$tempdir/dump_globals_only.sql", - "$tempdir/dump_globals_only", - ], - like => qr/ - ^\s*\QCREATE ROLE dumpall;\E\s*\n - /xm - },); - -# First execute the setup_sql -foreach my $run (sort keys %pgdumpall_runs) -{ - if ($pgdumpall_runs{$run}->{setup_sql}) - { - $node->safe_psql($run_db, $pgdumpall_runs{$run}->{setup_sql}); - } -} - -# Execute the tests -foreach my $run (sort keys %pgdumpall_runs) -{ - # Create a new target cluster to pg_restore each test case run so that we - # don't need to take care of the cleanup from the target cluster after each - # run. - my $target_node = PostgreSQL::Test::Cluster->new("target_$run"); - $target_node->init; - $target_node->start; - - # Dumpall from node cluster. - $node->command_ok(\@{ $pgdumpall_runs{$run}->{dump_cmd} }, - "$run: pg_dumpall runs"); - - # Restore the dump on "target_node" cluster. - my @restore_cmd = ( - @{ $pgdumpall_runs{$run}->{restore_cmd} }, - '--host', $target_node->host, '--port', $target_node->port); - - my ($stdout, $stderr) = run_command(\@restore_cmd); - - # pg_restore --file output file. - my $output_file = slurp_file("$tempdir/${run}.sql"); - - if ( !($pgdumpall_runs{$run}->{like}) - && !($pgdumpall_runs{$run}->{unlike})) - { - die "missing \"like\" or \"unlike\" in test \"$run\""; - } - - if ($pgdumpall_runs{$run}->{like}) - { - like($output_file, $pgdumpall_runs{$run}->{like}, "should dump $run"); - } - - if ($pgdumpall_runs{$run}->{unlike}) - { - unlike( - $output_file, - $pgdumpall_runs{$run}->{unlike}, - "should not dump $run"); - } -} - -# Some negative test case with dump of pg_dumpall and restore using pg_restore -# test case 1: when -C is not used in pg_restore with dump of pg_dumpall -$node->command_fails_like( - [ - 'pg_restore', - "$tempdir/format_custom", - '--format' => 'custom', - '--file' => "$tempdir/error_test.sql", - ], - qr/\Qpg_restore: error: option -C\/--create must be specified when restoring an archive created by pg_dumpall\E/, - 'When -C is not used in pg_restore with dump of pg_dumpall'); - -# test case 2: When --list option is used with dump of pg_dumpall -$node->command_fails_like( - [ - 'pg_restore', - "$tempdir/format_custom", '-C', - '--format' => 'custom', - '--list', - '--file' => "$tempdir/error_test.sql", - ], - qr/\Qpg_restore: error: option -l\/--list cannot be used when restoring an archive created by pg_dumpall\E/, - 'When --list is used in pg_restore with dump of pg_dumpall'); - -# test case 3: When non-exist database is given with -d option -$node->command_fails_like( - [ - 'pg_restore', - "$tempdir/format_custom", '-C', - '--format' => 'custom', - '-d' => 'dbpq', - ], - qr/\Qpg_restore: error: could not connect to database "dbpq"\E/, - 'When non-existent database is given with -d option in pg_restore with dump of pg_dumpall' -); - -$node->stop('fast'); - -done_testing(); From 412036c22d6a605340dbe397da1fb12fccd3897f Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 30 Jul 2025 10:48:41 -0500 Subject: [PATCH 399/602] Teach pg_upgrade to handle in-place tablespaces. Presently, pg_upgrade assumes that all non-default tablespaces don't move to different directories during upgrade. Unfortunately, this isn't true for in-place tablespaces, which move to the new cluster's pg_tblspc directory. This commit teaches pg_upgrade to handle in-place tablespaces by retrieving the tablespace directories for both the old and new clusters. 
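For background, a minimal sketch of what an in-place tablespace is (the tablespace name is arbitrary; requires a superuser): it is created with the allow_in_place_tablespaces developer option and an empty LOCATION, and pg_tablespace_location() then reports a path relative to the data directory rather than an absolute one.

    PGOPTIONS='-c allow_in_place_tablespaces=true' \
        psql -c "CREATE TABLESPACE ts_inplace LOCATION ''"
    psql -Atc "SELECT pg_tablespace_location(oid) FROM pg_tablespace
               WHERE spcname = 'ts_inplace'"
    # prints a data-directory-relative path such as: pg_tblspc/16385
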
In turn, we can relax the prohibition on non-default tablespaces for same-version upgrades, i.e., if all non-default tablespaces are in-place, pg_upgrade may proceed. This change is primarily intended to enable additional pg_upgrade testing with non-default tablespaces, as is done in 006_transfer_modes.pl. Reviewed-by: Corey Huinker Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/aA_uBLYMUs5D66Nb%40nathan --- src/bin/pg_upgrade/check.c | 20 +++---- src/bin/pg_upgrade/info.c | 38 +++++++++++-- src/bin/pg_upgrade/parallel.c | 11 ++-- src/bin/pg_upgrade/pg_upgrade.h | 8 +-- src/bin/pg_upgrade/relfilenumber.c | 57 +++++++++---------- src/bin/pg_upgrade/t/006_transfer_modes.pl | 35 ++++++++++-- src/bin/pg_upgrade/tablespace.c | 65 +++++++++++++++++----- 7 files changed, 164 insertions(+), 70 deletions(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 5e6403f07731b..310f53c55771b 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -956,12 +956,12 @@ check_for_new_tablespace_dir(void) prep_status("Checking for new cluster tablespace directories"); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < new_cluster.num_tablespaces; tblnum++) { struct stat statbuf; snprintf(new_tablespace_dir, MAXPGPATH, "%s%s", - os_info.old_tablespaces[tblnum], + new_cluster.tablespaces[tblnum], new_cluster.tablespace_suffix); if (stat(new_tablespace_dir, &statbuf) == 0 || errno != ENOENT) @@ -1013,17 +1013,17 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name) * directory. We can't create a proper old cluster delete script in that * case. */ - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < new_cluster.num_tablespaces; tblnum++) { - char old_tablespace_dir[MAXPGPATH]; + char new_tablespace_dir[MAXPGPATH]; - strlcpy(old_tablespace_dir, os_info.old_tablespaces[tblnum], MAXPGPATH); - canonicalize_path(old_tablespace_dir); - if (path_is_prefix_of_path(old_cluster_pgdata, old_tablespace_dir)) + strlcpy(new_tablespace_dir, new_cluster.tablespaces[tblnum], MAXPGPATH); + canonicalize_path(new_tablespace_dir); + if (path_is_prefix_of_path(old_cluster_pgdata, new_tablespace_dir)) { /* reproduce warning from CREATE TABLESPACE that is in the log */ pg_log(PG_WARNING, - "\nWARNING: user-defined tablespace locations should not be inside the data directory, i.e. %s", old_tablespace_dir); + "\nWARNING: user-defined tablespace locations should not be inside the data directory, i.e. %s", new_tablespace_dir); /* Unlink file in case it is left over from a previous run. 
*/ unlink(*deletion_script_file_name); @@ -1051,9 +1051,9 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name) /* delete old cluster's alternate tablespaces */ old_tblspc_suffix = pg_strdup(old_cluster.tablespace_suffix); fix_path_separator(old_tblspc_suffix); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) fprintf(script, RMDIR_CMD " %c%s%s%c\n", PATH_QUOTE, - fix_path_separator(os_info.old_tablespaces[tblnum]), + fix_path_separator(old_cluster.tablespaces[tblnum]), old_tblspc_suffix, PATH_QUOTE); pfree(old_tblspc_suffix); diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index a437067cdca82..c39eb077c2fae 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -443,10 +443,26 @@ get_db_infos(ClusterInfo *cluster) for (tupnum = 0; tupnum < ntups; tupnum++) { + char *spcloc = PQgetvalue(res, tupnum, i_spclocation); + bool inplace = spcloc[0] && !is_absolute_path(spcloc); + dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid)); dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname)); - snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s", - PQgetvalue(res, tupnum, i_spclocation)); + + /* + * The tablespace location might be "", meaning the cluster default + * location, i.e. pg_default or pg_global. For in-place tablespaces, + * pg_tablespace_location() returns a path relative to the data + * directory. + */ + if (inplace) + snprintf(dbinfos[tupnum].db_tablespace, + sizeof(dbinfos[tupnum].db_tablespace), + "%s/%s", cluster->pgdata, spcloc); + else + snprintf(dbinfos[tupnum].db_tablespace, + sizeof(dbinfos[tupnum].db_tablespace), + "%s", spcloc); } PQclear(res); @@ -616,11 +632,21 @@ process_rel_infos(DbInfo *dbinfo, PGresult *res, void *arg) /* Is the tablespace oid non-default? */ if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0) { + char *spcloc = PQgetvalue(res, relnum, i_spclocation); + bool inplace = spcloc[0] && !is_absolute_path(spcloc); + /* * The tablespace location might be "", meaning the cluster - * default location, i.e. pg_default or pg_global. + * default location, i.e. pg_default or pg_global. For in-place + * tablespaces, pg_tablespace_location() returns a path relative + * to the data directory. */ - tablespace = PQgetvalue(res, relnum, i_spclocation); + if (inplace) + tablespace = psprintf("%s/%s", + os_info.running_cluster->pgdata, + spcloc); + else + tablespace = spcloc; /* Can we reuse the previous string allocation? */ if (last_tablespace && strcmp(tablespace, last_tablespace) == 0) @@ -630,6 +656,10 @@ process_rel_infos(DbInfo *dbinfo, PGresult *res, void *arg) last_tablespace = curr->tablespace = pg_strdup(tablespace); curr->tblsp_alloc = true; } + + /* Free palloc'd string for in-place tablespaces. */ + if (inplace) + pfree(tablespace); } else /* A zero reltablespace oid indicates the database tablespace. 
*/ diff --git a/src/bin/pg_upgrade/parallel.c b/src/bin/pg_upgrade/parallel.c index 056aa2edaee3f..6d7941844a7c8 100644 --- a/src/bin/pg_upgrade/parallel.c +++ b/src/bin/pg_upgrade/parallel.c @@ -40,6 +40,7 @@ typedef struct char *old_pgdata; char *new_pgdata; char *old_tablespace; + char *new_tablespace; } transfer_thread_arg; static exec_thread_arg **exec_thread_args; @@ -171,7 +172,7 @@ win32_exec_prog(exec_thread_arg *args) void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, - char *old_tablespace) + char *old_tablespace, char *new_tablespace) { #ifndef WIN32 pid_t child; @@ -181,7 +182,7 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, #endif if (user_opts.jobs <= 1) - transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL); + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL, NULL); else { /* parallel */ @@ -225,7 +226,7 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, if (child == 0) { transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, - old_tablespace); + old_tablespace, new_tablespace); /* if we take another exit path, it will be non-zero */ /* use _exit to skip atexit() functions */ _exit(0); @@ -246,6 +247,7 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, new_arg->new_pgdata = pg_strdup(new_pgdata); pg_free(new_arg->old_tablespace); new_arg->old_tablespace = old_tablespace ? pg_strdup(old_tablespace) : NULL; + new_arg->new_tablespace = new_tablespace ? pg_strdup(new_tablespace) : NULL; child = (HANDLE) _beginthreadex(NULL, 0, (void *) win32_transfer_all_new_dbs, new_arg, 0, NULL); @@ -263,7 +265,8 @@ DWORD win32_transfer_all_new_dbs(transfer_thread_arg *args) { transfer_all_new_dbs(args->old_db_arr, args->new_db_arr, args->old_pgdata, - args->new_pgdata, args->old_tablespace); + args->new_pgdata, args->old_tablespace, + args->new_tablespace); /* terminates thread */ return 0; diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index e9401430e697f..0ef47be0dc199 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -300,6 +300,8 @@ typedef struct uint32 major_version; /* PG_VERSION of cluster */ char major_version_str[64]; /* string PG_VERSION of cluster */ uint32 bin_version; /* version returned from pg_ctl */ + char **tablespaces; /* tablespace directories */ + int num_tablespaces; const char *tablespace_suffix; /* directory specification */ int nsubs; /* number of subscriptions */ bool sub_retain_dead_tuples; /* whether a subscription enables @@ -356,8 +358,6 @@ typedef struct const char *progname; /* complete pathname for this program */ char *user; /* username for clusters */ bool user_specified; /* user specified on command-line */ - char **old_tablespaces; /* tablespaces */ - int num_old_tablespaces; LibraryInfo *libraries; /* loadable libraries */ int num_libraries; ClusterInfo *running_cluster; @@ -457,7 +457,7 @@ void transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata); void transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, - char *old_tablespace); + char *old_tablespace, char *new_tablespace); /* tablespace.c */ @@ -505,7 +505,7 @@ void parallel_exec_prog(const char *log_file, const char *opt_log_file, const char *fmt,...) 
pg_attribute_printf(3, 4); void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, - char *old_tablespace); + char *old_tablespace, char *new_tablespace); bool reap_child(bool wait_for_child); /* task.c */ diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index 8d8e816a01fa4..38c17ceabf222 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -17,7 +17,7 @@ #include "common/logging.h" #include "pg_upgrade.h" -static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); +static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace, char *new_tablespace); static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit); /* @@ -136,21 +136,22 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, */ if (user_opts.jobs <= 1) parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, - new_pgdata, NULL); + new_pgdata, NULL, NULL); else { int tblnum; /* transfer default tablespace */ parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, - new_pgdata, old_pgdata); + new_pgdata, old_pgdata, new_pgdata); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum], + new_cluster.tablespaces[tblnum]); /* reap all children */ while (reap_child(true) == true) ; @@ -169,7 +170,8 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, */ void transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, - char *old_pgdata, char *new_pgdata, char *old_tablespace) + char *old_pgdata, char *new_pgdata, + char *old_tablespace, char *new_tablespace) { int old_dbnum, new_dbnum; @@ -204,7 +206,7 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, new_pgdata); if (n_maps) { - transfer_single_new_db(mappings, n_maps, old_tablespace); + transfer_single_new_db(mappings, n_maps, old_tablespace, new_tablespace); } /* We allocate something even for n_maps == 0 */ pg_free(mappings); @@ -234,10 +236,10 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, * moved_db_dir: Destination for the pg_restore-generated database directory. */ static bool -prepare_for_swap(const char *old_tablespace, Oid db_oid, - char *old_catalog_dir, char *new_db_dir, char *moved_db_dir) +prepare_for_swap(const char *old_tablespace, const char *new_tablespace, + Oid db_oid, char *old_catalog_dir, char *new_db_dir, + char *moved_db_dir) { - const char *new_tablespace; const char *old_tblspc_suffix; const char *new_tblspc_suffix; char old_tblspc[MAXPGPATH]; @@ -247,24 +249,14 @@ prepare_for_swap(const char *old_tablespace, Oid db_oid, struct stat st; if (strcmp(old_tablespace, old_cluster.pgdata) == 0) - { - new_tablespace = new_cluster.pgdata; - new_tblspc_suffix = "/base"; old_tblspc_suffix = "/base"; - } else - { - /* - * XXX: The below line is a hack to deal with the fact that we - * presently don't have an easy way to find the corresponding new - * tablespace's path. This will need to be fixed if/when we add - * pg_upgrade support for in-place tablespaces. 
- */ - new_tablespace = old_tablespace; + old_tblspc_suffix = old_cluster.tablespace_suffix; + if (strcmp(new_tablespace, new_cluster.pgdata) == 0) + new_tblspc_suffix = "/base"; + else new_tblspc_suffix = new_cluster.tablespace_suffix; - old_tblspc_suffix = old_cluster.tablespace_suffix; - } /* Old and new cluster paths. */ snprintf(old_tblspc, sizeof(old_tblspc), "%s%s", old_tablespace, old_tblspc_suffix); @@ -450,7 +442,7 @@ swap_catalog_files(FileNameMap *maps, int size, const char *old_catalog_dir, * during pg_restore. */ static void -do_swap(FileNameMap *maps, int size, char *old_tablespace) +do_swap(FileNameMap *maps, int size, char *old_tablespace, char *new_tablespace) { char old_catalog_dir[MAXPGPATH]; char new_db_dir[MAXPGPATH]; @@ -470,21 +462,23 @@ do_swap(FileNameMap *maps, int size, char *old_tablespace) */ if (old_tablespace) { - if (prepare_for_swap(old_tablespace, maps[0].db_oid, + if (prepare_for_swap(old_tablespace, new_tablespace, maps[0].db_oid, old_catalog_dir, new_db_dir, moved_db_dir)) swap_catalog_files(maps, size, old_catalog_dir, new_db_dir, moved_db_dir); } else { - if (prepare_for_swap(old_cluster.pgdata, maps[0].db_oid, + if (prepare_for_swap(old_cluster.pgdata, new_cluster.pgdata, maps[0].db_oid, old_catalog_dir, new_db_dir, moved_db_dir)) swap_catalog_files(maps, size, old_catalog_dir, new_db_dir, moved_db_dir); - for (int tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (int tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) { - if (prepare_for_swap(os_info.old_tablespaces[tblnum], maps[0].db_oid, + if (prepare_for_swap(old_cluster.tablespaces[tblnum], + new_cluster.tablespaces[tblnum], + maps[0].db_oid, old_catalog_dir, new_db_dir, moved_db_dir)) swap_catalog_files(maps, size, old_catalog_dir, new_db_dir, moved_db_dir); @@ -498,7 +492,8 @@ do_swap(FileNameMap *maps, int size, char *old_tablespace) * create links for mappings stored in "maps" array. */ static void -transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) +transfer_single_new_db(FileNameMap *maps, int size, + char *old_tablespace, char *new_tablespace) { int mapnum; bool vm_must_add_frozenbit = false; @@ -520,7 +515,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) */ Assert(!vm_must_add_frozenbit); - do_swap(maps, size, old_tablespace); + do_swap(maps, size, old_tablespace, new_tablespace); return; } diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl index 58fe8a8c7dcea..348f402146234 100644 --- a/src/bin/pg_upgrade/t/006_transfer_modes.pl +++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl @@ -38,6 +38,13 @@ sub test_mode } $new->init(); + # allow_in_place_tablespaces is available as far back as v10. + if ($old->pg_version >= 10) + { + $new->append_conf('postgresql.conf', "allow_in_place_tablespaces = true"); + $old->append_conf('postgresql.conf', "allow_in_place_tablespaces = true"); + } + # Create a small variety of simple test objects on the old cluster. We'll # check that these reach the new version after upgrading. $old->start; @@ -49,8 +56,7 @@ sub test_mode $old->safe_psql('testdb1', "VACUUM FULL test2"); $old->safe_psql('testdb1', "CREATE SEQUENCE testseq START 5432"); - # For cross-version tests, we can also check that pg_upgrade handles - # tablespaces. + # If an old installation is provided, we can test non-in-place tablespaces. 
if (defined($ENV{oldinstall})) { my $tblspc = PostgreSQL::Test::Utils::tempdir_short(); @@ -64,6 +70,19 @@ sub test_mode $old->safe_psql('testdb2', "CREATE TABLE test4 AS SELECT generate_series(400, 502)"); } + + # If the old cluster is >= v10, we can test in-place tablespaces. + if ($old->pg_version >= 10) + { + $old->safe_psql('postgres', + "CREATE TABLESPACE inplc_tblspc LOCATION ''"); + $old->safe_psql('postgres', + "CREATE DATABASE testdb3 TABLESPACE inplc_tblspc"); + $old->safe_psql('postgres', + "CREATE TABLE test5 TABLESPACE inplc_tblspc AS SELECT generate_series(503, 606)"); + $old->safe_psql('testdb3', + "CREATE TABLE test6 AS SELECT generate_series(607, 711)"); + } $old->stop; my $result = command_ok_or_fails_like( @@ -94,8 +113,7 @@ sub test_mode $result = $new->safe_psql('testdb1', "SELECT nextval('testseq')"); is($result, '5432', "sequence data after pg_upgrade $mode"); - # For cross-version tests, we should have some objects in a non-default - # tablespace. + # Tests for non-in-place tablespaces. if (defined($ENV{oldinstall})) { $result = @@ -105,6 +123,15 @@ sub test_mode $new->safe_psql('testdb2', "SELECT COUNT(*) FROM test4"); is($result, '103', "test4 data after pg_upgrade $mode"); } + + # Tests for in-place tablespaces. + if ($old->pg_version >= 10) + { + $result = $new->safe_psql('postgres', "SELECT COUNT(*) FROM test5"); + is($result, '104', "test5 data after pg_upgrade $mode"); + $result = $new->safe_psql('testdb3', "SELECT COUNT(*) FROM test6"); + is($result, '105', "test6 data after pg_upgrade $mode"); + } $new->stop; } diff --git a/src/bin/pg_upgrade/tablespace.c b/src/bin/pg_upgrade/tablespace.c index 3520a75ba317d..151d74e17349b 100644 --- a/src/bin/pg_upgrade/tablespace.c +++ b/src/bin/pg_upgrade/tablespace.c @@ -23,10 +23,20 @@ init_tablespaces(void) set_tablespace_directory_suffix(&old_cluster); set_tablespace_directory_suffix(&new_cluster); - if (os_info.num_old_tablespaces > 0 && + if (old_cluster.num_tablespaces > 0 && strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0) - pg_fatal("Cannot upgrade to/from the same system catalog version when\n" - "using tablespaces."); + { + for (int i = 0; i < old_cluster.num_tablespaces; i++) + { + /* + * In-place tablespaces are okay for same-version upgrades because + * their paths will differ between clusters. 
+ */ + if (strcmp(old_cluster.tablespaces[i], new_cluster.tablespaces[i]) == 0) + pg_fatal("Cannot upgrade to/from the same system catalog version when\n" + "using tablespaces."); + } + } } @@ -53,19 +63,48 @@ get_tablespace_paths(void) res = executeQueryOrDie(conn, "%s", query); - if ((os_info.num_old_tablespaces = PQntuples(res)) != 0) - os_info.old_tablespaces = - (char **) pg_malloc(os_info.num_old_tablespaces * sizeof(char *)); + old_cluster.num_tablespaces = PQntuples(res); + new_cluster.num_tablespaces = PQntuples(res); + + if (PQntuples(res) != 0) + { + old_cluster.tablespaces = + (char **) pg_malloc(old_cluster.num_tablespaces * sizeof(char *)); + new_cluster.tablespaces = + (char **) pg_malloc(new_cluster.num_tablespaces * sizeof(char *)); + } else - os_info.old_tablespaces = NULL; + { + old_cluster.tablespaces = NULL; + new_cluster.tablespaces = NULL; + } i_spclocation = PQfnumber(res, "spclocation"); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) { struct stat statBuf; + char *spcloc = PQgetvalue(res, tblnum, i_spclocation); - os_info.old_tablespaces[tblnum] = pg_strdup(PQgetvalue(res, tblnum, i_spclocation)); + /* + * For now, we do not expect non-in-place tablespaces to move during + * upgrade. If that changes, it will likely become necessary to run + * the above query on the new cluster, too. + * + * pg_tablespace_location() returns absolute paths for non-in-place + * tablespaces and relative paths for in-place ones, so we use + * is_absolute_path() to distinguish between them. + */ + if (is_absolute_path(PQgetvalue(res, tblnum, i_spclocation))) + { + old_cluster.tablespaces[tblnum] = pg_strdup(spcloc); + new_cluster.tablespaces[tblnum] = old_cluster.tablespaces[tblnum]; + } + else + { + old_cluster.tablespaces[tblnum] = psprintf("%s/%s", old_cluster.pgdata, spcloc); + new_cluster.tablespaces[tblnum] = psprintf("%s/%s", new_cluster.pgdata, spcloc); + } /* * Check that the tablespace path exists and is a directory. @@ -76,21 +115,21 @@ get_tablespace_paths(void) * that contains user tablespaces is moved as part of pg_upgrade * preparation and the symbolic links are not updated. */ - if (stat(os_info.old_tablespaces[tblnum], &statBuf) != 0) + if (stat(old_cluster.tablespaces[tblnum], &statBuf) != 0) { if (errno == ENOENT) report_status(PG_FATAL, "tablespace directory \"%s\" does not exist", - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum]); else report_status(PG_FATAL, "could not stat tablespace directory \"%s\": %m", - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum]); } if (!S_ISDIR(statBuf.st_mode)) report_status(PG_FATAL, "tablespace path \"%s\" is not a directory", - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum]); } PQclear(res); From ee924698d566223e927cf9d505c1ccdacd7061c8 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 30 Jul 2025 13:04:47 -0500 Subject: [PATCH 400/602] doc: Adjust documentation for vacuumdb --missing-stats-only. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sentence in question gave readers the impression that vacuumdb removes statistics for a period of time while analyzing, but it's actually meant to convey that --analyze-in-stages temporarily replaces existing statistics with ones generated with lower statistics targets. Reported-by: Frédéric Yhuel Reviewed-by: Frédéric Yhuel Reviewed-by: "David G. 
Johnston" Reviewed-by: Corey Huinker Reviewed-by: Jeff Davis Discussion: https://postgr.es/m/4b94ca16-7a6d-4581-b2aa-4ea79dbc082a%40dalibo.com Backpatch-through: 18 --- doc/src/sgml/ref/vacuumdb.sgml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index b0680a61814cc..c7d9dca17b867 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -282,9 +282,11 @@ PostgreSQL documentation Only analyze relations that are missing statistics for a column, index - expression, or extended statistics object. This option prevents - vacuumdb from deleting existing statistics - so that the query optimizer's choices do not become transiently worse. + expression, or extended statistics object. When used with + , this option prevents + vacuumdb from temporarily replacing existing + statistics with ones generated with lower statistics targets, thus + avoiding transiently worse query optimizer choices. This option can only be used in conjunction with From e125e360020a7b0affd5bea938b749e85d8999d3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 31 Jul 2025 10:06:34 +0900 Subject: [PATCH 401/602] Rename CachedPlanType to PlannedStmtOrigin for PlannedStmt Commit 719dcf3c42 introduced a field called CachedPlanType in PlannedStmt to allow extensions to determine whether a cached plan is generic or custom. After discussion, the concepts that we want to track are a bit wider than initially anticipated, as it is closer to knowing from which "source" or "origin" a PlannedStmt has been generated or retrieved. Custom and generic cached plans are a subset of that. Based on the state of HEAD, we have been able to define two more origins: - "standard", for the case where PlannedStmt is generated in standard_planner(), the most common case. - "internal", for the fake PlannedStmt generated internally by some query patterns. This could be tuned in the future depending on what is needed. This looks like a good starting point, at least. The default value is called "UNKNOWN", provided as fallback value. This value is not used in the core code, the idea is to let extensions building their own PlannedStmts know about this new field. 
Author: Michael Paquier Co-authored-by: Sami Imseih Discussion: https://postgr.es/m/aILaHupXbIGgF2wJ@paquier.xyz --- src/backend/commands/foreigncmds.c | 2 +- src/backend/commands/schemacmds.c | 2 +- src/backend/executor/execParallel.c | 2 +- src/backend/optimizer/plan/planner.c | 2 +- src/backend/tcop/postgres.c | 2 +- src/backend/tcop/utility.c | 4 ++-- src/backend/utils/cache/plancache.c | 2 +- src/include/nodes/plannodes.h | 21 +++++++++++---------- src/tools/pgindent/typedefs.list | 2 +- 9 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c index fcd5fcd8915e3..77f8461f42eee 100644 --- a/src/backend/commands/foreigncmds.c +++ b/src/backend/commands/foreigncmds.c @@ -1588,7 +1588,7 @@ ImportForeignSchema(ImportForeignSchemaStmt *stmt) pstmt->utilityStmt = (Node *) cstmt; pstmt->stmt_location = rs->stmt_location; pstmt->stmt_len = rs->stmt_len; - pstmt->cached_plan_type = PLAN_CACHE_NONE; + pstmt->planOrigin = PLAN_STMT_INTERNAL; /* Execute statement */ ProcessUtility(pstmt, cmd, false, diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index c00f1a11384f1..0f03d9743d203 100644 --- a/src/backend/commands/schemacmds.c +++ b/src/backend/commands/schemacmds.c @@ -215,7 +215,7 @@ CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString, wrapper->utilityStmt = stmt; wrapper->stmt_location = stmt_location; wrapper->stmt_len = stmt_len; - wrapper->cached_plan_type = PLAN_CACHE_NONE; + wrapper->planOrigin = PLAN_STMT_INTERNAL; /* do this step */ ProcessUtility(wrapper, diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index fc76f22fb8238..f098a5557cf07 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -189,7 +189,7 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->permInfos = estate->es_rteperminfos; pstmt->resultRelations = NIL; pstmt->appendRelations = NIL; - pstmt->cached_plan_type = PLAN_CACHE_NONE; + pstmt->planOrigin = PLAN_STMT_INTERNAL; /* * Transfer only parallel-safe subplans, leaving a NULL "hole" in the list diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a77b2147e9592..d59d6e4c6a021 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -558,6 +558,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->commandType = parse->commandType; result->queryId = parse->queryId; + result->planOrigin = PLAN_STMT_STANDARD; result->hasReturning = (parse->returningList != NIL); result->hasModifyingCTE = parse->hasModifyingCTE; result->canSetTag = parse->canSetTag; @@ -582,7 +583,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->utilityStmt = parse->utilityStmt; result->stmt_location = parse->stmt_location; result->stmt_len = parse->stmt_len; - result->cached_plan_type = PLAN_CACHE_NONE; result->jitFlags = PGJIT_NONE; if (jit_enabled && jit_above_cost >= 0 && diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index a297606cdd7fa..0cecd4649020f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -988,7 +988,7 @@ pg_plan_queries(List *querytrees, const char *query_string, int cursorOptions, stmt->stmt_location = query->stmt_location; stmt->stmt_len = query->stmt_len; stmt->queryId = query->queryId; - stmt->cached_plan_type = PLAN_CACHE_NONE; + stmt->planOrigin = PLAN_STMT_INTERNAL; } 
else { diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index babc34d0cbe1d..4f4191b0ea6b4 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1234,7 +1234,7 @@ ProcessUtilitySlow(ParseState *pstate, wrapper->utilityStmt = stmt; wrapper->stmt_location = pstmt->stmt_location; wrapper->stmt_len = pstmt->stmt_len; - wrapper->cached_plan_type = PLAN_CACHE_NONE; + wrapper->planOrigin = PLAN_STMT_INTERNAL; ProcessUtility(wrapper, queryString, @@ -1965,7 +1965,7 @@ ProcessUtilityForAlterTable(Node *stmt, AlterTableUtilityContext *context) wrapper->utilityStmt = stmt; wrapper->stmt_location = context->pstmt->stmt_location; wrapper->stmt_len = context->pstmt->stmt_len; - wrapper->cached_plan_type = PLAN_CACHE_NONE; + wrapper->planOrigin = PLAN_STMT_INTERNAL; ProcessUtility(wrapper, context->queryString, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index f4d2b9458a5ea..0c506d320b137 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1390,7 +1390,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, { PlannedStmt *pstmt = (PlannedStmt *) lfirst(lc); - pstmt->cached_plan_type = customplan ? PLAN_CACHE_CUSTOM : PLAN_CACHE_GENERIC; + pstmt->planOrigin = customplan ? PLAN_STMT_CACHE_CUSTOM : PLAN_STMT_CACHE_GENERIC; } return plan; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 6d8e1e99db3bd..29d7732d6a031 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -29,18 +29,19 @@ */ /* ---------------- - * CachedPlanType + * PlannedStmtOrigin * - * CachedPlanType identifies whether a PlannedStmt is a cached plan, and if - * so, whether it is generic or custom. + * PlannedStmtOrigin identifies where a PlannedStmt comes from. * ---------------- */ -typedef enum CachedPlanType +typedef enum PlannedStmtOrigin { - PLAN_CACHE_NONE = 0, /* Not a cached plan */ - PLAN_CACHE_GENERIC, /* Generic cached plan */ - PLAN_CACHE_CUSTOM, /* Custom cached plan */ -} CachedPlanType; + PLAN_STMT_UNKNOWN = 0, /* plan origin is not yet known */ + PLAN_STMT_INTERNAL, /* generated internally by a query */ + PLAN_STMT_STANDARD, /* standard planned statement */ + PLAN_STMT_CACHE_GENERIC, /* Generic cached plan */ + PLAN_STMT_CACHE_CUSTOM, /* Custom cached plan */ +} PlannedStmtOrigin; /* ---------------- * PlannedStmt node @@ -72,8 +73,8 @@ typedef struct PlannedStmt /* plan identifier (can be set by plugins) */ int64 planId; - /* type of cached plan */ - CachedPlanType cached_plan_type; + /* origin of plan */ + PlannedStmtOrigin planOrigin; /* is it insert|update|delete|merge RETURNING?
*/ bool hasReturning; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 3daba26b23723..e6f2e93b2d6fa 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -391,7 +391,6 @@ CachedFunctionHashEntry CachedFunctionHashKey CachedPlan CachedPlanSource -CachedPlanType CallContext CallStmt CancelRequestPacket @@ -2276,6 +2275,7 @@ PlanInvalItem PlanRowMark PlanState PlannedStmt +PlannedStmtOrigin PlannerGlobal PlannerInfo PlannerParamItem From 3357471cf9f5e470dfed0c7919bcf31c7efaf2b9 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 31 Jul 2025 11:20:29 +0900 Subject: [PATCH 402/602] pg_stat_statements: Add counters for generic and custom plans This patch adds two new counters to pg_stat_statements: - generic_plan_calls - custom_plan_calls These counters track how many times a prepared statement was executed using a generic or custom plan, respectively, providing a global, query-level equivalent, for both top and non-top levels, of pg_prepared_statements, whose data is restricted to a single session. This commit builds upon e125e360020a. The module is bumped to version 1.13. PGSS_FILE_HEADER is bumped as well, something that the latest patches touching the on-disk format of the PGSS file did not actually bother with since 2022. Author: Sami Imseih Reviewed-by: Ilia Evdokimov Reviewed-by: Andrei Lepikhov Reviewed-by: Michael Paquier Reviewed-by: Nikolay Samokhvalov Discussion: https://postgr.es/m/CAA5RZ0uFw8Y9GCFvafhC=OA8NnMqVZyzXPfv_EePOt+iv1T-qQ@mail.gmail.com --- contrib/pg_stat_statements/Makefile | 3 +- .../expected/oldextversions.out | 67 ++++++ .../pg_stat_statements/expected/plancache.out | 224 ++++++++++++++++++ contrib/pg_stat_statements/meson.build | 2 + .../pg_stat_statements--1.12--1.13.sql | 78 ++++++ .../pg_stat_statements/pg_stat_statements.c | 53 ++++- .../pg_stat_statements.control | 2 +- .../pg_stat_statements/sql/oldextversions.sql | 5 + contrib/pg_stat_statements/sql/plancache.sql | 94 ++++++++ doc/src/sgml/pgstatstatements.sgml | 18 ++ 10 files changed, 536 insertions(+), 10 deletions(-) create mode 100644 contrib/pg_stat_statements/expected/plancache.out create mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql create mode 100644 contrib/pg_stat_statements/sql/plancache.sql diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index b2bd8794d2a14..fe0478ac55266 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -7,6 +7,7 @@ OBJS = \ EXTENSION = pg_stat_statements DATA = pg_stat_statements--1.4.sql \ + pg_stat_statements--1.12--1.13.sql \ pg_stat_statements--1.11--1.12.sql pg_stat_statements--1.10--1.11.sql \ pg_stat_statements--1.9--1.10.sql pg_stat_statements--1.8--1.9.sql \ pg_stat_statements--1.7--1.8.sql pg_stat_statements--1.6--1.7.sql \ @@ -20,7 +21,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf REGRESS = select dml cursors utility level_tracking planning \ user_activity wal entry_timestamp privileges extended \ - parallel cleanup oldextversions squashing + parallel plancache cleanup oldextversions squashing # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1 diff --git a/contrib/pg_stat_statements/expected/oldextversions.out b/contrib/pg_stat_statements/expected/oldextversions.out index de679b19711ab..726383a99d7c1 100644 --- a/contrib/pg_stat_statements/expected/oldextversions.out +++ b/contrib/pg_stat_statements/expected/oldextversions.out @@ -407,4 +407,71 @@ SELECT count(*) > 0 AS has_data FROM pg_stat_statements; t (1 row) +-- New functions and views for pg_stat_statements in 1.13 +AlTER EXTENSION pg_stat_statements UPDATE TO '1.13'; +\d pg_stat_statements + View "public.pg_stat_statements" + Column | Type | Collation | Nullable | Default +----------------------------+--------------------------+-----------+----------+--------- + userid | oid | | | + dbid | oid | | | + toplevel | boolean | | | + queryid | bigint | | | + query | text | | | + plans | bigint | | | + total_plan_time | double precision | | | + min_plan_time | double precision | | | + max_plan_time | double precision | | | + mean_plan_time | double precision | | | + stddev_plan_time | double precision | | | + calls | bigint | | | + total_exec_time | double precision | | | + min_exec_time | double precision | | | + max_exec_time | double precision | | | + mean_exec_time | double precision | | | + stddev_exec_time | double precision | | | + rows | bigint | | | + shared_blks_hit | bigint | | | + shared_blks_read | bigint | | | + shared_blks_dirtied | bigint | | | + shared_blks_written | bigint | | | + local_blks_hit | bigint | | | + local_blks_read | bigint | | | + local_blks_dirtied | bigint | | | + local_blks_written | bigint | | | + temp_blks_read | bigint | | | + temp_blks_written | bigint | | | + shared_blk_read_time | double precision | | | + shared_blk_write_time | double precision | | | + local_blk_read_time | double precision | | | + local_blk_write_time | double precision | | | + temp_blk_read_time | double precision | | | + temp_blk_write_time | double precision | | | + wal_records | bigint | | | + wal_fpi | bigint | | | + wal_bytes | numeric | | | + wal_buffers_full | bigint | | | + jit_functions | bigint | | | + jit_generation_time | double precision | | | + jit_inlining_count | bigint | | | + jit_inlining_time | double precision | | | + jit_optimization_count | bigint | | | + jit_optimization_time | double precision | | | + jit_emission_count | bigint | | | + jit_emission_time | double precision | | | + jit_deform_count | bigint | | | + jit_deform_time | double precision | | | + parallel_workers_to_launch | bigint | | | + parallel_workers_launched | bigint | | | + generic_plan_calls | bigint | | | + custom_plan_calls | bigint | | | + stats_since | timestamp with time zone | | | + minmax_stats_since | timestamp with time zone | | | + +SELECT count(*) > 0 AS has_data FROM pg_stat_statements; + has_data +---------- + t +(1 row) + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/expected/plancache.out b/contrib/pg_stat_statements/expected/plancache.out new file mode 100644 index 0000000000000..e152de9f55130 --- /dev/null +++ b/contrib/pg_stat_statements/expected/plancache.out @@ -0,0 +1,224 @@ +-- +-- Tests with plan cache +-- +-- Setup +CREATE OR REPLACE FUNCTION select_one_func(int) RETURNS VOID AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; +CREATE OR REPLACE PROCEDURE select_one_proc(int) AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; +-- Prepared statements +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +PREPARE p1 AS 
SELECT $1 AS a; +SET plan_cache_mode TO force_generic_plan; +EXECUTE p1(1); + a +--- + 1 +(1 row) + +SET plan_cache_mode TO force_custom_plan; +EXECUTE p1(1); + a +--- + 1 +(1 row) + +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | query +-------+--------------------+-------------------+---------------------------------------------------- + 2 | 1 | 1 | PREPARE p1 AS SELECT $1 AS a + 1 | 0 | 0 | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | SET plan_cache_mode TO $1 +(3 rows) + +DEALLOCATE p1; +-- Extended query protocol +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT $1 AS a \parse p1 +SET plan_cache_mode TO force_generic_plan; +\bind_named p1 1 +; + a +--- + 1 +(1 row) + +SET plan_cache_mode TO force_custom_plan; +\bind_named p1 1 +; + a +--- + 1 +(1 row) + +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | query +-------+--------------------+-------------------+---------------------------------------------------- + 2 | 1 | 1 | SELECT $1 AS a + 1 | 0 | 0 | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | SET plan_cache_mode TO $1 +(3 rows) + +\close_prepared p1 +-- EXPLAIN [ANALYZE] EXECUTE +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +PREPARE p1 AS SELECT $1; +SET plan_cache_mode TO force_generic_plan; +EXPLAIN (COSTS OFF) EXECUTE p1(1); + QUERY PLAN +------------ + Result +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +SET plan_cache_mode TO force_custom_plan; +EXPLAIN (COSTS OFF) EXECUTE p1(1); + QUERY PLAN +------------ + Result +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | toplevel | query +-------+--------------------+-------------------+----------+---------------------------------------------------------------------------------- + 2 | 0 | 0 | t | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1) + 2 | 0 | 0 | t | EXPLAIN (COSTS OFF) EXECUTE p1(1) + 4 | 2 | 2 | f | PREPARE p1 AS SELECT $1 + 1 | 0 | 0 | t | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | t | SET plan_cache_mode TO $1 +(5 rows) + +RESET pg_stat_statements.track; +DEALLOCATE p1; +-- Functions/procedures +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET plan_cache_mode TO force_generic_plan; +SELECT select_one_func(1); + select_one_func +----------------- + +(1 row) + +CALL select_one_proc(1); +SET plan_cache_mode TO force_custom_plan; +SELECT select_one_func(1); + select_one_func +----------------- + +(1 row) + +CALL select_one_proc(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | toplevel | query 
+-------+--------------------+-------------------+----------+---------------------------------------------------- + 2 | 0 | 0 | t | CALL select_one_proc($1) + 4 | 2 | 2 | f | SELECT $1 + 1 | 0 | 0 | t | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | t | SELECT select_one_func($1) + 2 | 0 | 0 | t | SET plan_cache_mode TO $1 +(5 rows) + +-- +-- EXPLAIN [ANALYZE] EXECUTE + functions/procedures +-- +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET plan_cache_mode TO force_generic_plan; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +EXPLAIN (COSTS OFF) SELECT select_one_func(1); + QUERY PLAN +------------ + Result +(1 row) + +CALL select_one_proc(1); +SET plan_cache_mode TO force_custom_plan; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +EXPLAIN (COSTS OFF) SELECT select_one_func(1); + QUERY PLAN +------------ + Result +(1 row) + +CALL select_one_proc(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C", toplevel; + calls | generic_plan_calls | custom_plan_calls | toplevel | query +-------+--------------------+-------------------+----------+------------------------------------------------------------------------------------------------ + 2 | 0 | 0 | t | CALL select_one_proc($1) + 2 | 0 | 0 | t | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func($1) + 4 | 0 | 0 | f | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func($1); + 2 | 0 | 0 | t | EXPLAIN (COSTS OFF) SELECT select_one_func($1) + 4 | 2 | 2 | f | SELECT $1 + 1 | 0 | 0 | t | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | t | SET plan_cache_mode TO $1 +(7 rows) + +RESET pg_stat_statements.track; +-- +-- Cleanup +-- +DROP FUNCTION select_one_func(int); +DROP PROCEDURE select_one_proc(int); diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 01a6cbdcf6139..7b8bfbb1de78c 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -21,6 +21,7 @@ contrib_targets += pg_stat_statements install_data( 'pg_stat_statements.control', 'pg_stat_statements--1.4.sql', + 'pg_stat_statements--1.12--1.13.sql', 'pg_stat_statements--1.11--1.12.sql', 'pg_stat_statements--1.10--1.11.sql', 'pg_stat_statements--1.9--1.10.sql', @@ -54,6 +55,7 @@ tests += { 'privileges', 'extended', 'parallel', + 'plancache', 'cleanup', 'oldextversions', 'squashing', diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql new file mode 100644 index 0000000000000..2f0eaf14ec34d --- /dev/null +++ b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql @@ -0,0 +1,78 @@ +/* contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_stat_statements UPDATE TO '1.13'" to load this file. 
\quit + +/* First we have to remove them from the extension */ +ALTER EXTENSION pg_stat_statements DROP VIEW pg_stat_statements; +ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements(boolean); + +/* Then we can drop them */ +DROP VIEW pg_stat_statements; +DROP FUNCTION pg_stat_statements(boolean); + +/* Now redefine */ +CREATE FUNCTION pg_stat_statements(IN showtext boolean, + OUT userid oid, + OUT dbid oid, + OUT toplevel bool, + OUT queryid bigint, + OUT query text, + OUT plans int8, + OUT total_plan_time float8, + OUT min_plan_time float8, + OUT max_plan_time float8, + OUT mean_plan_time float8, + OUT stddev_plan_time float8, + OUT calls int8, + OUT total_exec_time float8, + OUT min_exec_time float8, + OUT max_exec_time float8, + OUT mean_exec_time float8, + OUT stddev_exec_time float8, + OUT rows int8, + OUT shared_blks_hit int8, + OUT shared_blks_read int8, + OUT shared_blks_dirtied int8, + OUT shared_blks_written int8, + OUT local_blks_hit int8, + OUT local_blks_read int8, + OUT local_blks_dirtied int8, + OUT local_blks_written int8, + OUT temp_blks_read int8, + OUT temp_blks_written int8, + OUT shared_blk_read_time float8, + OUT shared_blk_write_time float8, + OUT local_blk_read_time float8, + OUT local_blk_write_time float8, + OUT temp_blk_read_time float8, + OUT temp_blk_write_time float8, + OUT wal_records int8, + OUT wal_fpi int8, + OUT wal_bytes numeric, + OUT wal_buffers_full int8, + OUT jit_functions int8, + OUT jit_generation_time float8, + OUT jit_inlining_count int8, + OUT jit_inlining_time float8, + OUT jit_optimization_count int8, + OUT jit_optimization_time float8, + OUT jit_emission_count int8, + OUT jit_emission_time float8, + OUT jit_deform_count int8, + OUT jit_deform_time float8, + OUT parallel_workers_to_launch int8, + OUT parallel_workers_launched int8, + OUT generic_plan_calls int8, + OUT custom_plan_calls int8, + OUT stats_since timestamp with time zone, + OUT minmax_stats_since timestamp with time zone +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_stat_statements_1_13' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW pg_stat_statements AS + SELECT * FROM pg_stat_statements(true); + +GRANT SELECT ON pg_stat_statements TO PUBLIC; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index e7857f81ec057..9fc9635d3300d 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -85,7 +85,7 @@ PG_MODULE_MAGIC_EXT( #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat" /* Magic number identifying the stats file format */ -static const uint32 PGSS_FILE_HEADER = 0x20220408; +static const uint32 PGSS_FILE_HEADER = 0x20250731; /* PostgreSQL major version number, changes in which invalidate all entries */ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; @@ -114,6 +114,7 @@ typedef enum pgssVersion PGSS_V1_10, PGSS_V1_11, PGSS_V1_12, + PGSS_V1_13, } pgssVersion; typedef enum pgssStoreKind @@ -210,6 +211,8 @@ typedef struct Counters * to be launched */ int64 parallel_workers_launched; /* # of parallel workers actually * launched */ + int64 generic_plan_calls; /* number of calls using a generic plan */ + int64 custom_plan_calls; /* number of calls using a custom plan */ } Counters; /* @@ -323,6 +326,7 @@ PG_FUNCTION_INFO_V1(pg_stat_statements_1_9); PG_FUNCTION_INFO_V1(pg_stat_statements_1_10); PG_FUNCTION_INFO_V1(pg_stat_statements_1_11); PG_FUNCTION_INFO_V1(pg_stat_statements_1_12); 
+PG_FUNCTION_INFO_V1(pg_stat_statements_1_13); PG_FUNCTION_INFO_V1(pg_stat_statements); PG_FUNCTION_INFO_V1(pg_stat_statements_info); @@ -355,7 +359,8 @@ static void pgss_store(const char *query, int64 queryId, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, - int parallel_workers_launched); + int parallel_workers_launched, + PlannedStmtOrigin planOrigin); static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext); @@ -877,7 +882,8 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) NULL, jstate, 0, - 0); + 0, + PLAN_STMT_UNKNOWN); } /* @@ -957,7 +963,8 @@ pgss_planner(Query *parse, NULL, NULL, 0, - 0); + 0, + result->planOrigin); } else { @@ -1091,7 +1098,8 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL, NULL, queryDesc->estate->es_parallel_workers_to_launch, - queryDesc->estate->es_parallel_workers_launched); + queryDesc->estate->es_parallel_workers_launched, + queryDesc->plannedstmt->planOrigin); } if (prev_ExecutorEnd) @@ -1224,7 +1232,8 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, NULL, NULL, 0, - 0); + 0, + pstmt->planOrigin); } else { @@ -1287,7 +1296,8 @@ pgss_store(const char *query, int64 queryId, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, - int parallel_workers_launched) + int parallel_workers_launched, + PlannedStmtOrigin planOrigin) { pgssHashKey key; pgssEntry *entry; @@ -1495,6 +1505,12 @@ pgss_store(const char *query, int64 queryId, entry->counters.parallel_workers_to_launch += parallel_workers_to_launch; entry->counters.parallel_workers_launched += parallel_workers_launched; + /* plan cache counters */ + if (planOrigin == PLAN_STMT_CACHE_GENERIC) + entry->counters.generic_plan_calls++; + else if (planOrigin == PLAN_STMT_CACHE_CUSTOM) + entry->counters.custom_plan_calls++; + SpinLockRelease(&entry->mutex); } @@ -1562,7 +1578,8 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS) #define PG_STAT_STATEMENTS_COLS_V1_10 43 #define PG_STAT_STATEMENTS_COLS_V1_11 49 #define PG_STAT_STATEMENTS_COLS_V1_12 52 -#define PG_STAT_STATEMENTS_COLS 52 /* maximum of above */ +#define PG_STAT_STATEMENTS_COLS_V1_13 54 +#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */ /* * Retrieve statement statistics. @@ -1574,6 +1591,16 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS) * expected API version is identified by embedding it in the C name of the * function. Unfortunately we weren't bright enough to do that for 1.1. 
*/ +Datum +pg_stat_statements_1_13(PG_FUNCTION_ARGS) +{ + bool showtext = PG_GETARG_BOOL(0); + + pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext); + + return (Datum) 0; +} + Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS) { @@ -1732,6 +1759,10 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, if (api_version != PGSS_V1_12) elog(ERROR, "incorrect number of output arguments"); break; + case PG_STAT_STATEMENTS_COLS_V1_13: + if (api_version != PGSS_V1_13) + elog(ERROR, "incorrect number of output arguments"); + break; default: elog(ERROR, "incorrect number of output arguments"); } @@ -1984,6 +2015,11 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch); values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched); } + if (api_version >= PGSS_V1_13) + { + values[i++] = Int64GetDatumFast(tmp.generic_plan_calls); + values[i++] = Int64GetDatumFast(tmp.custom_plan_calls); + } if (api_version >= PGSS_V1_11) { values[i++] = TimestampTzGetDatum(stats_since); @@ -1999,6 +2035,7 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 : api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 : api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 : + api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 : -1 /* fail if you forget to update this assert */ )); tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); diff --git a/contrib/pg_stat_statements/pg_stat_statements.control b/contrib/pg_stat_statements/pg_stat_statements.control index d45ebc12e3605..2eee0ceffa894 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.control +++ b/contrib/pg_stat_statements/pg_stat_statements.control @@ -1,5 +1,5 @@ # pg_stat_statements extension comment = 'track planning and execution statistics of all SQL statements executed' -default_version = '1.12' +default_version = '1.13' module_pathname = '$libdir/pg_stat_statements' relocatable = true diff --git a/contrib/pg_stat_statements/sql/oldextversions.sql b/contrib/pg_stat_statements/sql/oldextversions.sql index 13b8ca28586d1..e416efe9ffbee 100644 --- a/contrib/pg_stat_statements/sql/oldextversions.sql +++ b/contrib/pg_stat_statements/sql/oldextversions.sql @@ -63,4 +63,9 @@ AlTER EXTENSION pg_stat_statements UPDATE TO '1.12'; \d pg_stat_statements SELECT count(*) > 0 AS has_data FROM pg_stat_statements; +-- New functions and views for pg_stat_statements in 1.13 +AlTER EXTENSION pg_stat_statements UPDATE TO '1.13'; +\d pg_stat_statements +SELECT count(*) > 0 AS has_data FROM pg_stat_statements; + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/sql/plancache.sql b/contrib/pg_stat_statements/sql/plancache.sql new file mode 100644 index 0000000000000..160ced7add368 --- /dev/null +++ b/contrib/pg_stat_statements/sql/plancache.sql @@ -0,0 +1,94 @@ +-- +-- Tests with plan cache +-- + +-- Setup +CREATE OR REPLACE FUNCTION select_one_func(int) RETURNS VOID AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; +CREATE OR REPLACE PROCEDURE select_one_proc(int) AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; + +-- Prepared statements +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +PREPARE p1 AS SELECT $1 AS a; +SET plan_cache_mode TO force_generic_plan; +EXECUTE p1(1); +SET plan_cache_mode TO force_custom_plan; +EXECUTE p1(1); +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + 
ORDER BY query COLLATE "C"; +DEALLOCATE p1; + +-- Extended query protocol +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT $1 AS a \parse p1 +SET plan_cache_mode TO force_generic_plan; +\bind_named p1 1 +; +SET plan_cache_mode TO force_custom_plan; +\bind_named p1 1 +; +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; +\close_prepared p1 + +-- EXPLAIN [ANALYZE] EXECUTE +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +PREPARE p1 AS SELECT $1; +SET plan_cache_mode TO force_generic_plan; +EXPLAIN (COSTS OFF) EXECUTE p1(1); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1); +SET plan_cache_mode TO force_custom_plan; +EXPLAIN (COSTS OFF) EXECUTE p1(1); +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; +RESET pg_stat_statements.track; +DEALLOCATE p1; + +-- Functions/procedures +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SET plan_cache_mode TO force_generic_plan; +SELECT select_one_func(1); +CALL select_one_proc(1); +SET plan_cache_mode TO force_custom_plan; +SELECT select_one_func(1); +CALL select_one_proc(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + +-- +-- EXPLAIN [ANALYZE] EXECUTE + functions/procedures +-- +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SET plan_cache_mode TO force_generic_plan; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1); +EXPLAIN (COSTS OFF) SELECT select_one_func(1); +CALL select_one_proc(1); +SET plan_cache_mode TO force_custom_plan; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1); +EXPLAIN (COSTS OFF) SELECT select_one_func(1); +CALL select_one_proc(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C", toplevel; + +RESET pg_stat_statements.track; + +-- +-- Cleanup +-- +DROP FUNCTION select_one_func(int); +DROP PROCEDURE select_one_proc(int); diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index 7baa07dcdbf7f..d753de5836efb 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -554,6 +554,24 @@ + + + generic_plan_calls bigint + + + Number of times the statement has been executed using a generic plan + + + + + + custom_plan_calls bigint + + + Number of times the statement has been executed using a custom plan + + + stats_since timestamp with time zone From 0decd5e89db9f5edb9b27351082f0d74aae7a9b6 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Thu, 31 Jul 2025 06:37:56 -0700 Subject: [PATCH 403/602] Sort dump objects independent of OIDs, for the 7 holdout object types. pg_dump sorts objects by their logical names, e.g. (nspname, relname, tgname), before dependency-driven reordering. That removes one source of logically-identical databases differing in their schema-only dumps. In other words, it helps with schema diffing. The logical name sort ignored essential sort keys for constraints, operators, PUBLICATION ... FOR TABLE, PUBLICATION ... FOR TABLES IN SCHEMA, operator classes, and operator families. 
pg_dump's sort then depended on object OID, yielding spurious schema diffs. After this change, OIDs affect dump order only in the event of catalog corruption. While pg_dump also wrongly ignored pg_collation.collencoding, CREATE COLLATION restrictions have been keeping that imperceptible in practical use. Use techniques like we use for object types already having full sort key coverage. Where the pertinent queries weren't fetching the ignored sort keys, this adds columns to those queries and stores those keys in memory for the long term. The ignorance of sort keys became more problematic when commit 172259afb563d35001410dc6daad78b250924038 added a schema diff test sensitive to it. Buildfarm member hippopotamus witnessed that. However, dump order stability isn't a new goal, and this might avoid other dump comparison failures. Hence, back-patch to v13 (all supported versions). Reviewed-by: Robert Haas Discussion: https://postgr.es/m/20250707192654.9e.nmisch@google.com Backpatch-through: 13 --- src/bin/pg_dump/common.c | 19 ++ src/bin/pg_dump/pg_dump.c | 59 +++++- src/bin/pg_dump/pg_dump.h | 6 + src/bin/pg_dump/pg_dump_sort.c | 238 ++++++++++++++++++++-- src/test/regress/expected/publication.out | 21 ++ src/test/regress/sql/publication.sql | 22 ++ 6 files changed, 335 insertions(+), 30 deletions(-) diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index aa1589e3331d2..a1976fae607d6 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -17,6 +17,7 @@ #include +#include "catalog/pg_am_d.h" #include "catalog/pg_class_d.h" #include "catalog/pg_collation_d.h" #include "catalog/pg_extension_d.h" @@ -944,6 +945,24 @@ findOprByOid(Oid oid) return (OprInfo *) dobj; } +/* + * findAccessMethodByOid + * finds the DumpableObject for the access method with the given oid + * returns NULL if not found + */ +AccessMethodInfo * +findAccessMethodByOid(Oid oid) +{ + CatalogId catId; + DumpableObject *dobj; + + catId.tableoid = AccessMethodRelationId; + catId.oid = oid; + dobj = findObjectByCatalogId(catId); + Assert(dobj == NULL || dobj->objType == DO_ACCESS_METHOD); + return (AccessMethodInfo *) dobj; +} + /* * findCollationByOid * finds the DumpableObject for the collation with the given oid diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1da6bd7d9726c..273117c977c52 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2207,6 +2207,13 @@ selectDumpableProcLang(ProcLangInfo *plang, Archive *fout) static void selectDumpableAccessMethod(AccessMethodInfo *method, Archive *fout) { + /* see getAccessMethods() comment about v9.6. 
*/ + if (fout->remoteVersion < 90600) + { + method->dobj.dump = DUMP_COMPONENT_NONE; + return; + } + if (checkExtensionMembership(&method->dobj, fout)) return; /* extension membership overrides all else */ @@ -6262,6 +6269,8 @@ getOperators(Archive *fout) int i_oprnamespace; int i_oprowner; int i_oprkind; + int i_oprleft; + int i_oprright; int i_oprcode; /* @@ -6273,6 +6282,8 @@ getOperators(Archive *fout) "oprnamespace, " "oprowner, " "oprkind, " + "oprleft, " + "oprright, " "oprcode::oid AS oprcode " "FROM pg_operator"); @@ -6288,6 +6299,8 @@ getOperators(Archive *fout) i_oprnamespace = PQfnumber(res, "oprnamespace"); i_oprowner = PQfnumber(res, "oprowner"); i_oprkind = PQfnumber(res, "oprkind"); + i_oprleft = PQfnumber(res, "oprleft"); + i_oprright = PQfnumber(res, "oprright"); i_oprcode = PQfnumber(res, "oprcode"); for (i = 0; i < ntups; i++) @@ -6301,6 +6314,8 @@ getOperators(Archive *fout) findNamespace(atooid(PQgetvalue(res, i, i_oprnamespace))); oprinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_oprowner)); oprinfo[i].oprkind = (PQgetvalue(res, i, i_oprkind))[0]; + oprinfo[i].oprleft = atooid(PQgetvalue(res, i, i_oprleft)); + oprinfo[i].oprright = atooid(PQgetvalue(res, i, i_oprright)); oprinfo[i].oprcode = atooid(PQgetvalue(res, i, i_oprcode)); /* Decide whether we want to dump it */ @@ -6329,6 +6344,7 @@ getCollations(Archive *fout) int i_collname; int i_collnamespace; int i_collowner; + int i_collencoding; query = createPQExpBuffer(); @@ -6339,7 +6355,8 @@ getCollations(Archive *fout) appendPQExpBufferStr(query, "SELECT tableoid, oid, collname, " "collnamespace, " - "collowner " + "collowner, " + "collencoding " "FROM pg_collation"); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); @@ -6353,6 +6370,7 @@ getCollations(Archive *fout) i_collname = PQfnumber(res, "collname"); i_collnamespace = PQfnumber(res, "collnamespace"); i_collowner = PQfnumber(res, "collowner"); + i_collencoding = PQfnumber(res, "collencoding"); for (i = 0; i < ntups; i++) { @@ -6364,6 +6382,7 @@ getCollations(Archive *fout) collinfo[i].dobj.namespace = findNamespace(atooid(PQgetvalue(res, i, i_collnamespace))); collinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_collowner)); + collinfo[i].collencoding = atoi(PQgetvalue(res, i, i_collencoding)); /* Decide whether we want to dump it */ selectDumpableObject(&(collinfo[i].dobj), fout); @@ -6454,16 +6473,28 @@ getAccessMethods(Archive *fout) int i_amhandler; int i_amtype; - /* Before 9.6, there are no user-defined access methods */ - if (fout->remoteVersion < 90600) - return; - query = createPQExpBuffer(); - /* Select all access methods from pg_am table */ - appendPQExpBufferStr(query, "SELECT tableoid, oid, amname, amtype, " - "amhandler::pg_catalog.regproc AS amhandler " - "FROM pg_am"); + /* + * Select all access methods from pg_am table. v9.6 introduced CREATE + * ACCESS METHOD, so earlier versions usually have only built-in access + * methods. v9.6 also changed the access method API, replacing dozens of + * pg_am columns with amhandler. Even if a user created an access method + * by "INSERT INTO pg_am", we have no way to translate pre-v9.6 pg_am + * columns to a v9.6+ CREATE ACCESS METHOD. Hence, before v9.6, read + * pg_am just to facilitate findAccessMethodByOid() providing the + * OID-to-name mapping. 
+ */ + appendPQExpBufferStr(query, "SELECT tableoid, oid, amname, "); + if (fout->remoteVersion >= 90600) + appendPQExpBufferStr(query, + "amtype, " + "amhandler::pg_catalog.regproc AS amhandler "); + else + appendPQExpBufferStr(query, + "'i'::pg_catalog.\"char\" AS amtype, " + "'-'::pg_catalog.regproc AS amhandler "); + appendPQExpBufferStr(query, "FROM pg_am"); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); @@ -6512,6 +6543,7 @@ getOpclasses(Archive *fout) OpclassInfo *opcinfo; int i_tableoid; int i_oid; + int i_opcmethod; int i_opcname; int i_opcnamespace; int i_opcowner; @@ -6521,7 +6553,7 @@ getOpclasses(Archive *fout) * system-defined opclasses at dump-out time. */ - appendPQExpBufferStr(query, "SELECT tableoid, oid, opcname, " + appendPQExpBufferStr(query, "SELECT tableoid, oid, opcmethod, opcname, " "opcnamespace, " "opcowner " "FROM pg_opclass"); @@ -6534,6 +6566,7 @@ getOpclasses(Archive *fout) i_tableoid = PQfnumber(res, "tableoid"); i_oid = PQfnumber(res, "oid"); + i_opcmethod = PQfnumber(res, "opcmethod"); i_opcname = PQfnumber(res, "opcname"); i_opcnamespace = PQfnumber(res, "opcnamespace"); i_opcowner = PQfnumber(res, "opcowner"); @@ -6547,6 +6580,7 @@ getOpclasses(Archive *fout) opcinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opcname)); opcinfo[i].dobj.namespace = findNamespace(atooid(PQgetvalue(res, i, i_opcnamespace))); + opcinfo[i].opcmethod = atooid(PQgetvalue(res, i, i_opcmethod)); opcinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_opcowner)); /* Decide whether we want to dump it */ @@ -6572,6 +6606,7 @@ getOpfamilies(Archive *fout) OpfamilyInfo *opfinfo; int i_tableoid; int i_oid; + int i_opfmethod; int i_opfname; int i_opfnamespace; int i_opfowner; @@ -6583,7 +6618,7 @@ getOpfamilies(Archive *fout) * system-defined opfamilies at dump-out time. 
*/ - appendPQExpBufferStr(query, "SELECT tableoid, oid, opfname, " + appendPQExpBufferStr(query, "SELECT tableoid, oid, opfmethod, opfname, " "opfnamespace, " "opfowner " "FROM pg_opfamily"); @@ -6597,6 +6632,7 @@ getOpfamilies(Archive *fout) i_tableoid = PQfnumber(res, "tableoid"); i_oid = PQfnumber(res, "oid"); i_opfname = PQfnumber(res, "opfname"); + i_opfmethod = PQfnumber(res, "opfmethod"); i_opfnamespace = PQfnumber(res, "opfnamespace"); i_opfowner = PQfnumber(res, "opfowner"); @@ -6609,6 +6645,7 @@ getOpfamilies(Archive *fout) opfinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opfname)); opfinfo[i].dobj.namespace = findNamespace(atooid(PQgetvalue(res, i, i_opfnamespace))); + opfinfo[i].opfmethod = atooid(PQgetvalue(res, i, i_opfmethod)); opfinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_opfowner)); /* Decide whether we want to dump it */ diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 93a4475d51b80..dde85ed156cc8 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -260,6 +260,8 @@ typedef struct _oprInfo DumpableObject dobj; const char *rolname; char oprkind; + Oid oprleft; + Oid oprright; Oid oprcode; } OprInfo; @@ -273,12 +275,14 @@ typedef struct _accessMethodInfo typedef struct _opclassInfo { DumpableObject dobj; + Oid opcmethod; const char *rolname; } OpclassInfo; typedef struct _opfamilyInfo { DumpableObject dobj; + Oid opfmethod; const char *rolname; } OpfamilyInfo; @@ -286,6 +290,7 @@ typedef struct _collInfo { DumpableObject dobj; const char *rolname; + int collencoding; } CollInfo; typedef struct _convInfo @@ -760,6 +765,7 @@ extern TableInfo *findTableByOid(Oid oid); extern TypeInfo *findTypeByOid(Oid oid); extern FuncInfo *findFuncByOid(Oid oid); extern OprInfo *findOprByOid(Oid oid); +extern AccessMethodInfo *findAccessMethodByOid(Oid oid); extern CollInfo *findCollationByOid(Oid oid); extern NamespaceInfo *findNamespaceByOid(Oid oid); extern ExtensionInfo *findExtensionByOid(Oid oid); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index f99a0797ea7fb..a02da3e9652c1 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -162,6 +162,8 @@ static DumpId postDataBoundId; static int DOTypeNameCompare(const void *p1, const void *p2); +static int pgTypeNameCompare(Oid typid1, Oid typid2); +static int accessMethodNameCompare(Oid am1, Oid am2); static bool TopoSort(DumpableObject **objs, int numObjs, DumpableObject **ordering, @@ -228,12 +230,39 @@ DOTypeNameCompare(const void *p1, const void *p2) else if (obj2->namespace) return 1; - /* Sort by name */ + /* + * Sort by name. With a few exceptions, names here are single catalog + * columns. To get a fuller picture, grep pg_dump.c for "dobj.name = ". + * Names here don't match "Name:" in plain format output, which is a + * _tocEntry.tag. For example, DumpableObject.name of a constraint is + * pg_constraint.conname, but _tocEntry.tag of a constraint is relname and + * conname joined with a space. + */ cmpval = strcmp(obj1->name, obj2->name); if (cmpval != 0) return cmpval; - /* To have a stable sort order, break ties for some object types */ + /* + * Sort by type. This helps types that share a type priority without + * sharing a unique name constraint, e.g. opclass and opfamily. + */ + cmpval = obj1->objType - obj2->objType; + if (cmpval != 0) + return cmpval; + + /* + * To have a stable sort order, break ties for some object types. Most + * catalogs have a natural key, e.g. pg_proc_proname_args_nsp_index. 
Where + * the above "namespace" and "name" comparisons don't cover all natural + * key columns, compare the rest here. + * + * The natural key usually refers to other catalogs by surrogate keys. + * Hence, this translates each of those references to the natural key of + * the referenced catalog. That may descend through multiple levels of + * catalog references. For example, to sort by pg_proc.proargtypes, + * descend to each pg_type and then further to its pg_namespace, for an + * overall sort by (nspname, typname). + */ if (obj1->objType == DO_FUNC || obj1->objType == DO_AGG) { FuncInfo *fobj1 = *(FuncInfo *const *) p1; @@ -246,22 +275,10 @@ DOTypeNameCompare(const void *p1, const void *p2) return cmpval; for (i = 0; i < fobj1->nargs; i++) { - TypeInfo *argtype1 = findTypeByOid(fobj1->argtypes[i]); - TypeInfo *argtype2 = findTypeByOid(fobj2->argtypes[i]); - - if (argtype1 && argtype2) - { - if (argtype1->dobj.namespace && argtype2->dobj.namespace) - { - cmpval = strcmp(argtype1->dobj.namespace->dobj.name, - argtype2->dobj.namespace->dobj.name); - if (cmpval != 0) - return cmpval; - } - cmpval = strcmp(argtype1->dobj.name, argtype2->dobj.name); - if (cmpval != 0) - return cmpval; - } + cmpval = pgTypeNameCompare(fobj1->argtypes[i], + fobj2->argtypes[i]); + if (cmpval != 0) + return cmpval; } } else if (obj1->objType == DO_OPERATOR) @@ -273,6 +290,57 @@ DOTypeNameCompare(const void *p1, const void *p2) cmpval = (oobj2->oprkind - oobj1->oprkind); if (cmpval != 0) return cmpval; + /* Within an oprkind, sort by argument type names */ + cmpval = pgTypeNameCompare(oobj1->oprleft, oobj2->oprleft); + if (cmpval != 0) + return cmpval; + cmpval = pgTypeNameCompare(oobj1->oprright, oobj2->oprright); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_OPCLASS) + { + OpclassInfo *opcobj1 = *(OpclassInfo *const *) p1; + OpclassInfo *opcobj2 = *(OpclassInfo *const *) p2; + + /* Sort by access method name, per pg_opclass_am_name_nsp_index */ + cmpval = accessMethodNameCompare(opcobj1->opcmethod, + opcobj2->opcmethod); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_OPFAMILY) + { + OpfamilyInfo *opfobj1 = *(OpfamilyInfo *const *) p1; + OpfamilyInfo *opfobj2 = *(OpfamilyInfo *const *) p2; + + /* Sort by access method name, per pg_opfamily_am_name_nsp_index */ + cmpval = accessMethodNameCompare(opfobj1->opfmethod, + opfobj2->opfmethod); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_COLLATION) + { + CollInfo *cobj1 = *(CollInfo *const *) p1; + CollInfo *cobj2 = *(CollInfo *const *) p2; + + /* + * Sort by encoding, per pg_collation_name_enc_nsp_index. Technically, + * this is not necessary, because wherever this changes dump order, + * restoring the dump fails anyway. CREATE COLLATION can't create a + * tie for this to break, because it imposes restrictions to make + * (nspname, collname) uniquely identify a collation within a given + * DatabaseEncoding. While pg_import_system_collations() can create a + * tie, pg_dump+restore fails after + * pg_import_system_collations('my_schema') does so. However, there's + * little to gain by ignoring one natural key column on the basis of + * those limitations elsewhere, so respect the full natural key like + * we do for other object types. 
+ */ + cmpval = cobj1->collencoding - cobj2->collencoding; + if (cmpval != 0) + return cmpval; } else if (obj1->objType == DO_ATTRDEF) { @@ -317,11 +385,143 @@ DOTypeNameCompare(const void *p1, const void *p2) if (cmpval != 0) return cmpval; } + else if (obj1->objType == DO_CONSTRAINT) + { + ConstraintInfo *robj1 = *(ConstraintInfo *const *) p1; + ConstraintInfo *robj2 = *(ConstraintInfo *const *) p2; - /* Usually shouldn't get here, but if we do, sort by OID */ + /* + * Sort domain constraints before table constraints, for consistency + * with our decision to sort CREATE DOMAIN before CREATE TABLE. + */ + if (robj1->condomain) + { + if (robj2->condomain) + { + /* Sort by domain name (domain namespace was considered) */ + cmpval = strcmp(robj1->condomain->dobj.name, + robj2->condomain->dobj.name); + if (cmpval != 0) + return cmpval; + } + else + return PRIO_TYPE - PRIO_TABLE; + } + else if (robj2->condomain) + return PRIO_TABLE - PRIO_TYPE; + else + { + /* Sort by table name (table namespace was considered already) */ + cmpval = strcmp(robj1->contable->dobj.name, + robj2->contable->dobj.name); + if (cmpval != 0) + return cmpval; + } + } + else if (obj1->objType == DO_PUBLICATION_REL) + { + PublicationRelInfo *probj1 = *(PublicationRelInfo *const *) p1; + PublicationRelInfo *probj2 = *(PublicationRelInfo *const *) p2; + + /* Sort by publication name, since (namespace, name) match the rel */ + cmpval = strcmp(probj1->publication->dobj.name, + probj2->publication->dobj.name); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_PUBLICATION_TABLE_IN_SCHEMA) + { + PublicationSchemaInfo *psobj1 = *(PublicationSchemaInfo *const *) p1; + PublicationSchemaInfo *psobj2 = *(PublicationSchemaInfo *const *) p2; + + /* Sort by publication name, since ->name is just nspname */ + cmpval = strcmp(psobj1->publication->dobj.name, + psobj2->publication->dobj.name); + if (cmpval != 0) + return cmpval; + } + + /* + * Shouldn't get here except after catalog corruption, but if we do, sort + * by OID. This may make logically-identical databases differ in the + * order of objects in dump output. Users will get spurious schema diffs. + * Expect flaky failures of 002_pg_upgrade.pl test 'dump outputs from + * original and restored regression databases match' if the regression + * database contains objects allowing that test to reach here. That's a + * consequence of the test using "pg_restore -j", which doesn't fully + * constrain OID assignment order. + */ + Assert(false); return oidcmp(obj1->catId.oid, obj2->catId.oid); } +/* Compare two OID-identified pg_type values by nspname, then by typname. */ +static int +pgTypeNameCompare(Oid typid1, Oid typid2) +{ + TypeInfo *typobj1; + TypeInfo *typobj2; + int cmpval; + + if (typid1 == typid2) + return 0; + + typobj1 = findTypeByOid(typid1); + typobj2 = findTypeByOid(typid2); + + if (!typobj1 || !typobj2) + { + /* + * getTypes() didn't find some OID. Assume catalog corruption, e.g. + * an oprright value without the corresponding OID in a pg_type row. + * Report as "equal", so the caller uses the next available basis for + * comparison, e.g. the next function argument. + * + * Unary operators have InvalidOid in oprleft (if oprkind='r') or in + * oprright (if oprkind='l'). Caller already sorted by oprkind, + * calling us only for like-kind operators. Hence, "typid1 == typid2" + * took care of InvalidOid. (v14 removed postfix operator support. + * Hence, when dumping from v14+, only oprleft can be InvalidOid.) 
+ */ + Assert(false); + return 0; + } + + if (!typobj1->dobj.namespace || !typobj2->dobj.namespace) + Assert(false); /* catalog corruption */ + else + { + cmpval = strcmp(typobj1->dobj.namespace->dobj.name, + typobj2->dobj.namespace->dobj.name); + if (cmpval != 0) + return cmpval; + } + return strcmp(typobj1->dobj.name, typobj2->dobj.name); +} + +/* Compare two OID-identified pg_am values by amname. */ +static int +accessMethodNameCompare(Oid am1, Oid am2) +{ + AccessMethodInfo *amobj1; + AccessMethodInfo *amobj2; + + if (am1 == am2) + return 0; + + amobj1 = findAccessMethodByOid(am1); + amobj2 = findAccessMethodByOid(am2); + + if (!amobj1 || !amobj2) + { + /* catalog corruption: handle like pgTypeNameCompare() does */ + Assert(false); + return 0; + } + + return strcmp(amobj1->dobj.name, amobj2->dobj.name); +} + /* * Sort the given objects into a safe dump order using dependency diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 3a2eacd793f70..1ec3fa34a2d5a 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -1934,3 +1934,24 @@ RESET client_min_messages; RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; +-- stage objects for pg_dump tests +CREATE SCHEMA pubme CREATE TABLE t0 (c int, d int) CREATE TABLE t1 (c int); +CREATE SCHEMA pubme2 CREATE TABLE t0 (c int, d int); +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION dump_pub_qual_1ct FOR + TABLE ONLY pubme.t0 (c, d) WHERE (c > 0); +CREATE PUBLICATION dump_pub_qual_2ct FOR + TABLE ONLY pubme.t0 (c) WHERE (c > 0), + TABLE ONLY pubme.t1 (c); +CREATE PUBLICATION dump_pub_nsp_1ct FOR + TABLES IN SCHEMA pubme; +CREATE PUBLICATION dump_pub_nsp_2ct FOR + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2; +CREATE PUBLICATION dump_pub_all FOR + TABLE ONLY pubme.t0, + TABLE ONLY pubme.t1 WHERE (c < 0), + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2 + WITH (publish_via_partition_root = true); +RESET client_min_messages; diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index c9e309190dfa6..2585f08318150 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -1229,3 +1229,25 @@ RESET client_min_messages; RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; + +-- stage objects for pg_dump tests +CREATE SCHEMA pubme CREATE TABLE t0 (c int, d int) CREATE TABLE t1 (c int); +CREATE SCHEMA pubme2 CREATE TABLE t0 (c int, d int); +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION dump_pub_qual_1ct FOR + TABLE ONLY pubme.t0 (c, d) WHERE (c > 0); +CREATE PUBLICATION dump_pub_qual_2ct FOR + TABLE ONLY pubme.t0 (c) WHERE (c > 0), + TABLE ONLY pubme.t1 (c); +CREATE PUBLICATION dump_pub_nsp_1ct FOR + TABLES IN SCHEMA pubme; +CREATE PUBLICATION dump_pub_nsp_2ct FOR + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2; +CREATE PUBLICATION dump_pub_all FOR + TABLE ONLY pubme.t0, + TABLE ONLY pubme.t1 WHERE (c < 0), + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2 + WITH (publish_via_partition_root = true); +RESET client_min_messages; From dbf5a83d4650fc893838a2f92306b3d6439f55ba Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 31 Jul 2025 15:15:44 +0200 Subject: [PATCH 404/602] Schema-qualify unnest() in ALTER DATABASE ... 
RESET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 9df8727c5067 failed to schema-qualify the unnest() call in the
query used to list the variables in ALTER DATABASE ... RESET.

If there's another unnest() function in the search_path, this could
cause failures or even security issues (when the tab-completion gets
used by privileged accounts).

Report and fix by Dagfinn Ilmari Mannsåker.

Backpatch to 18, same as 9df8727c5067.

Author: Dagfinn Ilmari Mannsåker
Reviewed-by: jian he
Discussion: https://postgr.es/m/87qzyghw2x.fsf%40wibble.ilmari.org
Discussion: https://postgr.es/m/87tt4lumqz.fsf%40wibble.ilmari.org
Backpatch-through: 18
---
 src/bin/psql/tab-complete.in.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index dbc586c5bc370..3c50847f958cd 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -1010,7 +1010,7 @@ static const SchemaQuery Query_for_trigger_of_table = {
 #define Query_for_list_of_database_vars \
 "SELECT conf FROM ("\
-" SELECT setdatabase, pg_catalog.split_part(unnest(setconfig),'=',1) conf"\
+" SELECT setdatabase, pg_catalog.split_part(pg_catalog.unnest(setconfig),'=',1) conf"\
 " FROM pg_db_role_setting "\
 " ) s, pg_database d "\
 " WHERE s.setdatabase = d.oid "\

From ca09ef3a6aa69a1250bc83e6d9517f28a2ff181c Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Thu, 31 Jul 2025 15:17:26 +0200
Subject: [PATCH 405/602] Fix tab completion for ALTER ROLE|USER ... RESET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit c407d5426b87 added tab completion for ALTER ROLE|USER ... RESET,
with the intent to offer only the variables actually set on the role.
But as soon as the user started typing something, it would start to
offer all possible matching variables.

Fix this the same way ALTER DATABASE ... RESET does it, i.e. by
properly considering the prefix.

A second issue causing similar symptoms (offering variables that are
not actually set for a role) was caused by a match to another pattern.
The ALTER DATABASE ... RESET was already excluded, so do the same thing
for ROLE/USER.

Report and fix by Dagfinn Ilmari Mannsåker.

Backpatch to 18, same as c407d5426b87.
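To make the search_path hazard behind the preceding unnest() fix concrete,
here is a minimal sketch (hypothetical schema and function names, not from
either patch):

CREATE SCHEMA evil;
CREATE FUNCTION evil.unnest(text[]) RETURNS SETOF text
    LANGUAGE sql AS $$ SELECT 'gotcha' $$;
SET search_path = evil, pg_catalog;
-- An unqualified unnest(setconfig) in the completion query now resolves
-- to evil.unnest(text[]) and runs with the privileges of the session
-- doing tab completion; writing pg_catalog.unnest(...) pins the
-- intended function.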
Author: Dagfinn Ilmari Mannsåker Reviewed-by: jian he Discussion: https://postgr.es/m/87qzyghw2x.fsf%40wibble.ilmari.org Discussion: https://postgr.es/m/87tt4lumqz.fsf%40wibble.ilmari.org Backpatch-through: 18 --- src/bin/psql/tab-complete.in.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 3c50847f958cd..1f2ca946fc500 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -1086,9 +1086,12 @@ Keywords_for_list_of_owner_roles, "PUBLIC" " WHERE usename LIKE '%s'" #define Query_for_list_of_user_vars \ -" SELECT pg_catalog.split_part(pg_catalog.unnest(rolconfig),'=',1) "\ -" FROM pg_catalog.pg_roles "\ -" WHERE rolname LIKE '%s'" +"SELECT conf FROM ("\ +" SELECT rolname, pg_catalog.split_part(pg_catalog.unnest(rolconfig),'=',1) conf"\ +" FROM pg_catalog.pg_roles"\ +" ) s"\ +" WHERE s.conf like '%s' "\ +" AND s.rolname LIKE '%s'" #define Query_for_list_of_access_methods \ " SELECT amname "\ @@ -2517,7 +2520,10 @@ match_previous_words(int pattern_id, /* ALTER USER,ROLE RESET */ else if (Matches("ALTER", "USER|ROLE", MatchAny, "RESET")) + { + set_completion_reference(prev2_wd); COMPLETE_WITH_QUERY_PLUS(Query_for_list_of_user_vars, "ALL"); + } /* ALTER USER,ROLE WITH */ else if (Matches("ALTER", "USER|ROLE", MatchAny, "WITH")) @@ -5015,7 +5021,7 @@ match_previous_words(int pattern_id, /* Complete with a variable name */ else if (TailMatches("SET|RESET") && !TailMatches("UPDATE", MatchAny, "SET") && - !TailMatches("ALTER", "DATABASE", MatchAny, "RESET")) + !TailMatches("ALTER", "DATABASE|USER|ROLE", MatchAny, "RESET")) COMPLETE_WITH_QUERY_VERBATIM_PLUS(Query_for_list_of_set_vars, "CONSTRAINTS", "TRANSACTION", From 2ab2d6f970584b7ca60cfdf6569336903aa88db5 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 1 Aug 2025 07:58:48 +0000 Subject: [PATCH 406/602] Fix a deadlock during ALTER SUBSCRIPTION ... DROP PUBLICATION. A deadlock can occur when the DDL command and the apply worker acquire catalog locks in different orders while dropping replication origins. The issue is rare in PG16 and higher branches because, in most cases, the tablesync worker performs the origin drop in those branches, and its locking sequence does not conflict with DDL operations. This patch ensures consistent lock acquisition to prevent such deadlocks. As per buildfarm. 
Reported-by: Alexander Lakhin Author: Ajin Cherian Reviewed-by: Hayato Kuroda Reviewed-by: vignesh C Reviewed-by: Amit Kapila Backpatch-through: 14, where it was introduced Discussion: https://postgr.es/m/bab95e12-6cc5-4ebb-80a8-3e41956aa297@gmail.com --- src/backend/catalog/pg_subscription.c | 21 +++++++++++-- src/backend/replication/logical/tablesync.c | 34 ++++++++++++++++++--- src/include/catalog/pg_subscription_rel.h | 2 +- 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index 63c2992d19f75..244acf52f3602 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -320,7 +320,7 @@ AddSubscriptionRelState(Oid subid, Oid relid, char state, */ void UpdateSubscriptionRelState(Oid subid, Oid relid, char state, - XLogRecPtr sublsn) + XLogRecPtr sublsn, bool already_locked) { Relation rel; HeapTuple tup; @@ -328,9 +328,24 @@ UpdateSubscriptionRelState(Oid subid, Oid relid, char state, Datum values[Natts_pg_subscription_rel]; bool replaces[Natts_pg_subscription_rel]; - LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); + if (already_locked) + { +#ifdef USE_ASSERT_CHECKING + LOCKTAG tag; - rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + Assert(CheckRelationOidLockedByMe(SubscriptionRelRelationId, + RowExclusiveLock, true)); + SET_LOCKTAG_OBJECT(tag, InvalidOid, SubscriptionRelationId, subid, 0); + Assert(LockHeldByMe(&tag, AccessShareLock, true)); +#endif + + rel = table_open(SubscriptionRelRelationId, NoLock); + } + else + { + LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); + rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + } /* Try finding existing mapping. */ tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP, diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 3fea0a0206ed3..d3356bc84ee0c 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -316,7 +316,8 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn) UpdateSubscriptionRelState(MyLogicalRepWorker->subid, MyLogicalRepWorker->relid, MyLogicalRepWorker->relstate, - MyLogicalRepWorker->relstate_lsn); + MyLogicalRepWorker->relstate_lsn, + false); /* * End streaming so that LogRepWorkerWalRcvConn can be used to drop @@ -425,6 +426,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) ListCell *lc; bool started_tx = false; bool should_exit = false; + Relation rel = NULL; Assert(!IsTransactionState()); @@ -492,7 +494,17 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) * worker to remove the origin tracking as if there is any * error while dropping we won't restart it to drop the * origin. So passing missing_ok = true. + * + * Lock the subscription and origin in the same order as we + * are doing during DDL commands to avoid deadlocks. See + * AlterSubscription_refresh. 
*/
+			LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid,
+							 0, AccessShareLock);
+
+			if (!rel)
+				rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
+
 			ReplicationOriginNameForLogicalRep(MyLogicalRepWorker->subid,
 											   rstate->relid,
 											   originname,
@@ -504,7 +516,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 			 */
 			UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
 									   rstate->relid, rstate->state,
-									   rstate->lsn);
+									   rstate->lsn, true);
 		}
 	}
 	else
@@ -555,7 +567,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 				 * This is required to avoid any undetected deadlocks
 				 * due to any existing lock as deadlock detector won't
 				 * be able to detect the waits on the latch.
+				 *
+				 * Also close any tables prior to the commit.
 				 */
+				if (rel)
+				{
+					table_close(rel, NoLock);
+					rel = NULL;
+				}
 				CommitTransactionCommand();
 				pgstat_report_stat(false);
 			}
@@ -623,6 +642,11 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 		}
 	}
 
+	/* Close table if opened */
+	if (rel)
+		table_close(rel, NoLock);
+
 	if (started_tx)
 	{
 		/*
@@ -1414,7 +1438,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 	UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
 							   MyLogicalRepWorker->relid,
 							   MyLogicalRepWorker->relstate,
-							   MyLogicalRepWorker->relstate_lsn);
+							   MyLogicalRepWorker->relstate_lsn,
+							   false);
 
 	CommitTransactionCommand();
 	pgstat_report_stat(true);
 
@@ -1547,7 +1572,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 	UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
 							   MyLogicalRepWorker->relid,
 							   SUBREL_STATE_FINISHEDCOPY,
-							   MyLogicalRepWorker->relstate_lsn);
+							   MyLogicalRepWorker->relstate_lsn,
+							   false);
 
 	CommitTransactionCommand();
 
diff --git a/src/include/catalog/pg_subscription_rel.h b/src/include/catalog/pg_subscription_rel.h
index c91797c869c24..f458447a0e5fb 100644
--- a/src/include/catalog/pg_subscription_rel.h
+++ b/src/include/catalog/pg_subscription_rel.h
@@ -85,7 +85,7 @@ typedef struct SubscriptionRelState
 extern void AddSubscriptionRelState(Oid subid, Oid relid, char state,
 									XLogRecPtr sublsn, bool retain_lock);
 extern void UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
-									   XLogRecPtr sublsn);
+									   XLogRecPtr sublsn, bool already_locked);
 extern char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn);
 extern void RemoveSubscriptionRel(Oid subid, Oid relid);

From a4801eb691ed18ca483f3e2b5f313d5610a7c839 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Fri, 1 Aug 2025 18:24:19 +0300
Subject: [PATCH 407/602] libpq: Complain about missing BackendKeyData later
 with PQgetCancel()

PostgreSQL always sends the BackendKeyData message at connection
startup, but there are some third party backend implementations out
there that don't support cancellation, and don't send the message [1].
While the protocol docs left it open to interpretation whether that is
valid behavior, libpq in PostgreSQL 17 and below accepted it. It does
not seem like the libpq behavior was intentional though, since it did
so by sending CancelRequest messages with all zeros to such servers
(instead of returning an error or making the cancel a no-op).

In version 18 the behavior was changed to return an error when trying
to create the cancel object with PQgetCancel() or PQcancelCreate().
This was done without any discussion, as part of supporting different
lengths of cancel packets for the new 3.2 version of the protocol.
This commit changes the behavior of PQgetCancel() / PQcancel() once
more to only return an error when the cancel object is actually used
to send a cancellation, instead of when merely creating the object.
The reason to do so is that some clients [2] create a cancel object as
part of their connection creation logic (thus having the cancel object
ready for later when they need it), so if creating the cancel object
returns an error, the whole connection attempt fails. By delaying the
error, such clients will still be able to connect to the third party
backend implementations in question, but when actually trying to
cancel a query, the user will be notified that that is not possible
for the server that they are connected to.

This commit only changes the behavior of the older PQgetCancel() /
PQcancel() functions, not the more modern PQcancelCreate() family of
functions. I.e. PQcancelCreate() returns a failed connection object
(CONNECTION_BAD) if the server didn't send a cancellation key. Unlike
the old PQgetCancel() function, we're not aware of any clients in the
field that use PQcancelCreate() during connection startup in a way
that would prevent connecting to such servers.

[1] AWS RDS Proxy is definitely one of them, and CockroachDB might be
another.

[2] psycopg2 (but not psycopg3).

Author: Jelte Fennema-Nio
Reviewed-by: Jacob Champion
Backpatch-through: 18
Discussion: https://www.postgresql.org/message-id/20250617.101056.1437027795118961504.ishii%40postgresql.org
---
 doc/src/sgml/protocol.sgml       |  5 +++++
 src/interfaces/libpq/fe-cancel.c | 28 +++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index b115884acb346..e56eac8fd0fa0 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -537,6 +537,11 @@
        The frontend should not respond to this message, but should
        continue listening for a ReadyForQuery message.
       
+      
+       The PostgreSQL server will always send this
+       message, but some third party backend implementations of the protocol
+       that don't support query cancellation are known not to.
+      
 
diff --git a/src/interfaces/libpq/fe-cancel.c b/src/interfaces/libpq/fe-cancel.c
index 65517c5703bca..c872a0267f089 100644
--- a/src/interfaces/libpq/fe-cancel.c
+++ b/src/interfaces/libpq/fe-cancel.c
@@ -379,7 +379,24 @@ PQgetCancel(PGconn *conn)
 
 	/* Check that we have received a cancellation key */
 	if (conn->be_cancel_key_len == 0)
-		return NULL;
+	{
+		/*
+		 * In case there is no cancel key, return an all-zero PGcancel object.
+		 * Actually calling PQcancel on this will fail, but we allow creating
+		 * the PGcancel object anyway. Arguably it would be better to return
+		 * NULL to indicate that cancellation is not possible, but there'd be
+		 * no way for the caller to distinguish "out of memory" from "server
+		 * did not send a cancel key". Also, this is how PQgetCancel() has
+		 * always behaved, and if we changed it, some clients would stop
+		 * working altogether with servers that don't support cancellation.
+		 * (The modern PQcancelCreate() function returns a failed connection
+		 * object instead.)
+		 *
+		 * The returned dummy object has cancel_pkt_len == 0; we check for
+		 * that in PQcancel() to identify it as a dummy.
+ */ + return calloc(1, sizeof(PGcancel)); + } cancel_req_len = offsetof(CancelRequestPacket, cancelAuthCode) + conn->be_cancel_key_len; cancel = malloc(offsetof(PGcancel, cancel_req) + cancel_req_len); @@ -544,6 +561,15 @@ PQcancel(PGcancel *cancel, char *errbuf, int errbufsize) return false; } + if (cancel->cancel_pkt_len == 0) + { + /* This is a dummy PGcancel object, see PQgetCancel */ + strlcpy(errbuf, "PQcancel() -- no cancellation key received", errbufsize); + /* strlcpy probably doesn't change errno, but be paranoid */ + SOCK_ERRNO_SET(save_errno); + return false; + } + /* * We need to open a temporary connection to the postmaster. Do this with * only kernel calls. From 0ed92cf50cc428dad1732a5e604e5450d47acba3 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 1 Aug 2025 10:06:57 -0700 Subject: [PATCH 408/602] pg_dump: reject combination of "only" and "with" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/8ce896d1a05040905cc1a3afbc04e94d8e95669a.camel@j-davis.com Backpatch-through: 18 --- src/bin/pg_dump/pg_dump.c | 19 ++++++++++++++----- src/bin/pg_dump/pg_restore.c | 19 ++++++++++++++----- src/bin/pg_dump/t/002_pg_dump.pl | 18 +++++++++++------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 273117c977c52..b1ac8d7b50994 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -860,6 +860,17 @@ main(int argc, char **argv) if (with_statistics && no_statistics) pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + /* reject conflicting "-only" and "with-" options */ + if (data_only && (with_schema || with_statistics)) + pg_fatal("options %s and %s cannot be used together", + "-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); + if (schema_only && (with_data || with_statistics)) + pg_fatal("options %s and %s cannot be used together", + "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); + if (statistics_only && (with_data || with_schema)) + pg_fatal("options %s and %s cannot be used together", + "--statistics-only", with_data ? "--with-data" : "--with-schema"); + if (schema_only && foreign_servers_include_patterns.head != NULL) pg_fatal("options -s/--schema-only and --include-foreign-data cannot be used together"); @@ -873,11 +884,9 @@ main(int argc, char **argv) pg_fatal("option --if-exists requires option -c/--clean"); /* - * Set derivative flags. An "-only" option may be overridden by an - * explicit "with-" option; e.g. "--schema-only --with-statistics" will - * include schema and statistics. Other ambiguous or nonsensical - * combinations, e.g. "--schema-only --no-schema", will have already - * caused an error in one of the checks above. + * Set derivative flags. Ambiguous or nonsensical combinations, e.g. + * "--schema-only --no-schema", will have already caused an error in one + * of the checks above. 
*/ dopt.dumpData = ((dopt.dumpData && !schema_only && !statistics_only) || (data_only || with_data)) && !no_data; diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index b4e1acdb63fbb..2c727b9f1560b 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -381,6 +381,17 @@ main(int argc, char **argv) if (with_statistics && no_statistics) pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + /* reject conflicting "only-" and "with-" options */ + if (data_only && (with_schema || with_statistics)) + pg_fatal("options %s and %s cannot be used together", + "-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); + if (schema_only && (with_data || with_statistics)) + pg_fatal("options %s and %s cannot be used together", + "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); + if (statistics_only && (with_data || with_schema)) + pg_fatal("options %s and %s cannot be used together", + "--statistics-only", with_data ? "--with-data" : "--with-schema"); + if (data_only && opts->dropSchema) pg_fatal("options -c/--clean and -a/--data-only cannot be used together"); @@ -399,11 +410,9 @@ main(int argc, char **argv) pg_fatal("cannot specify both --single-transaction and multiple jobs"); /* - * Set derivative flags. An "-only" option may be overridden by an - * explicit "with-" option; e.g. "--schema-only --with-statistics" will - * include schema and statistics. Other ambiguous or nonsensical - * combinations, e.g. "--schema-only --no-schema", will have already - * caused an error in one of the checks above. + * Set derivative flags. Ambiguous or nonsensical combinations, e.g. + * "--schema-only --no-schema", will have already caused an error in one + * of the checks above. */ opts->dumpData = ((opts->dumpData && !schema_only && !statistics_only) || (data_only || with_data)) && !no_data; diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 6c7ec80e271ce..d597842908e6d 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -799,13 +799,6 @@ 'postgres', ], }, - schema_only_with_statistics => { - dump_cmd => [ - 'pg_dump', '--no-sync', - "--file=$tempdir/schema_only_with_statistics.sql", - '--schema-only', '--with-statistics', 'postgres', - ], - }, no_schema => { dump_cmd => [ 'pg_dump', '--no-sync', @@ -5207,6 +5200,17 @@ qr/\Qpg_dump: error: no matching schemas were found for pattern\E/, 'no matching schemas'); +command_fails_like( + [ + 'pg_dump', + '--port' => $port, + '--strict-names', + '--schema-only', + '--with-statistics', + ], + qr/\Qpg_dump: error: options -s\/--schema-only and --with-statistics cannot be used together\E/, + 'cannot use --schema-only and --with-statistics together'); + command_fails_like( [ 'pg_dump', From a2c6c4ed3145a411c2591ebd7ca14f30dd98b896 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Fri, 1 Aug 2025 18:02:41 +0000 Subject: [PATCH 409/602] Fix typo in AutoVacLauncherMain(). 
Author: Yugo Nagata Discussion: https://postgr.es/m/20250802002027.cd35c481f6c6bae7ca2a3e26@sraoss.co.jp --- src/backend/postmaster/autovacuum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 9474095f271a1..8908603464c5c 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -562,10 +562,10 @@ AutoVacLauncherMain(const void *startup_data, size_t startup_data_len) /* * Create the initial database list. The invariant we want this list to - * keep is that it's ordered by decreasing next_time. As soon as an entry - * is updated to a higher time, it will be moved to the front (which is - * correct because the only operation is to add autovacuum_naptime to the - * entry, and time always increases). + * keep is that it's ordered by decreasing next_worker. As soon as an + * entry is updated to a higher time, it will be moved to the front (which + * is correct because the only operation is to add autovacuum_naptime to + * the entry, and time always increases). */ rebuild_database_list(InvalidOid); From 9eb6068fb64c36889102a09c030d1d9f4d832821 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 1 Aug 2025 16:52:11 -0500 Subject: [PATCH 410/602] Allow resetting unknown custom GUCs with reserved prefixes. Currently, ALTER DATABASE/ROLE/SYSTEM RESET [ALL] with an unknown custom GUC with a prefix reserved by MarkGUCPrefixReserved() errors (unless a superuser runs a RESET ALL variant). This is problematic for cases such as an extension library upgrade that removes a GUC. To fix, simply make sure the relevant code paths explicitly allow it. Note that we require superuser or privileges on the parameter to reset it. This is perhaps a bit more restrictive than is necessary, but it's not clear whether further relaxing the requirements is safe. Oversight in commit 88103567cb. The ALTER SYSTEM fix is dependent on commit 2d870b4aef, which first appeared in v17. Unfortunately, back-patching that commit would introduce ABI breakage, and while that breakage seems unlikely to bother anyone, it doesn't seem worth the risk. Hence, the ALTER SYSTEM part of this commit is omitted on v15 and v16. Reported-by: Mert Alev Reviewed-by: Laurenz Albe Discussion: https://postgr.es/m/18964-ba09dea8c98fccd6%40postgresql.org Backpatch-through: 15 --- contrib/auto_explain/Makefile | 2 ++ contrib/auto_explain/expected/alter_reset.out | 19 ++++++++++++++++ contrib/auto_explain/meson.build | 5 +++++ contrib/auto_explain/sql/alter_reset.sql | 22 +++++++++++++++++++ src/backend/utils/misc/guc.c | 21 +++++++++++++----- 5 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 contrib/auto_explain/expected/alter_reset.out create mode 100644 contrib/auto_explain/sql/alter_reset.sql diff --git a/contrib/auto_explain/Makefile b/contrib/auto_explain/Makefile index efd127d3cae64..94ab28e7c06b9 100644 --- a/contrib/auto_explain/Makefile +++ b/contrib/auto_explain/Makefile @@ -6,6 +6,8 @@ OBJS = \ auto_explain.o PGFILEDESC = "auto_explain - logging facility for execution plans" +REGRESS = alter_reset + TAP_TESTS = 1 ifdef USE_PGXS diff --git a/contrib/auto_explain/expected/alter_reset.out b/contrib/auto_explain/expected/alter_reset.out new file mode 100644 index 0000000000000..ec355189806ae --- /dev/null +++ b/contrib/auto_explain/expected/alter_reset.out @@ -0,0 +1,19 @@ +-- +-- This tests resetting unknown custom GUCs with reserved prefixes. 
There's +-- nothing specific to auto_explain; this is just a convenient place to put +-- this test. +-- +SELECT current_database() AS datname \gset +CREATE ROLE regress_ae_role; +ALTER DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role IN DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER SYSTEM SET auto_explain.bogus = 1; +LOAD 'auto_explain'; +WARNING: invalid configuration parameter name "auto_explain.bogus", removing it +DETAIL: "auto_explain" is now a reserved prefix. +ALTER DATABASE :"datname" RESET auto_explain.bogus; +ALTER ROLE regress_ae_role RESET auto_explain.bogus; +ALTER ROLE regress_ae_role IN DATABASE :"datname" RESET auto_explain.bogus; +ALTER SYSTEM RESET auto_explain.bogus; +DROP ROLE regress_ae_role; diff --git a/contrib/auto_explain/meson.build b/contrib/auto_explain/meson.build index 92dc9df6f7cac..a9b45cc235f12 100644 --- a/contrib/auto_explain/meson.build +++ b/contrib/auto_explain/meson.build @@ -20,6 +20,11 @@ tests += { 'name': 'auto_explain', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'alter_reset', + ], + }, 'tap': { 'tests': [ 't/001_auto_explain.pl', diff --git a/contrib/auto_explain/sql/alter_reset.sql b/contrib/auto_explain/sql/alter_reset.sql new file mode 100644 index 0000000000000..bf621454ec24a --- /dev/null +++ b/contrib/auto_explain/sql/alter_reset.sql @@ -0,0 +1,22 @@ +-- +-- This tests resetting unknown custom GUCs with reserved prefixes. There's +-- nothing specific to auto_explain; this is just a convenient place to put +-- this test. +-- + +SELECT current_database() AS datname \gset +CREATE ROLE regress_ae_role; + +ALTER DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role IN DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER SYSTEM SET auto_explain.bogus = 1; + +LOAD 'auto_explain'; + +ALTER DATABASE :"datname" RESET auto_explain.bogus; +ALTER ROLE regress_ae_role RESET auto_explain.bogus; +ALTER ROLE regress_ae_role IN DATABASE :"datname" RESET auto_explain.bogus; +ALTER SYSTEM RESET auto_explain.bogus; + +DROP ROLE regress_ae_role; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 667df448732f2..ce5449f287853 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -4722,8 +4722,13 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt) * the config file cannot cause postmaster start to fail, so we * don't have to be too tense about possibly installing a bad * value.) + * + * As an exception, we skip this check if this is a RESET command + * for an unknown custom GUC, else there'd be no way for users to + * remove such settings with reserved prefixes. */ - (void) assignable_custom_variable_name(name, false, ERROR); + if (value || !valid_custom_variable_name(name)) + (void) assignable_custom_variable_name(name, false, ERROR); } /* @@ -6711,6 +6716,7 @@ validate_option_array_item(const char *name, const char *value, { struct config_generic *gconf; + bool reset_custom; /* * There are three cases to consider: @@ -6729,16 +6735,21 @@ validate_option_array_item(const char *name, const char *value, * it's assumed to be fully validated.) * * name is not known and can't be created as a placeholder. Throw error, - * unless skipIfNoPermissions is true, in which case return false. + * unless skipIfNoPermissions or reset_custom is true. 
If reset_custom is
+	 * true, this is a RESET or RESET ALL operation for an unknown custom GUC
+	 * with a reserved prefix, in which case we want to fall through to the
+	 * placeholder case described in the preceding paragraph (else there'd be
+	 * no way for users to remove them).  Otherwise, return false.
 	 */
-	gconf = find_option(name, true, skipIfNoPermissions, ERROR);
-	if (!gconf)
+	reset_custom = (!value && valid_custom_variable_name(name));
+	gconf = find_option(name, true, skipIfNoPermissions || reset_custom, ERROR);
+	if (!gconf && !reset_custom)
 	{
 		/* not known, failed to make a placeholder */
 		return false;
 	}
 
-	if (gconf->flags & GUC_CUSTOM_PLACEHOLDER)
+	if (!gconf || gconf->flags & GUC_CUSTOM_PLACEHOLDER)
 	{
 		/*
 		 * We cannot do any meaningful check on the value, so only permissions

From 3b3fa949009393541e552b8ae42cc2b03be25549 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Sat, 2 Aug 2025 17:08:45 +0900
Subject: [PATCH 411/602] Fix use-after-free with INSERT ON CONFLICT changes
 in reorderbuffer.c

In ReorderBufferProcessTXN(), used to send the data of a transaction to
an output plugin, INSERT ON CONFLICT changes (INTERNAL_SPEC_INSERT) are
delayed until a confirmation record arrives (INTERNAL_SPEC_CONFIRM),
updating the change being processed.

8c58624df462 has added an extra step after processing a change to
update the progress of the transaction, by calling the callback
update_progress_txn() based on the LSN stored in a change after a
threshold of CHANGES_THRESHOLD (100) is reached. This logic has missed
the fact that for an INSERT ON CONFLICT change the data is freed once
processed, hence update_progress_txn() could be called pointing to an
LSN value that's already been freed. This could result in random
crashes, depending on the workload.

Per discussion, this issue is fixed by reusing in update_progress_txn()
the LSN of the change being processed, as saved at the beginning of the
loop, meaning that for an INTERNAL_SPEC_CONFIRM change the progress is
updated using the LSN of the INTERNAL_SPEC_CONFIRM change, and not the
LSN of its INTERNAL_SPEC_INSERT change. This is actually more correct,
as we want to update the progress to point to the INTERNAL_SPEC_CONFIRM
change.

Masahiko Sawada has found a nice trick to reproduce the issue:
hardcode CHANGES_THRESHOLD at 1 and run test_decoding (test "ddl" being
enough) on an instance running valgrind.

The bug has been analyzed by Ethan Mertz, who also originally suggested
the solution used in this patch.

Issue introduced by 8c58624df462, so backpatch down to v16.
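The decoding path in question can be exercised with test_decoding, as a
rough sketch (assumes wal_level = logical; the table and slot names are
arbitrary):

CREATE TABLE spec_t (a int PRIMARY KEY);
SELECT pg_create_logical_replication_slot('spec_slot', 'test_decoding');
INSERT INTO spec_t VALUES (1) ON CONFLICT (a) DO NOTHING;
-- The speculative insert is queued as INTERNAL_SPEC_INSERT and only
-- emitted once its INTERNAL_SPEC_CONFIRM record is decoded:
SELECT data FROM pg_logical_slot_get_changes('spec_slot', NULL, NULL);
SELECT pg_drop_replication_slot('spec_slot');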
Author: Ethan Mertz Co-authored-by: Michael Paquier Reviewed-by: Amit Kapila Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/aIsQqDZ7x4LAQ6u1@paquier.xyz Backpatch-through: 16 --- src/backend/replication/logical/reorderbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 5febd154b6bae..34cf05668ae84 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2599,7 +2599,7 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, if (++changes_count >= CHANGES_THRESHOLD) { - rb->update_progress_txn(rb, txn, change->lsn); + rb->update_progress_txn(rb, txn, prev_lsn); changes_count = 0; } } From 37e774458542f2fc34ce3610c5fe39cf7b1d4818 Mon Sep 17 00:00:00 2001 From: Etsuro Fujita Date: Sat, 2 Aug 2025 18:30:00 +0900 Subject: [PATCH 412/602] Doc: clarify the restrictions of AFTER triggers with transition tables. It was not very clear that the triggers are only allowed on plain tables (not foreign tables). Also, rephrase the documentation for better readability. Follow up to commit 9e6104c66. Reported-by: Etsuro Fujita Author: Ashutosh Bapat Reviewed-by: Etsuro Fujita Discussion: https://postgr.es/m/CAPmGK16XBs9ptNr8Lk4f-tJZogf6y-Prz%3D8yhvJbb_4dpsc3mQ%40mail.gmail.com Backpatch-through: 13 --- doc/src/sgml/ref/create_trigger.sgml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/create_trigger.sgml b/doc/src/sgml/ref/create_trigger.sgml index ed6d206ae7143..0d8d463479bc1 100644 --- a/doc/src/sgml/ref/create_trigger.sgml +++ b/doc/src/sgml/ref/create_trigger.sgml @@ -197,9 +197,11 @@ CREATE [ OR REPLACE ] [ CONSTRAINT ] TRIGGER name of the rows inserted, deleted, or modified by the current SQL statement. This feature lets the trigger see a global view of what the statement did, not just one row at a time. This option is only allowed for - an AFTER trigger that is not a constraint trigger; also, if - the trigger is an UPDATE trigger, it must not specify - a column_name list. + an AFTER trigger on a plain table (not a foreign table). + The trigger should not be a constraint trigger. Also, if the trigger is + an UPDATE trigger, it must not specify + a column_name list when using + this option. OLD TABLE may only be specified once, and only for a trigger that can fire on UPDATE or DELETE; it creates a transition relation containing the before-images of all rows From 2106fe25a1c025a75effb7fefcbd47c68d4b9914 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sat, 2 Aug 2025 19:54:23 +0900 Subject: [PATCH 413/602] Fix typo in foreign_key.sql Introduced by eec0040c4bcd. 
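Tying back to the CREATE TRIGGER clarification above, a minimal sketch of a
transition-table trigger on a plain table (hypothetical names, not from the
patch); the same REFERENCING clause on a foreign table, or on a constraint
trigger, is rejected per the documented restriction:

CREATE TABLE trans_demo (a int);
CREATE FUNCTION count_new() RETURNS trigger LANGUAGE plpgsql AS $$
BEGIN
    RAISE NOTICE 'inserted % row(s)', (SELECT count(*) FROM newtab);
    RETURN NULL;
END $$;
CREATE TRIGGER trans_demo_trig AFTER INSERT ON trans_demo
    REFERENCING NEW TABLE AS newtab
    FOR EACH STATEMENT EXECUTE FUNCTION count_new();
INSERT INTO trans_demo VALUES (1), (2);  -- NOTICE:  inserted 2 row(s)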
Author: Chao Li Discussion: https://postgr.es/m/CAEoWx2kKMdtWKQiYNuwG2L41YwHA7G3sUsRfD9esPJwZyX1+Eg@mail.gmail.com Backpatch-through: 18 --- src/test/regress/expected/foreign_key.out | 2 +- src/test/regress/sql/foreign_key.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index f9bd252444f53..dc541d61adfa5 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -1750,7 +1750,7 @@ Indexes: Referenced by: TABLE "fk_partitioned_fk" CONSTRAINT "fk_partitioned_fk_a_b_fkey" FOREIGN KEY (a, b) REFERENCES fk_notpartitioned_pk(a, b) --- Check the exsting FK trigger +-- Check the existing FK trigger SELECT conname, tgrelid::regclass as tgrel, regexp_replace(tgname, '[0-9]+', 'N') as tgname, tgtype FROM pg_trigger t JOIN pg_constraint c ON (t.tgconstraint = c.oid) WHERE tgrelid IN (SELECT relid FROM pg_partition_tree('fk_partitioned_fk'::regclass) diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index cfcecb4e911ad..39174ad1eb9a0 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1296,7 +1296,7 @@ UPDATE fk_notpartitioned_pk SET b = 2504 WHERE a = 2500; -- check psql behavior \d fk_notpartitioned_pk --- Check the exsting FK trigger +-- Check the existing FK trigger SELECT conname, tgrelid::regclass as tgrel, regexp_replace(tgname, '[0-9]+', 'N') as tgname, tgtype FROM pg_trigger t JOIN pg_constraint c ON (t.tgconstraint = c.oid) WHERE tgrelid IN (SELECT relid FROM pg_partition_tree('fk_partitioned_fk'::regclass) From 6a46089e458f2d700dd3b8c3f6fc782de933529a Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sat, 2 Aug 2025 07:51:42 -0700 Subject: [PATCH 414/602] Simplify options in pg_dump and pg_restore. Remove redundant options --with-data and --with-schema, and rename --with-statistics to just --statistics. Reviewed-by: Nathan Bossart Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/f379d0aeefe8effe13302a436bc28f549f09e924.camel@j-davis.com Backpatch-through: 18 --- doc/src/sgml/ref/pg_dump.sgml | 38 +++------- doc/src/sgml/ref/pg_dumpall.sgml | 38 +++------- doc/src/sgml/ref/pg_restore.sgml | 40 +++------- src/bin/pg_dump/pg_dump.c | 43 +++-------- src/bin/pg_dump/pg_dumpall.c | 16 +--- src/bin/pg_dump/pg_restore.c | 35 +++------ src/bin/pg_dump/t/002_pg_dump.pl | 124 +++++++++++++++---------------- src/bin/pg_upgrade/dump.c | 2 +- 8 files changed, 118 insertions(+), 218 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 2ae084b5fa6fc..0bc7609bdf815 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1354,6 +1354,15 @@ PostgreSQL documentation + + + + + Dump statistics. + + + + @@ -1440,33 +1449,6 @@ PostgreSQL documentation - - - - - Dump data. This is the default. - - - - - - - - - Dump schema (data definitions). This is the default. - - - - - - - - - Dump statistics. - - - - @@ -1682,7 +1664,7 @@ CREATE DATABASE foo WITH TEMPLATE template0; - If is specified, + If is specified, pg_dump will include most optimizer statistics in the resulting dump file. 
However, some statistics may not be included, such as those created explicitly with CREATE STATISTICS or
diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index f4cbc8288e3ad..364442f00f28e 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml
@@ -605,6 +605,15 @@ exclude database PATTERN + + + + + Dump statistics. + + + +
@@ -640,33 +649,6 @@ exclude database PATTERN - - - - - Dump data. This is the default. - - - - - - - - - Dump schema (data definitions). This is the default. - - - - - - - - - Dump statistics. - - - -
@@ -878,7 +860,7 @@ exclude database PATTERN - If --with-statistics is specified, + If --statistics is specified, pg_dumpall will include most optimizer statistics in the resulting dump file. However, some statistics may not be included, such as those created explicitly with CREATE STATISTICS or
diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 2abe05d47e936..261ead1503955 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -815,6 +815,16 @@ PostgreSQL documentation + + + + + Output commands to restore statistics, if the archive contains them. + This is the default. + + + +
@@ -873,36 +883,6 @@ PostgreSQL documentation - - - - - Output commands to restore data, if the archive contains them. - This is the default. - - - - - - - - - Output commands to restore schema (data definitions), if the archive - contains them. This is the default. - - - - - - - - - Output commands to restore statistics, if the archive contains them. - This is the default. - - - -
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b1ac8d7b50994..f3a353a61a58e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c
@@ -449,8 +449,6 @@ main(int argc, char **argv) bool data_only = false; bool schema_only = false; bool statistics_only = false; - bool with_data = false; - bool with_schema = false; bool with_statistics = false; bool no_data = false; bool no_schema = false;
@@ -514,6 +512,7 @@ main(int argc, char **argv) {"section", required_argument, NULL, 5}, {"serializable-deferrable", no_argument, &dopt.serializable_deferrable, 1}, {"snapshot", required_argument, NULL, 6}, + {"statistics", no_argument, NULL, 22}, {"statistics-only", no_argument, NULL, 18}, {"strict-names", no_argument, &strict_names, 1}, {"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1},
@@ -528,9 +527,6 @@ main(int argc, char **argv) {"no-toast-compression", no_argument, &dopt.no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, {"no-sync", no_argument, NULL, 7}, - {"with-data", no_argument, NULL, 22}, - {"with-schema", no_argument, NULL, 23}, - {"with-statistics", no_argument, NULL, 24}, {"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1}, {"rows-per-insert", required_argument, NULL, 10}, {"include-foreign-data", required_argument, NULL, 11},
@@ -798,14 +794,6 @@ main(int argc, char **argv) break; case 22: - with_data = true; - break; - - case 23: - with_schema = true; - break; - - case 24: with_statistics = true; break;
@@ -852,24 +840,17 @@ main(int argc, char **argv) if (statistics_only && no_statistics) pg_fatal("options --statistics-only and --no-statistics cannot be used together"); - /* reject conflicting "with-" and "no-" options */ - if (with_data && no_data) - pg_fatal("options --with-data and --no-data cannot be used together"); - if (with_schema && no_schema) - pg_fatal("options --with-schema and --no-schema cannot be used together"); + /* 
reject conflicting "no-" options */ if (with_statistics && no_statistics) - pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + pg_fatal("options --statistics and --no-statistics cannot be used together"); - /* reject conflicting "-only" and "with-" options */ - if (data_only && (with_schema || with_statistics)) - pg_fatal("options %s and %s cannot be used together", - "-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); - if (schema_only && (with_data || with_statistics)) + /* reject conflicting "-only" options */ + if (data_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); - if (statistics_only && (with_data || with_schema)) + "-a/--data-only", "--statistics"); + if (schema_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - "--statistics-only", with_data ? "--with-data" : "--with-schema"); + "-s/--schema-only", "--statistics"); if (schema_only && foreign_servers_include_patterns.head != NULL) pg_fatal("options -s/--schema-only and --include-foreign-data cannot be used together"); @@ -889,9 +870,9 @@ main(int argc, char **argv) * of the checks above. */ dopt.dumpData = ((dopt.dumpData && !schema_only && !statistics_only) || - (data_only || with_data)) && !no_data; + data_only) && !no_data; dopt.dumpSchema = ((dopt.dumpSchema && !data_only && !statistics_only) || - (schema_only || with_schema)) && !no_schema; + schema_only) && !no_schema; dopt.dumpStatistics = ((dopt.dumpStatistics && !schema_only && !data_only) || (statistics_only || with_statistics)) && !no_statistics; @@ -1364,6 +1345,7 @@ help(const char *progname) printf(_(" --sequence-data include sequence data in dump\n")); printf(_(" --serializable-deferrable wait until the dump can run without anomalies\n")); printf(_(" --snapshot=SNAPSHOT use given snapshot for the dump\n")); + printf(_(" --statistics dump the statistics\n")); printf(_(" --statistics-only dump only the statistics, not schema or data\n")); printf(_(" --strict-names require table and/or schema include patterns to\n" " match at least one entity each\n")); @@ -1372,9 +1354,6 @@ help(const char *progname) printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - printf(_(" --with-data dump the data\n")); - printf(_(" --with-schema dump the schema\n")); - printf(_(" --with-statistics dump the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=DBNAME database to dump\n")); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 87d10df07c411..27aa1b656989c 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -105,8 +105,6 @@ static int no_subscriptions = 0; static int no_toast_compression = 0; static int no_unlogged_table_data = 0; static int no_role_passwords = 0; -static int with_data = 0; -static int with_schema = 0; static int with_statistics = 0; static int server_version; static int load_via_partition_root = 0; @@ -180,11 +178,9 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 4}, {"no-toast-compression", no_argument, &no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, - {"with-data", no_argument, &with_data, 1}, - {"with-schema", no_argument, &with_schema, 1}, - {"with-statistics", no_argument, &with_statistics, 1}, {"on-conflict-do-nothing", no_argument, 
&on_conflict_do_nothing, 1}, {"rows-per-insert", required_argument, NULL, 7}, + {"statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 8}, {"sequence-data", no_argument, &sequence_data, 1}, @@ -475,12 +471,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --no-toast-compression"); if (no_unlogged_table_data) appendPQExpBufferStr(pgdumpopts, " --no-unlogged-table-data"); - if (with_data) - appendPQExpBufferStr(pgdumpopts, " --with-data"); - if (with_schema) - appendPQExpBufferStr(pgdumpopts, " --with-schema"); if (with_statistics) - appendPQExpBufferStr(pgdumpopts, " --with-statistics"); + appendPQExpBufferStr(pgdumpopts, " --statistics"); if (on_conflict_do_nothing) appendPQExpBufferStr(pgdumpopts, " --on-conflict-do-nothing"); if (statistics_only) @@ -712,13 +704,11 @@ help(void) printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n")); printf(_(" --sequence-data include sequence data in dump\n")); + printf(_(" --statistics dump the statistics\n")); printf(_(" --statistics-only dump only the statistics, not schema or data\n")); printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - printf(_(" --with-data dump the data\n")); - printf(_(" --with-schema dump the schema\n")); - printf(_(" --with-statistics dump the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connect using connection string\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 2c727b9f1560b..6c129278bc52b 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -82,8 +82,6 @@ main(int argc, char **argv) static int no_subscriptions = 0; static int strict_names = 0; static int statistics_only = 0; - static int with_data = 0; - static int with_schema = 0; static int with_statistics = 0; struct option cmdopts[] = { @@ -139,9 +137,7 @@ main(int argc, char **argv) {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, {"no-statistics", no_argument, &no_statistics, 1}, - {"with-data", no_argument, &with_data, 1}, - {"with-schema", no_argument, &with_schema, 1}, - {"with-statistics", no_argument, &with_statistics, 1}, + {"statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 4}, @@ -373,24 +369,17 @@ main(int argc, char **argv) if (statistics_only && no_statistics) pg_fatal("options --statistics-only and --no-statistics cannot be used together"); - /* reject conflicting "with-" and "no-" options */ - if (with_data && no_data) - pg_fatal("options --with-data and --no-data cannot be used together"); - if (with_schema && no_schema) - pg_fatal("options --with-schema and --no-schema cannot be used together"); + /* reject conflicting "no-" options */ if (with_statistics && no_statistics) - pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + pg_fatal("options --statistics and --no-statistics cannot be used together"); - /* reject conflicting "only-" and "with-" options */ - if (data_only && (with_schema || with_statistics)) + /* reject conflicting "only-" options */ + if (data_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - 
"-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); - if (schema_only && (with_data || with_statistics)) + "-a/--data-only", "--statistics"); + if (schema_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); - if (statistics_only && (with_data || with_schema)) - pg_fatal("options %s and %s cannot be used together", - "--statistics-only", with_data ? "--with-data" : "--with-schema"); + "-s/--schema-only", "--statistics"); if (data_only && opts->dropSchema) pg_fatal("options -c/--clean and -a/--data-only cannot be used together"); @@ -415,9 +404,9 @@ main(int argc, char **argv) * of the checks above. */ opts->dumpData = ((opts->dumpData && !schema_only && !statistics_only) || - (data_only || with_data)) && !no_data; + data_only) && !no_data; opts->dumpSchema = ((opts->dumpSchema && !data_only && !statistics_only) || - (schema_only || with_schema)) && !no_schema; + schema_only) && !no_schema; opts->dumpStatistics = ((opts->dumpStatistics && !schema_only && !data_only) || (statistics_only || with_statistics)) && !no_statistics; @@ -558,6 +547,7 @@ usage(const char *progname) printf(_(" --no-table-access-method do not restore table access methods\n")); printf(_(" --no-tablespaces do not restore tablespace assignments\n")); printf(_(" --section=SECTION restore named section (pre-data, data, or post-data)\n")); + printf(_(" --statistics restore the statistics\n")); printf(_(" --statistics-only restore only the statistics, not schema or data\n")); printf(_(" --strict-names require table and/or schema include patterns to\n" " match at least one entity each\n")); @@ -565,9 +555,6 @@ usage(const char *progname) printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - printf(_(" --with-data restore the data\n")); - printf(_(" --with-schema restore the schema\n")); - printf(_(" --with-statistics restore the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index d597842908e6d..a86b38466de14 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -68,7 +68,7 @@ '--no-data', '--sequence-data', '--binary-upgrade', - '--with-statistics', + '--statistics', '--dbname' => 'postgres', # alternative way to specify database ], restore_cmd => [ @@ -76,7 +76,7 @@ '--format' => 'custom', '--verbose', '--file' => "$tempdir/binary_upgrade.sql", - '--with-statistics', + '--statistics', "$tempdir/binary_upgrade.dump", ], }, @@ -90,13 +90,13 @@ '--format' => 'custom', '--compress' => '1', '--file' => "$tempdir/compression_gzip_custom.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_gzip_custom.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_gzip_custom.dump", ], command_like => { @@ -119,7 +119,7 @@ '--format' => 'directory', '--compress' => 'gzip:1', '--file' => "$tempdir/compression_gzip_dir", - '--with-statistics', + '--statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -137,7 +137,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_gzip_dir.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_gzip_dir", ], }, @@ -150,7 +150,7 @@ '--format' => 
'plain', '--compress' => '1', '--file' => "$tempdir/compression_gzip_plain.sql.gz", - '--with-statistics', + '--statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -169,13 +169,13 @@ '--format' => 'custom', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_custom.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_lz4_custom.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_lz4_custom.dump", ], command_like => { @@ -198,7 +198,7 @@ '--format' => 'directory', '--compress' => 'lz4:1', '--file' => "$tempdir/compression_lz4_dir", - '--with-statistics', + '--statistics', 'postgres', ], # Verify that data files were compressed @@ -210,7 +210,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_lz4_dir.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_lz4_dir", ], }, @@ -223,7 +223,7 @@ '--format' => 'plain', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_plain.sql.lz4", - '--with-statistics', + '--statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -245,13 +245,13 @@ '--format' => 'custom', '--compress' => 'zstd', '--file' => "$tempdir/compression_zstd_custom.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_zstd_custom.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_zstd_custom.dump", ], command_like => { @@ -273,7 +273,7 @@ '--format' => 'directory', '--compress' => 'zstd:1', '--file' => "$tempdir/compression_zstd_dir", - '--with-statistics', + '--statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -294,7 +294,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_zstd_dir.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_zstd_dir", ], }, @@ -308,7 +308,7 @@ '--format' => 'plain', '--compress' => 'zstd:long', '--file' => "$tempdir/compression_zstd_plain.sql.zst", - '--with-statistics', + '--statistics', 'postgres', ], # Decompress the generated file to run through the tests. 
@@ -327,7 +327,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/clean.sql", '--clean', - '--with-statistics', + '--statistics', '--dbname' => 'postgres', # alternative way to specify database ], }, @@ -338,7 +338,7 @@ '--clean', '--if-exists', '--encoding' => 'UTF8', # no-op, just for testing - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -357,7 +357,7 @@ '--create', '--no-reconnect', # no-op, just for testing '--verbose', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -376,7 +376,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults.sql", - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -385,7 +385,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_no_public.sql", - '--with-statistics', + '--statistics', 'regress_pg_dump_test', ], }, @@ -395,7 +395,7 @@ 'pg_dump', '--no-sync', '--clean', '--file' => "$tempdir/defaults_no_public_clean.sql", - '--with-statistics', + '--statistics', 'regress_pg_dump_test', ], }, @@ -404,7 +404,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_public_owner.sql", - '--with-statistics', + '--statistics', 'regress_public_owner', ], }, @@ -419,14 +419,14 @@ 'pg_dump', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_custom_format.dump", ], command_like => { @@ -451,14 +451,14 @@ 'pg_dump', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_dir_format", ], command_like => { @@ -484,13 +484,13 @@ '--format' => 'directory', '--jobs' => 2, '--file' => "$tempdir/defaults_parallel", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/defaults_parallel.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_parallel", ], }, @@ -502,14 +502,14 @@ 'pg_dump', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.tar", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_tar_format.tar", ], }, @@ -518,7 +518,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_dump_test_schema.sql", '--exclude-schema' => 'dump_test', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -527,7 +527,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_test_table.sql", '--exclude-table' => 'dump_test.test_table', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -536,7 +536,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_measurement.sql", '--exclude-table-and-children' => 'dump_test.measurement', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -546,7 +546,7 @@ '--file' => "$tempdir/exclude_measurement_data.sql", '--exclude-table-data-and-children' => 'dump_test.measurement', '--no-unlogged-table-data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -556,7 +556,7 @@ '--file' => "$tempdir/exclude_test_table_data.sql", '--exclude-table-data' => 'dump_test.test_table', '--no-unlogged-table-data', - 
'--with-statistics', + '--statistics', 'postgres', ], }, @@ -575,7 +575,7 @@ '--file' => "$tempdir/pg_dumpall_globals.sql", '--globals-only', '--no-sync', - '--with-statistics', + '--statistics', ], }, pg_dumpall_globals_clean => { @@ -585,14 +585,14 @@ '--globals-only', '--clean', '--no-sync', - '--with-statistics', + '--statistics', ], }, pg_dumpall_dbprivs => { dump_cmd => [ 'pg_dumpall', '--no-sync', '--file' => "$tempdir/pg_dumpall_dbprivs.sql", - '--with-statistics', + '--statistics', ], }, pg_dumpall_exclude => { @@ -602,7 +602,7 @@ '--file' => "$tempdir/pg_dumpall_exclude.sql", '--exclude-database' => '*dump_test*', '--no-sync', - '--with-statistics', + '--statistics', ], }, no_toast_compression => { @@ -610,7 +610,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_toast_compression.sql", '--no-toast-compression', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -619,7 +619,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_large_objects.sql", '--no-large-objects', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -628,7 +628,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_policies.sql", '--no-policies', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -637,7 +637,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_privs.sql", '--no-privileges', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -646,7 +646,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_owner.sql", '--no-owner', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -655,7 +655,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_table_access_method.sql", '--no-table-access-method', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -664,7 +664,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/only_dump_test_schema.sql", '--schema' => 'dump_test', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -675,7 +675,7 @@ '--table' => 'dump_test.test_table', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -686,7 +686,7 @@ '--table-and-children' => 'dump_test.measurement', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -696,7 +696,7 @@ '--file' => "$tempdir/role.sql", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -709,13 +709,13 @@ '--file' => "$tempdir/role_parallel", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/role_parallel.sql", - '--with-statistics', + '--statistics', "$tempdir/role_parallel", ], }, @@ -744,7 +744,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_pre_data.sql", '--section' => 'pre-data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -753,7 +753,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_data.sql", '--section' => 'data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -762,7 +762,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_post_data.sql", '--section' => 'post-data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -773,7 +773,7 @@ '--schema' => 'dump_test', '--large-objects', '--no-large-objects', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -789,7 +789,7 @@ 'pg_dump', '--no-sync', 
"--file=$tempdir/no_data_no_schema.sql", '--no-data', '--no-schema', 'postgres', - '--with-statistics', + '--statistics', ], }, statistics_only => { @@ -803,7 +803,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', "--file=$tempdir/no_schema.sql", '--no-schema', - '--with-statistics', 'postgres', + '--statistics', 'postgres', ], },); @@ -5206,10 +5206,10 @@ '--port' => $port, '--strict-names', '--schema-only', - '--with-statistics', + '--statistics', ], - qr/\Qpg_dump: error: options -s\/--schema-only and --with-statistics cannot be used together\E/, - 'cannot use --schema-only and --with-statistics together'); + qr/\Qpg_dump: error: options -s\/--schema-only and --statistics cannot be used together\E/, + 'cannot use --schema-only and --statistics together'); command_fails_like( [ diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 183f08ce1e86f..55f6e7b4d9c3e 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -58,7 +58,7 @@ generate_old_dump(void) (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? "" : "--sequence-data", log_opts.verbose ? "--verbose" : "", - user_opts.do_statistics ? "--with-statistics" : "--no-statistics", + user_opts.do_statistics ? "--statistics" : "--no-statistics", log_opts.dumpdir, sql_file_name, escaped_connstr.data); From 12efa48978c6dba5eca1b95758127181783fb217 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Sun, 3 Aug 2025 10:49:03 +0900 Subject: [PATCH 415/602] Fix assertion failure in pgbench when handling multiple pipeline sync messages. Previously, when running pgbench in pipeline mode with a custom script that triggered retriable errors (e.g., serialization errors), an assertion failure could occur: Assertion failed: (res == ((void*)0)), function discardUntilSync, file pgbench.c, line 3515. The root cause was that pgbench incorrectly assumed only a single pipeline sync message would be received at the end. In reality, multiple pipeline sync messages can be sent and must be handled properly. This commit fixes the issue by updating pgbench to correctly process multiple pipeline sync messages, preventing the assertion failure. Back-patch to v15, where the bug was introduced. Author: Fujii Masao Reviewed-by: Tatsuo Ishii Discussion: https://postgr.es/m/CAHGQGwFAX56Tfx+1ppo431OSWiLLuW72HaGzZ39NkLkop6bMzQ@mail.gmail.com Backpatch-through: 15 --- src/bin/pgbench/pgbench.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 497a936c141f3..125f3c7bbbe5b 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -3495,6 +3495,8 @@ doRetry(CState *st, pg_time_usec_t *now) static int discardUntilSync(CState *st) { + bool received_sync = false; + /* send a sync */ if (!PQpipelineSync(st->con)) { @@ -3509,10 +3511,21 @@ discardUntilSync(CState *st) PGresult *res = PQgetResult(st->con); if (PQresultStatus(res) == PGRES_PIPELINE_SYNC) + received_sync = true; + else if (received_sync) { - PQclear(res); - res = PQgetResult(st->con); + /* + * PGRES_PIPELINE_SYNC must be followed by another + * PGRES_PIPELINE_SYNC or NULL; otherwise, assert failure. + */ Assert(res == NULL); + + /* + * Reset ongoing sync count to 0 since all PGRES_PIPELINE_SYNC + * results have been discarded. + */ + st->num_syncs = 0; + PQclear(res); break; } PQclear(res); From bb049a79d3447e97c0d4fa220600c423c4474bf9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 18:26:19 -0400 Subject: [PATCH 416/602] Improve our support for Valgrind's leak tracking. 
When determining whether an allocated chunk is still reachable, Valgrind will consider only pointers within what it believes to be allocated chunks. Normally, all of a block obtained from malloc() would be considered "allocated" --- but it turns out that if we use VALGRIND_MEMPOOL_ALLOC to designate sub-section(s) of a malloc'ed block as allocated, all the rest of that malloc'ed block is ignored. This leads to lots of false positives of course. In particular, in any multi-malloc-block context, all but the primary block were reported as leaked. We also had a problem with context "ident" strings, which were reported as leaked unless there was some other pointer to them besides the one in the context header. To fix, we need to use VALGRIND_MEMPOOL_ALLOC to designate a context's management structs (the context struct itself and any per-block headers) as allocated chunks. That forces moving the VALGRIND_CREATE_MEMPOOL/VALGRIND_DESTROY_MEMPOOL calls into the per-context-type code, so that the pool identifier can be made as soon as we've allocated the initial block, but otherwise it's fairly straightforward. Note that in Valgrind's eyes there is no distinction between these allocations and the allocations that the mmgr modules hand out to user code. That's fine for now, but perhaps someday we'll want to do better yet. When reading this patch, it's helpful to start with the comments added at the head of mcxt.c. Author: Andres Freund Co-authored-by: Tom Lane Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us Discussion: https://postgr.es/m/20210317181531.7oggpqevzz6bka3g@alap3.anarazel.de --- src/backend/utils/mmgr/aset.c | 71 +++++++++++++++++++++++++++-- src/backend/utils/mmgr/bump.c | 31 ++++++++++++- src/backend/utils/mmgr/generation.c | 29 ++++++++++++ src/backend/utils/mmgr/mcxt.c | 23 +++++++--- src/backend/utils/mmgr/slab.c | 32 +++++++++++++ src/include/utils/memdebug.h | 1 + 6 files changed, 177 insertions(+), 10 deletions(-) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 666ecd8f78d0e..9ef109ca586bd 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -103,6 +103,8 @@ #define ALLOC_BLOCKHDRSZ MAXALIGN(sizeof(AllocBlockData)) #define ALLOC_CHUNKHDRSZ sizeof(MemoryChunk) +#define FIRST_BLOCKHDRSZ (MAXALIGN(sizeof(AllocSetContext)) + \ + ALLOC_BLOCKHDRSZ) typedef struct AllocBlockData *AllocBlock; /* forward reference */ @@ -458,6 +460,21 @@ AllocSetContextCreateInternal(MemoryContext parent, * we'd leak the header/initial block if we ereport in this stretch. */ + /* Create a vpool associated with the context */ + VALGRIND_CREATE_MEMPOOL(set, 0, false); + + /* + * Create a vchunk covering both the AllocSetContext struct and the keeper + * block's header. (Perhaps it would be more sensible for these to be two + * separate vchunks, but doing that seems to tickle bugs in some versions + * of Valgrind.) We must have these vchunks, and also a vchunk for each + * subsequently-added block header, so that Valgrind considers the + * pointers within them while checking for leaked memory. Note that + * Valgrind doesn't distinguish between these vchunks and those created by + * mcxt.c for the user-accessible-data chunks we allocate. 
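+ * + * As a quick reference, the client requests involved are (this merely + * summarizes the calls made here and in mcxt.c, it adds none): + * VALGRIND_CREATE_MEMPOOL creates the vpool, VALGRIND_MEMPOOL_ALLOC and + * VALGRIND_MEMPOOL_FREE cover and drop individual vchunks, + * VALGRIND_MEMPOOL_CHANGE moves a vchunk when realloc() relocates a block, + * VALGRIND_MEMPOOL_TRIM drops every vchunk lying outside a given address + * range, and VALGRIND_DESTROY_MEMPOOL discards the whole vpool at context + * deletion.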
+ */ + VALGRIND_MEMPOOL_ALLOC(set, set, FIRST_BLOCKHDRSZ); + /* Fill in the initial block's block header */ block = KeeperBlock(set); block->aset = set; @@ -585,6 +602,14 @@ AllocSetReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, block->freeptr - ((char *) block)); #endif + + /* + * We need to free the block header's vchunk explicitly, although + * the user-data vchunks within will go away in the TRIM below. + * Otherwise Valgrind complains about leaked allocations. + */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } block = next; @@ -592,6 +617,14 @@ AllocSetReset(MemoryContext context) Assert(context->mem_allocated == keepersize); + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the AllocSetContext and + * keeper-block header. This gets rid of the vchunks for whatever user + * data is getting discarded by the context reset. + */ + VALGRIND_MEMPOOL_TRIM(set, set, FIRST_BLOCKHDRSZ); + /* Reset block size allocation sequence, too */ set->nextBlockSize = set->initBlockSize; } @@ -648,6 +681,9 @@ AllocSetDelete(MemoryContext context) freelist->first_free = (AllocSetContext *) oldset->header.nextchild; freelist->num_free--; + /* Destroy the context's vpool --- see notes below */ + VALGRIND_DESTROY_MEMPOOL(oldset); + /* All that remains is to free the header/initial block */ free(oldset); } @@ -675,13 +711,24 @@ AllocSetDelete(MemoryContext context) #endif if (!IsKeeperBlock(set, block)) + { + /* As in AllocSetReset, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); free(block); + } block = next; } Assert(context->mem_allocated == keepersize); + /* + * Destroy the vpool. We don't seem to need to explicitly free the + * initial block's header vchunk, nor any user-data vchunks that Valgrind + * still knows about; they'll all go away automatically. + */ + VALGRIND_DESTROY_MEMPOOL(set); + /* Finally, free the context header, including the keeper block */ free(set); } @@ -716,6 +763,9 @@ AllocSetAllocLarge(MemoryContext context, Size size, int flags) if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, ALLOC_BLOCKHDRSZ); + context->mem_allocated += blksize; block->aset = set; @@ -922,6 +972,9 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags, if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, ALLOC_BLOCKHDRSZ); + context->mem_allocated += blksize; block->aset = set; @@ -1104,6 +1157,10 @@ AllocSetFree(void *pointer) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, block->freeptr - ((char *) block)); #endif + + /* As in AllocSetReset, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } else @@ -1184,6 +1241,7 @@ AllocSetRealloc(void *pointer, Size size, int flags) * realloc() to make the containing block bigger, or smaller, with * minimum space wastage. 
*/ + AllocBlock newblock; Size chksize; Size blksize; Size oldblksize; @@ -1223,14 +1281,21 @@ AllocSetRealloc(void *pointer, Size size, int flags) blksize = chksize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; oldblksize = block->endptr - ((char *) block); - block = (AllocBlock) realloc(block, blksize); - if (block == NULL) + newblock = (AllocBlock) realloc(block, blksize); + if (newblock == NULL) { /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); return MemoryContextAllocationFailure(&set->header, size, flags); } + /* + * Move the block-header vchunk explicitly. (mcxt.c will take care of + * moving the vchunk for the user data.) + */ + VALGRIND_MEMPOOL_CHANGE(set, block, newblock, ALLOC_BLOCKHDRSZ); + block = newblock; + /* updated separately, not to underflow when (oldblksize > blksize) */ set->header.mem_allocated -= oldblksize; set->header.mem_allocated += blksize; @@ -1294,7 +1359,7 @@ AllocSetRealloc(void *pointer, Size size, int flags) /* Ensure any padding bytes are marked NOACCESS. */ VALGRIND_MAKE_MEM_NOACCESS((char *) pointer + size, chksize - size); - /* Disallow access to the chunk header . */ + /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); return pointer; diff --git a/src/backend/utils/mmgr/bump.c b/src/backend/utils/mmgr/bump.c index f7a37d1b3e86c..2805d55a2eca9 100644 --- a/src/backend/utils/mmgr/bump.c +++ b/src/backend/utils/mmgr/bump.c @@ -45,7 +45,9 @@ #include "utils/memutils_memorychunk.h" #include "utils/memutils_internal.h" -#define Bump_BLOCKHDRSZ MAXALIGN(sizeof(BumpBlock)) +#define Bump_BLOCKHDRSZ MAXALIGN(sizeof(BumpBlock)) +#define FIRST_BLOCKHDRSZ (MAXALIGN(sizeof(BumpContext)) + \ + Bump_BLOCKHDRSZ) /* No chunk header unless built with MEMORY_CONTEXT_CHECKING */ #ifdef MEMORY_CONTEXT_CHECKING @@ -189,6 +191,12 @@ BumpContextCreate(MemoryContext parent, const char *name, Size minContextSize, * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header and initial block if we ereport in this stretch. */ + + /* See comments about Valgrind interactions in aset.c */ + VALGRIND_CREATE_MEMPOOL(set, 0, false); + /* This vchunk covers the BumpContext and the keeper block header */ + VALGRIND_MEMPOOL_ALLOC(set, set, FIRST_BLOCKHDRSZ); + dlist_init(&set->blocks); /* Fill in the initial block's block header */ @@ -262,6 +270,14 @@ BumpReset(MemoryContext context) BumpBlockFree(set, block); } + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the BumpContext and keeper-block + * header. This gets rid of the vchunks for whatever user data is getting + * discarded by the context reset. 
+ */ + VALGRIND_MEMPOOL_TRIM(set, set, FIRST_BLOCKHDRSZ); + /* Reset block size allocation sequence, too */ set->nextBlockSize = set->initBlockSize; @@ -279,6 +295,10 @@ BumpDelete(MemoryContext context) { /* Reset to release all releasable BumpBlocks */ BumpReset(context); + + /* Destroy the vpool -- see notes in aset.c */ + VALGRIND_DESTROY_MEMPOOL(context); + /* And free the context header and keeper block */ free(context); } @@ -318,6 +338,9 @@ BumpAllocLarge(MemoryContext context, Size size, int flags) if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Bump_BLOCKHDRSZ); + context->mem_allocated += blksize; /* the block is completely full */ @@ -455,6 +478,9 @@ BumpAllocFromNewBlock(MemoryContext context, Size size, int flags, if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Bump_BLOCKHDRSZ); + context->mem_allocated += blksize; /* initialize the new block */ @@ -606,6 +632,9 @@ BumpBlockFree(BumpContext *set, BumpBlock *block) wipe_mem(block, ((char *) block->endptr - (char *) block)); #endif + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index 18679ad4f1e41..cfafc9bf0829d 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -45,6 +45,8 @@ #define Generation_BLOCKHDRSZ MAXALIGN(sizeof(GenerationBlock)) #define Generation_CHUNKHDRSZ sizeof(MemoryChunk) +#define FIRST_BLOCKHDRSZ (MAXALIGN(sizeof(GenerationContext)) + \ + Generation_BLOCKHDRSZ) #define Generation_CHUNK_FRACTION 8 @@ -221,6 +223,12 @@ GenerationContextCreate(MemoryContext parent, * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header if we ereport in this stretch. */ + + /* See comments about Valgrind interactions in aset.c */ + VALGRIND_CREATE_MEMPOOL(set, 0, false); + /* This vchunk covers the GenerationContext and the keeper block header */ + VALGRIND_MEMPOOL_ALLOC(set, set, FIRST_BLOCKHDRSZ); + dlist_init(&set->blocks); /* Fill in the initial block's block header */ @@ -309,6 +317,14 @@ GenerationReset(MemoryContext context) GenerationBlockFree(set, block); } + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the GenerationContext and + * keeper-block header. This gets rid of the vchunks for whatever user + * data is getting discarded by the context reset. 
+ */ + VALGRIND_MEMPOOL_TRIM(set, set, FIRST_BLOCKHDRSZ); + /* set it so new allocations to make use of the keeper block */ set->block = KeeperBlock(set); @@ -329,6 +345,10 @@ GenerationDelete(MemoryContext context) { /* Reset to release all releasable GenerationBlocks */ GenerationReset(context); + + /* Destroy the vpool -- see notes in aset.c */ + VALGRIND_DESTROY_MEMPOOL(context); + /* And free the context header and keeper block */ free(context); } @@ -365,6 +385,9 @@ GenerationAllocLarge(MemoryContext context, Size size, int flags) if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Generation_BLOCKHDRSZ); + context->mem_allocated += blksize; /* block with a single (used) chunk */ @@ -487,6 +510,9 @@ GenerationAllocFromNewBlock(MemoryContext context, Size size, int flags, if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Generation_BLOCKHDRSZ); + context->mem_allocated += blksize; /* initialize the new block */ @@ -677,6 +703,9 @@ GenerationBlockFree(GenerationContext *set, GenerationBlock *block) wipe_mem(block, block->blksize); #endif + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index ce01dce9861da..073aa6c4fc5b3 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -8,6 +8,23 @@ * context-type-specific operations via the function pointers in a * context's MemoryContextMethods struct. * + * A note about Valgrind support: when USE_VALGRIND is defined, we provide + * support for memory leak tracking at the allocation-unit level. Valgrind + * does leak detection by tracking allocated "chunks", which can be grouped + * into "pools". The "chunk" terminology is overloaded, since we use that + * word for our allocation units, and it's sometimes important to distinguish + * those from the Valgrind objects that describe them. To reduce confusion, + * let's use the terms "vchunk" and "vpool" for the Valgrind objects. + * + * We use a separate vpool for each memory context. The context-type-specific + * code is responsible for creating and deleting the vpools, and also for + * creating vchunks to cover its management data structures such as block + * headers. (There must be a vchunk that includes every pointer we want + * Valgrind to consider for leak-tracking purposes.) This module creates + * and deletes the vchunks that cover the caller-visible allocated chunks. + * However, the context-type-specific code must handle cleaning up those + * vchunks too during memory context reset operations. 
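+ * + * To recap the division of labor (a summary of the above, not new rules): + * the context-type-specific modules issue VALGRIND_CREATE_MEMPOOL and + * VALGRIND_DESTROY_MEMPOOL, plus VALGRIND_MEMPOOL_ALLOC/FREE/TRIM for + * their block headers and reset cleanup, while this module issues + * VALGRIND_MEMPOOL_ALLOC/FREE/CHANGE for the chunks handed out to callers.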
+ * * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -418,8 +435,6 @@ MemoryContextResetOnly(MemoryContext context) context->methods->reset(context); context->isReset = true; - VALGRIND_DESTROY_MEMPOOL(context); - VALGRIND_CREATE_MEMPOOL(context, 0, false); } } @@ -526,8 +541,6 @@ MemoryContextDeleteOnly(MemoryContext context) context->ident = NULL; context->methods->delete_context(context); - - VALGRIND_DESTROY_MEMPOOL(context); } /* @@ -1170,8 +1183,6 @@ MemoryContextCreate(MemoryContext node, node->nextchild = NULL; node->allowInCritSection = false; } - - VALGRIND_CREATE_MEMPOOL(node, 0, false); } /* diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index d32c0d318fbf4..0e35abcf5a055 100644 --- a/src/backend/utils/mmgr/slab.c +++ b/src/backend/utils/mmgr/slab.c @@ -377,6 +377,11 @@ SlabContextCreate(MemoryContext parent, * we'd leak the header if we ereport in this stretch. */ + /* See comments about Valgrind interactions in aset.c */ + VALGRIND_CREATE_MEMPOOL(slab, 0, false); + /* This vchunk covers the SlabContext only */ + VALGRIND_MEMPOOL_ALLOC(slab, slab, sizeof(SlabContext)); + /* Fill in SlabContext-specific header fields */ slab->chunkSize = (uint32) chunkSize; slab->fullChunkSize = (uint32) fullChunkSize; @@ -451,6 +456,10 @@ SlabReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif + + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(slab, block); + free(block); context->mem_allocated -= slab->blockSize; } @@ -467,11 +476,23 @@ SlabReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif + + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(slab, block); + free(block); context->mem_allocated -= slab->blockSize; } } + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the SlabContext. This gets rid of + * the vchunks for whatever user data is getting discarded by the context + * reset. 
+ */ + VALGRIND_MEMPOOL_TRIM(slab, slab, sizeof(SlabContext)); + slab->curBlocklistIndex = 0; Assert(context->mem_allocated == 0); @@ -486,6 +507,10 @@ SlabDelete(MemoryContext context) { /* Reset to release all the SlabBlocks */ SlabReset(context); + + /* Destroy the vpool -- see notes in aset.c */ + VALGRIND_DESTROY_MEMPOOL(context); + /* And free the context header */ free(context); } @@ -567,6 +592,9 @@ SlabAllocFromNewBlock(MemoryContext context, Size size, int flags) if (unlikely(block == NULL)) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(slab, block, Slab_BLOCKHDRSZ); + block->slab = slab; context->mem_allocated += slab->blockSize; @@ -795,6 +823,10 @@ SlabFree(void *pointer) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif + + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(slab, block); + free(block); slab->header.mem_allocated -= slab->blockSize; } diff --git a/src/include/utils/memdebug.h b/src/include/utils/memdebug.h index 7309271834b9f..80692dcef9382 100644 --- a/src/include/utils/memdebug.h +++ b/src/include/utils/memdebug.h @@ -29,6 +29,7 @@ #define VALGRIND_MEMPOOL_ALLOC(context, addr, size) do {} while (0) #define VALGRIND_MEMPOOL_FREE(context, addr) do {} while (0) #define VALGRIND_MEMPOOL_CHANGE(context, optr, nptr, size) do {} while (0) +#define VALGRIND_MEMPOOL_TRIM(context, addr, size) do {} while (0) #endif From 9e9190154ef204a4e814dcc99f763398f7094667 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 18:31:39 -0400 Subject: [PATCH 417/602] Fix MemoryContextAllocAligned's interaction with Valgrind. Arrange that only the "aligned chunk" part of the allocated space is included in a Valgrind vchunk. This suppresses complaints about that vchunk being possibly lost because PG is retaining only pointers to the aligned chunk. Also make sure that trailing wasted space is marked NOACCESS. As a tiny performance improvement, arrange that MCXT_ALLOC_ZERO zeroes only the returned "aligned chunk", not the wasted padding space. In passing, fix GetLocalBufferStorage to use MemoryContextAllocAligned instead of rolling its own implementation, which was equally broken according to Valgrind. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/storage/buffer/localbuf.c | 9 +++-- src/backend/utils/mmgr/alignedalloc.c | 18 +++++++++ src/backend/utils/mmgr/mcxt.c | 54 ++++++++++++++++----------- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 3da9c41ee1d7a..3c0d20f4659d2 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -932,10 +932,11 @@ GetLocalBufferStorage(void) num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ); /* Buffers should be I/O aligned. 
*/ - cur_block = (char *) - TYPEALIGN(PG_IO_ALIGN_SIZE, - MemoryContextAlloc(LocalBufferContext, - num_bufs * BLCKSZ + PG_IO_ALIGN_SIZE)); + cur_block = MemoryContextAllocAligned(LocalBufferContext, + num_bufs * BLCKSZ, + PG_IO_ALIGN_SIZE, + 0); + next_buf_in_block = 0; num_bufs_in_block = num_bufs; } diff --git a/src/backend/utils/mmgr/alignedalloc.c b/src/backend/utils/mmgr/alignedalloc.c index 7eea695de62c5..b1be742691497 100644 --- a/src/backend/utils/mmgr/alignedalloc.c +++ b/src/backend/utils/mmgr/alignedalloc.c @@ -45,6 +45,15 @@ AlignedAllocFree(void *pointer) GetMemoryChunkContext(unaligned)->name, chunk); #endif + /* + * Create a dummy vchunk covering the start of the unaligned chunk, but + * not overlapping the aligned chunk. This will be freed while pfree'ing + * the unaligned chunk, keeping Valgrind happy. Then when we return to + * the outer pfree, that will clean up the vchunk for the aligned chunk. + */ + VALGRIND_MEMPOOL_ALLOC(GetMemoryChunkContext(unaligned), unaligned, + (char *) pointer - (char *) unaligned); + /* Recursively pfree the unaligned chunk */ pfree(unaligned); } @@ -123,6 +132,15 @@ AlignedAllocRealloc(void *pointer, Size size, int flags) VALGRIND_MAKE_MEM_DEFINED(pointer, old_size); memcpy(newptr, pointer, Min(size, old_size)); + /* + * Create a dummy vchunk covering the start of the old unaligned chunk, + * but not overlapping the aligned chunk. This will be freed while + * pfree'ing the old unaligned chunk, keeping Valgrind happy. Then when + * we return to repalloc, it will move the vchunk for the aligned chunk. + */ + VALGRIND_MEMPOOL_ALLOC(ctx, unaligned, + (char *) pointer - (char *) unaligned); + pfree(unaligned); return newptr; diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 073aa6c4fc5b3..47fd774c7d280 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -1465,7 +1465,13 @@ MemoryContextAllocAligned(MemoryContext context, void *unaligned; void *aligned; - /* wouldn't make much sense to waste that much space */ + /* + * Restrict alignto to ensure that it can fit into the "value" field of + * the redirection MemoryChunk, and that the distance back to the start of + * the unaligned chunk will fit into the space available for that. This + * isn't a limitation in practice, since it wouldn't make much sense to + * waste that much space. + */ Assert(alignto < (128 * 1024 * 1024)); /* ensure alignto is a power of 2 */ @@ -1502,10 +1508,15 @@ MemoryContextAllocAligned(MemoryContext context, alloc_size += 1; #endif - /* perform the actual allocation */ - unaligned = MemoryContextAllocExtended(context, alloc_size, flags); + /* + * Perform the actual allocation, but do not pass down MCXT_ALLOC_ZERO. + * This ensures that wasted bytes beyond the aligned chunk do not become + * DEFINED. + */ + unaligned = MemoryContextAllocExtended(context, alloc_size, + flags & ~MCXT_ALLOC_ZERO); - /* set the aligned pointer */ + /* compute the aligned pointer */ aligned = (void *) TYPEALIGN(alignto, (char *) unaligned + sizeof(MemoryChunk)); @@ -1533,12 +1544,23 @@ MemoryContextAllocAligned(MemoryContext context, set_sentinel(aligned, size); #endif - /* Mark the bytes before the redirection header as noaccess */ - VALGRIND_MAKE_MEM_NOACCESS(unaligned, - (char *) alignedchunk - (char *) unaligned); + /* + * MemoryContextAllocExtended marked the whole unaligned chunk as a + * vchunk. Undo that, instead making just the aligned chunk be a vchunk. 
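+ * + * Schematically, the malloc'ed space is then laid out as (a rough sketch; + * proportions depend on alignto): + * [padding][redirection MemoryChunk][aligned chunk][trailing waste] + * and only the aligned chunk keeps a vchunk after this.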
+ * This prevents Valgrind from complaining that the vchunk is possibly + * leaked, since only pointers to the aligned chunk will exist. + * + * After these calls, the aligned chunk will be marked UNDEFINED, and all + * the rest of the unaligned chunk (the redirection chunk header, the + * padding bytes before it, and any wasted trailing bytes) will be marked + * NOACCESS, which is what we want. + */ + VALGRIND_MEMPOOL_FREE(context, unaligned); + VALGRIND_MEMPOOL_ALLOC(context, aligned, size); - /* Disallow access to the redirection chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(alignedchunk, sizeof(MemoryChunk)); + /* Now zero (and make DEFINED) just the aligned chunk, if requested */ + if ((flags & MCXT_ALLOC_ZERO) != 0) + MemSetAligned(aligned, 0, size); return aligned; } @@ -1572,16 +1594,12 @@ void pfree(void *pointer) { #ifdef USE_VALGRIND - MemoryContextMethodID method = GetMemoryChunkMethodID(pointer); MemoryContext context = GetMemoryChunkContext(pointer); #endif MCXT_METHOD(pointer, free_p) (pointer); -#ifdef USE_VALGRIND - if (method != MCTX_ALIGNED_REDIRECT_ID) - VALGRIND_MEMPOOL_FREE(context, pointer); -#endif + VALGRIND_MEMPOOL_FREE(context, pointer); } /* @@ -1591,9 +1609,6 @@ pfree(void *pointer) void * repalloc(void *pointer, Size size) { -#ifdef USE_VALGRIND - MemoryContextMethodID method = GetMemoryChunkMethodID(pointer); -#endif #if defined(USE_ASSERT_CHECKING) || defined(USE_VALGRIND) MemoryContext context = GetMemoryChunkContext(pointer); #endif @@ -1616,10 +1631,7 @@ repalloc(void *pointer, Size size) */ ret = MCXT_METHOD(pointer, realloc) (pointer, size, 0); -#ifdef USE_VALGRIND - if (method != MCTX_ALIGNED_REDIRECT_ID) - VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); -#endif + VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); return ret; } From e78d1d6d47dc7f04fb59fddfc38bab73ec8f1e82 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:07:53 -0400 Subject: [PATCH 418/602] Fix assorted pretty-trivial memory leaks in the backend. In the current system architecture, none of these are worth obsessing over; most are once-per-process leaks. However, Valgrind complains about all of them, and if we get to using threads rather than processes for backend sessions, it will become more interesting to avoid per-session leaks. * Fix leaks in StartupXLOG() and ShutdownWalRecovery(). * Fix leakage of pq_mq_handle in a parallel worker. While at it, move mq_putmessage's "Assert(pq_mq_handle != NULL)" to someplace where it's not trivially useless. * Fix leak in logicalrep_worker_detach(). * Don't leak the startup-packet buffer in ProcessStartupPacket(). * Fix leak in evtcache.c's DecodeTextArrayToBitmapset(). If the presented array is toasted, this neglected to free the detoasted copy, which was then leaked into EventTriggerCacheContext. * I'm distressed by the amount of code that BuildEventTriggerCache is willing to run while switched into a long-lived cache context. Although the detoasted array is the only leak that Valgrind reports, let's tighten things up while we're here. (DecodeTextArrayToBitmapset is still run in the cache context, so doing this doesn't remove the need for the detoast fix. But it reduces the surface area for other leaks.) * load_domaintype_info() intentionally leaked some intermediate cruft into the long-lived DomainConstraintCache's memory context, reasoning that the amount of leakage will typically not be much so it's not worth doing a copyObject() of the final tree to avoid that. 
But Valgrind knows nothing of engineering tradeoffs and complains anyway. On the whole, the copyObject doesn't cost that much and this is surely not a performance-critical code path, so let's do it the clean way. * MarkGUCPrefixReserved didn't bother to clean up removed placeholder GUCs at all, which shows up as a leak in one regression test. It seems appropriate for it to do as much cleanup as define_custom_variable does when replacing placeholders, so factor that code out into a helper function. define_custom_variable's logic was one brick shy of a load too: it forgot to free the separate allocation for the placeholder's name. Author: Tom Lane Reviewed-by: Andres Freund Reviewed-by: Richard Guo Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/access/transam/xlog.c | 33 ++++++++++++------- src/backend/access/transam/xlogrecovery.c | 1 + src/backend/libpq/pqmq.c | 16 ++++++--- src/backend/replication/logical/launcher.c | 2 ++ src/backend/tcop/backend_startup.c | 33 +++++++++++++------ src/backend/utils/cache/evtcache.c | 16 +++++---- src/backend/utils/cache/typcache.c | 13 ++++---- src/backend/utils/misc/guc.c | 38 ++++++++++++++-------- 8 files changed, 99 insertions(+), 53 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b0891998b243f..5553c20fee8ce 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -703,7 +703,7 @@ static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version); static void WriteControlFile(void); static void ReadControlFile(void); static void UpdateControlFile(void); -static char *str_time(pg_time_t tnow); +static char *str_time(pg_time_t tnow, char *buf, size_t bufsize); static int get_sync_bit(int method); @@ -5371,11 +5371,9 @@ BootStrapXLOG(uint32 data_checksum_version) } static char * -str_time(pg_time_t tnow) +str_time(pg_time_t tnow, char *buf, size_t bufsize) { - char *buf = palloc(128); - - pg_strftime(buf, 128, + pg_strftime(buf, bufsize, "%Y-%m-%d %H:%M:%S %Z", pg_localtime(&tnow, log_timezone)); @@ -5618,6 +5616,7 @@ StartupXLOG(void) XLogRecPtr missingContrecPtr; TransactionId oldestActiveXID; bool promoted = false; + char timebuf[128]; /* * We should have an aux process resource owner to use, and we should not @@ -5646,25 +5645,29 @@ StartupXLOG(void) */ ereport(IsPostmasterEnvironment ? 
LOG : NOTICE, (errmsg("database system was shut down at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; case DB_SHUTDOWNED_IN_RECOVERY: ereport(LOG, (errmsg("database system was shut down in recovery at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; case DB_SHUTDOWNING: ereport(LOG, (errmsg("database system shutdown was interrupted; last known up at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; case DB_IN_CRASH_RECOVERY: ereport(LOG, (errmsg("database system was interrupted while in recovery at %s", - str_time(ControlFile->time)), + str_time(ControlFile->time, + timebuf, sizeof(timebuf))), errhint("This probably means that some data is corrupted and" " you will have to use the last backup for recovery."))); break; @@ -5672,7 +5675,8 @@ StartupXLOG(void) case DB_IN_ARCHIVE_RECOVERY: ereport(LOG, (errmsg("database system was interrupted while in recovery at log time %s", - str_time(ControlFile->checkPointCopy.time)), + str_time(ControlFile->checkPointCopy.time, + timebuf, sizeof(timebuf))), errhint("If this has occurred more than once some data might be corrupted" " and you might need to choose an earlier recovery target."))); break; @@ -5680,7 +5684,8 @@ StartupXLOG(void) case DB_IN_PRODUCTION: ereport(LOG, (errmsg("database system was interrupted; last known up at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; default: @@ -6325,6 +6330,12 @@ StartupXLOG(void) */ CompleteCommitTsInitialization(); + /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */ + if (endOfRecoveryInfo->lastPage) + pfree(endOfRecoveryInfo->lastPage); + pfree(endOfRecoveryInfo->recoveryStopReason); + pfree(endOfRecoveryInfo); + /* * All done with end-of-recovery actions. 
* diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index e8f3ba00caae7..f23ec8969c27d 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1626,6 +1626,7 @@ ShutdownWalRecovery(void) close(readFile); readFile = -1; } + pfree(xlogreader->private_data); XLogReaderFree(xlogreader); XLogPrefetcherFree(xlogprefetcher); diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c index f1a08bc32ca17..5f39949a36773 100644 --- a/src/backend/libpq/pqmq.c +++ b/src/backend/libpq/pqmq.c @@ -23,7 +23,7 @@ #include "tcop/tcopprot.h" #include "utils/builtins.h" -static shm_mq_handle *pq_mq_handle; +static shm_mq_handle *pq_mq_handle = NULL; static bool pq_mq_busy = false; static pid_t pq_mq_parallel_leader_pid = 0; static ProcNumber pq_mq_parallel_leader_proc_number = INVALID_PROC_NUMBER; @@ -66,7 +66,11 @@ pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh) static void pq_cleanup_redirect_to_shm_mq(dsm_segment *seg, Datum arg) { - pq_mq_handle = NULL; + if (pq_mq_handle != NULL) + { + pfree(pq_mq_handle); + pq_mq_handle = NULL; + } whereToSendOutput = DestNone; } @@ -131,8 +135,11 @@ mq_putmessage(char msgtype, const char *s, size_t len) if (pq_mq_busy) { if (pq_mq_handle != NULL) + { shm_mq_detach(pq_mq_handle); - pq_mq_handle = NULL; + pfree(pq_mq_handle); + pq_mq_handle = NULL; + } return EOF; } @@ -152,8 +159,6 @@ mq_putmessage(char msgtype, const char *s, size_t len) iov[1].data = s; iov[1].len = len; - Assert(pq_mq_handle != NULL); - for (;;) { /* @@ -161,6 +166,7 @@ mq_putmessage(char msgtype, const char *s, size_t len) * that the shared memory value is updated before we send the parallel * message signal right after this. */ + Assert(pq_mq_handle != NULL); result = shm_mq_sendv(pq_mq_handle, iov, 2, true, true); if (pq_mq_parallel_leader_pid != 0) diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 742d9ba68e900..37377f7eb636c 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -790,6 +790,8 @@ logicalrep_worker_detach(void) } LWLockRelease(LogicalRepWorkerLock); + + list_free(workers); } /* Block concurrent access. */ diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c index ad0af5edc1f21..14d5fc0b1965a 100644 --- a/src/backend/tcop/backend_startup.c +++ b/src/backend/tcop/backend_startup.c @@ -492,7 +492,7 @@ static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) { int32 len; - char *buf; + char *buf = NULL; ProtocolVersion proto; MemoryContext oldcontext; @@ -516,7 +516,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) * scanners, which may be less benign, but it's not really our job to * notice those.) 
*/ - return STATUS_ERROR; + goto fail; } if (pq_getbytes(((char *) &len) + 1, 3) == EOF) @@ -526,7 +526,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete startup packet"))); - return STATUS_ERROR; + goto fail; } len = pg_ntoh32(len); @@ -538,7 +538,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid length of startup packet"))); - return STATUS_ERROR; + goto fail; } /* @@ -554,7 +554,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete startup packet"))); - return STATUS_ERROR; + goto fail; } pq_endmsgread(); @@ -568,7 +568,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) { ProcessCancelRequestPacket(port, buf, len); /* Not really an error, but we don't want to proceed further */ - return STATUS_ERROR; + goto fail; } if (proto == NEGOTIATE_SSL_CODE && !ssl_done) @@ -607,14 +607,16 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode_for_socket_access(), errmsg("failed to send SSL negotiation response: %m"))); - return STATUS_ERROR; /* close the connection */ + goto fail; /* close the connection */ } #ifdef USE_SSL if (SSLok == 'S' && secure_open_server(port) == -1) - return STATUS_ERROR; + goto fail; #endif + pfree(buf); + /* * At this point we should have no data already buffered. If we do, * it was received before we performed the SSL handshake, so it wasn't @@ -661,14 +663,16 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode_for_socket_access(), errmsg("failed to send GSSAPI negotiation response: %m"))); - return STATUS_ERROR; /* close the connection */ + goto fail; /* close the connection */ } #ifdef ENABLE_GSS if (GSSok == 'G' && secure_open_gssapi(port) == -1) - return STATUS_ERROR; + goto fail; #endif + pfree(buf); + /* * At this point we should have no data already buffered. If we do, * it was received before we performed the GSS handshake, so it wasn't @@ -863,7 +867,16 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) */ MemoryContextSwitchTo(oldcontext); + pfree(buf); + return STATUS_OK; + +fail: + /* be tidy, just to avoid Valgrind complaints */ + if (buf) + pfree(buf); + + return STATUS_ERROR; } /* diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c index ce596bf563856..b9d5a5998be50 100644 --- a/src/backend/utils/cache/evtcache.c +++ b/src/backend/utils/cache/evtcache.c @@ -78,7 +78,6 @@ BuildEventTriggerCache(void) { HASHCTL ctl; HTAB *cache; - MemoryContext oldcontext; Relation rel; Relation irel; SysScanDesc scan; @@ -110,9 +109,6 @@ BuildEventTriggerCache(void) (Datum) 0); } - /* Switch to correct memory context. */ - oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext); - /* Prevent the memory context from being nuked while we're rebuilding. */ EventTriggerCacheState = ETCS_REBUILD_STARTED; @@ -145,6 +141,7 @@ BuildEventTriggerCache(void) bool evttags_isnull; EventTriggerCacheEntry *entry; bool found; + MemoryContext oldcontext; /* Get next tuple. */ tup = systable_getnext_ordered(scan, ForwardScanDirection); @@ -171,6 +168,9 @@ BuildEventTriggerCache(void) else continue; + /* Switch to correct memory context. */ + oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext); + /* Allocate new cache item. 
*/ item = palloc0(sizeof(EventTriggerCacheItem)); item->fnoid = form->evtfoid; @@ -188,6 +188,9 @@ BuildEventTriggerCache(void) entry->triggerlist = lappend(entry->triggerlist, item); else entry->triggerlist = list_make1(item); + + /* Restore previous memory context. */ + MemoryContextSwitchTo(oldcontext); } /* Done with pg_event_trigger scan. */ @@ -195,9 +198,6 @@ BuildEventTriggerCache(void) index_close(irel, AccessShareLock); relation_close(rel, AccessShareLock); - /* Restore previous memory context. */ - MemoryContextSwitchTo(oldcontext); - /* Install new cache. */ EventTriggerCache = cache; @@ -240,6 +240,8 @@ DecodeTextArrayToBitmapset(Datum array) } pfree(elems); + if ((Pointer) arr != DatumGetPointer(array)) + pfree(arr); return bms; } diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index f9aec38a11fb3..6a347698edffe 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -1171,9 +1171,6 @@ load_domaintype_info(TypeCacheEntry *typentry) elog(ERROR, "domain \"%s\" constraint \"%s\" has NULL conbin", NameStr(typTup->typname), NameStr(c->conname)); - /* Convert conbin to C string in caller context */ - constring = TextDatumGetCString(val); - /* Create the DomainConstraintCache object and context if needed */ if (dcc == NULL) { @@ -1189,9 +1186,8 @@ load_domaintype_info(TypeCacheEntry *typentry) dcc->dccRefCount = 0; } - /* Create node trees in DomainConstraintCache's context */ - oldcxt = MemoryContextSwitchTo(dcc->dccContext); - + /* Convert conbin to a node tree, still in caller's context */ + constring = TextDatumGetCString(val); check_expr = (Expr *) stringToNode(constring); /* @@ -1206,10 +1202,13 @@ load_domaintype_info(TypeCacheEntry *typentry) */ check_expr = expression_planner(check_expr); + /* Create only the minimally needed stuff in dccContext */ + oldcxt = MemoryContextSwitchTo(dcc->dccContext); + r = makeNode(DomainConstraintState); r->constrainttype = DOM_CONSTRAINT_CHECK; r->name = pstrdup(NameStr(c->conname)); - r->check_expr = check_expr; + r->check_expr = copyObject(check_expr); r->check_exprstate = NULL; MemoryContextSwitchTo(oldcxt); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index ce5449f287853..e404c345e6eab 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -249,6 +249,7 @@ static void reapply_stacked_values(struct config_generic *variable, const char *curvalue, GucContext curscontext, GucSource cursource, Oid cursrole); +static void free_placeholder(struct config_string *pHolder); static bool validate_option_array_item(const char *name, const char *value, bool skipIfNoPermissions); static void write_auto_conf_file(int fd, const char *filename, ConfigVariable *head); @@ -5023,16 +5024,8 @@ define_custom_variable(struct config_generic *variable) set_config_sourcefile(name, pHolder->gen.sourcefile, pHolder->gen.sourceline); - /* - * Free up as much as we conveniently can of the placeholder structure. - * (This neglects any stack items, so it's possible for some memory to be - * leaked. Since this can only happen once per session per variable, it - * doesn't seem worth spending much code on.) 
- */ - set_string_field(pHolder, pHolder->variable, NULL); - set_string_field(pHolder, &pHolder->reset_val, NULL); - - guc_free(pHolder); + /* Now we can free the no-longer-referenced placeholder variable */ + free_placeholder(pHolder); } /* @@ -5131,6 +5124,25 @@ reapply_stacked_values(struct config_generic *variable, } } +/* + * Free up a no-longer-referenced placeholder GUC variable. + * + * This neglects any stack items, so it's possible for some memory to be + * leaked. Since this can only happen once per session per variable, it + * doesn't seem worth spending much code on. + */ +static void +free_placeholder(struct config_string *pHolder) +{ + /* Placeholders are always STRING type, so free their values */ + Assert(pHolder->gen.vartype == PGC_STRING); + set_string_field(pHolder, pHolder->variable, NULL); + set_string_field(pHolder, &pHolder->reset_val, NULL); + + guc_free(unconstify(char *, pHolder->gen.name)); + guc_free(pHolder); +} + /* * Functions for extensions to call to define their custom GUC variables. */ @@ -5291,9 +5303,7 @@ MarkGUCPrefixReserved(const char *className) /* * Check for existing placeholders. We must actually remove invalid - * placeholders, else future parallel worker startups will fail. (We - * don't bother trying to free associated memory, since this shouldn't - * happen often.) + * placeholders, else future parallel worker startups will fail. */ hash_seq_init(&status, guc_hashtab); while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL) @@ -5317,6 +5327,8 @@ MarkGUCPrefixReserved(const char *className) NULL); /* Remove it from any lists it's in, too */ RemoveGUCFromLists(var); + /* And free it */ + free_placeholder((struct config_string *) var); } } From db01c90b2f024298b08dca8aed6b43a2347dee0e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:32:12 -0400 Subject: [PATCH 419/602] Silence Valgrind leakage complaints in more-or-less-hackish ways. These changes don't actually fix any leaks. They just make sure that Valgrind will find pointers to data structures that remain allocated at process exit, and thus not falsely complain about leaks. In particular, we are trying to avoid situations where there is no pointer to the beginning of an allocated block (except possibly within the block itself, which Valgrind won't count). * Because dynahash.c never frees hashtable storage except by deleting the whole hashtable context, it doesn't bother to track the individual blocks of elements allocated by element_alloc(). This results in "possibly lost" complaints from Valgrind except when the first element of each block is actively in use. (Otherwise it'll be on a freelist, but very likely only reachable via "interior pointers" within element blocks, which doesn't satisfy Valgrind.) To fix, if we're building with USE_VALGRIND, expend an extra pointer's worth of space in each element block so that we can chain them all together from the HTAB header. Skip this in shared hashtables though: Valgrind doesn't track those, and we'd need additional locking to make it safe to manipulate a shared chain. While here, update a comment obsoleted by 9c911ec06. * Put the dlist_node fields of catctup and catclist structs first. This ensures that the dlist pointers point to the starts of these palloc blocks, and thus that Valgrind won't consider them "possibly lost". 
* The postmaster's PMChild structs and the autovac launcher's avl_dbase structs also have the dlist_node-is-not-first problem, but putting it first still wouldn't silence the warning because we bulk-allocate those structs in an array, so that Valgrind sees a single allocation. Commonly the first array element will be pointed to only from some later element, so that the reference would be an interior pointer even if it pointed to the array start. (This is the same issue as for dynahash elements.) Since these are pretty simple data structures, I don't feel too bad about faking out Valgrind by just keeping a static pointer to the array start. (This is all quite hacky, and it's not hard to imagine usages where we'd need some other idea in order to have reasonable leak tracking of structures that are only accessible via dlist_node lists. But these changes seem to be enough to silence this class of leakage complaints for the moment.) * Free a couple of data structures manually near the end of an autovacuum worker's run when USE_VALGRIND, and ensure that the final vac_update_datfrozenxid() call is done in a non-permanent context. This doesn't have any real effect on the process's total memory consumption, since we're going to exit as soon as that last transaction is done. But it does pacify Valgrind. * Valgrind complains about the postmaster's socket-files and lock-files lists being leaked, which we can silence by just not nulling out the static pointers to them. * Valgrind seems not to consider the global "environ" variable as a valid root pointer; so when we allocate a new environment array, it claims that data is leaked. To fix that, keep our own statically-allocated copy of the pointer, similarly to the previous item. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/libpq/pqcomm.c | 1 - src/backend/postmaster/autovacuum.c | 26 ++++++++++++++- src/backend/postmaster/pmchild.c | 18 +++++++++- src/backend/utils/hash/dynahash.c | 52 +++++++++++++++++++++++++---- src/backend/utils/init/miscinit.c | 1 - src/backend/utils/misc/ps_status.c | 16 +++++++++ src/include/utils/catcache.h | 23 ++++++++----- 7 files changed, 118 insertions(+), 19 deletions(-) diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index e5171467de18d..25f739a6a17d4 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -858,7 +858,6 @@ RemoveSocketFiles(void) (void) unlink(sock_path); } /* Since we're about to exit, no need to reclaim storage */ - sock_paths = NIL; } diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 8908603464c5c..ff96b36d71025 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -310,6 +310,16 @@ static AutoVacuumShmemStruct *AutoVacuumShmem; static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList); static MemoryContext DatabaseListCxt = NULL; +/* + * Dummy pointer to persuade Valgrind that we've not leaked the array of + * avl_dbase structs. Make it global to ensure the compiler doesn't + * optimize it away. 
+ */ +#ifdef USE_VALGRIND +extern avl_dbase *avl_dbase_array; +avl_dbase *avl_dbase_array; +#endif + /* Pointer to my own WorkerInfo, valid on each worker */ static WorkerInfo MyWorkerInfo = NULL; @@ -1020,6 +1030,10 @@ rebuild_database_list(Oid newdb) /* put all the hash elements into an array */ dbary = palloc(nelems * sizeof(avl_dbase)); + /* keep Valgrind quiet */ +#ifdef USE_VALGRIND + avl_dbase_array = dbary; +#endif i = 0; hash_seq_init(&seq, dbhash); @@ -2565,8 +2579,18 @@ do_autovacuum(void) /* * We leak table_toast_map here (among other things), but since we're - * going away soon, it's not a problem. + * going away soon, it's not a problem normally. But when using Valgrind, + * release some stuff to reduce complaints about leaked storage. */ +#ifdef USE_VALGRIND + hash_destroy(table_toast_map); + FreeTupleDesc(pg_class_desc); + if (bstrategy) + pfree(bstrategy); +#endif + + /* Run the rest in xact context, mainly to avoid Valgrind leak warnings */ + MemoryContextSwitchTo(TopTransactionContext); /* * Update pg_database.datfrozenxid, and truncate pg_xact if possible. We diff --git a/src/backend/postmaster/pmchild.c b/src/backend/postmaster/pmchild.c index cde1d23a4ca8b..584bb58c8abaf 100644 --- a/src/backend/postmaster/pmchild.c +++ b/src/backend/postmaster/pmchild.c @@ -59,6 +59,17 @@ NON_EXEC_STATIC int num_pmchild_slots = 0; */ dlist_head ActiveChildList; +/* + * Dummy pointer to persuade Valgrind that we've not leaked the array of + * PMChild structs. Make it global to ensure the compiler doesn't + * optimize it away. + */ +#ifdef USE_VALGRIND +extern PMChild *pmchild_array; +PMChild *pmchild_array; +#endif + + /* * MaxLivePostmasterChildren * @@ -125,8 +136,13 @@ InitPostmasterChildSlots(void) for (int i = 0; i < BACKEND_NUM_TYPES; i++) num_pmchild_slots += pmchild_pools[i].size; - /* Initialize them */ + /* Allocate enough slots, and make sure Valgrind doesn't complain */ slots = palloc(num_pmchild_slots * sizeof(PMChild)); +#ifdef USE_VALGRIND + pmchild_array = slots; +#endif + + /* Initialize them */ slotno = 0; for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++) { diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 42e9be274fc6a..81da03629f0d2 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -22,10 +22,11 @@ * lookup key's hash value as a partition number --- this will work because * of the way calc_bucket() maps hash values to bucket numbers. * - * For hash tables in shared memory, the memory allocator function should - * match malloc's semantics of returning NULL on failure. For hash tables - * in local memory, we typically use palloc() which will throw error on - * failure. The code in this file has to cope with both cases. + * The memory allocator function should match malloc's semantics of returning + * NULL on failure. (This is essential for hash tables in shared memory. + * For hash tables in local memory, we used to use palloc() which will throw + * error on failure; but we no longer do, so it's untested whether this + * module will still cope with that behavior.) 
* * dynahash.c provides support for these types of lookup keys: * @@ -98,6 +99,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "lib/ilist.h" #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" @@ -236,6 +238,16 @@ struct HTAB Size keysize; /* hash key length in bytes */ long ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ + + /* + * In a USE_VALGRIND build, non-shared hashtables keep an slist chain of + * all the element blocks they have allocated. This pacifies Valgrind, + * which would otherwise often claim that the element blocks are "possibly + * lost" for lack of any non-interior pointers to their starts. + */ +#ifdef USE_VALGRIND + slist_head element_blocks; +#endif }; /* @@ -1712,6 +1724,8 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) { HASHHDR *hctl = hashp->hctl; Size elementSize; + Size requestSize; + char *allocedBlock; HASHELEMENT *firstElement; HASHELEMENT *tmpElement; HASHELEMENT *prevElement; @@ -1723,12 +1737,38 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) /* Each element has a HASHELEMENT header plus user data. */ elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize); + requestSize = nelem * elementSize; + + /* Add space for slist_node list link if we need one. */ +#ifdef USE_VALGRIND + if (!hashp->isshared) + requestSize += MAXALIGN(sizeof(slist_node)); +#endif + + /* Allocate the memory. */ CurrentDynaHashCxt = hashp->hcxt; - firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize); + allocedBlock = hashp->alloc(requestSize); - if (!firstElement) + if (!allocedBlock) return false; + /* + * If USE_VALGRIND, each allocated block of elements of a non-shared + * hashtable is chained into a list, so that Valgrind won't think it's + * been leaked. + */ +#ifdef USE_VALGRIND + if (hashp->isshared) + firstElement = (HASHELEMENT *) allocedBlock; + else + { + slist_push_head(&hashp->element_blocks, (slist_node *) allocedBlock); + firstElement = (HASHELEMENT *) (allocedBlock + MAXALIGN(sizeof(slist_node))); + } +#else + firstElement = (HASHELEMENT *) allocedBlock; +#endif + /* prepare to link all the new entries into the freelist */ prevElement = NULL; tmpElement = firstElement; diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 43b4dbccc3de6..65d8cbfaed585 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -1183,7 +1183,6 @@ UnlinkLockFiles(int status, Datum arg) /* Should we complain if the unlink fails? */ } /* Since we're about to exit, no need to reclaim storage */ - lock_files = NIL; /* * Lock file removal should always be the last externally visible action diff --git a/src/backend/utils/misc/ps_status.c b/src/backend/utils/misc/ps_status.c index e08b26e8c14f2..4df25944deb33 100644 --- a/src/backend/utils/misc/ps_status.c +++ b/src/backend/utils/misc/ps_status.c @@ -100,6 +100,17 @@ static void flush_ps_display(void); static int save_argc; static char **save_argv; +/* + * Valgrind seems not to consider the global "environ" variable as a valid + * root pointer; so when we allocate a new environment array, it claims that + * data is leaked. To fix that, keep our own statically-allocated copy of the + * pointer. (Oddly, this doesn't seem to be a problem for "argv".) 
+ */ +#if defined(PS_USE_CLOBBER_ARGV) && defined(USE_VALGRIND) +extern char **ps_status_new_environ; +char **ps_status_new_environ; +#endif + /* * Call this early in startup to save the original argc/argv values. @@ -206,6 +217,11 @@ save_ps_display_args(int argc, char **argv) } new_environ[i] = NULL; environ = new_environ; + + /* See notes about Valgrind above. */ +#ifdef USE_VALGRIND + ps_status_new_environ = new_environ; +#endif } /* diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 277ec33c00bac..00808e23f49b8 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -87,6 +87,14 @@ typedef struct catcache typedef struct catctup { + /* + * Each tuple in a cache is a member of a dlist that stores the elements + * of its hash bucket. We keep each dlist in LRU order to speed repeated + * lookups. Keep the dlist_node field first so that Valgrind understands + * the struct is reachable. + */ + dlist_node cache_elem; /* list member of per-bucket list */ + int ct_magic; /* for identifying CatCTup entries */ #define CT_MAGIC 0x57261502 @@ -98,13 +106,6 @@ typedef struct catctup */ Datum keys[CATCACHE_MAXKEYS]; - /* - * Each tuple in a cache is a member of a dlist that stores the elements - * of its hash bucket. We keep each dlist in LRU order to speed repeated - * lookups. - */ - dlist_node cache_elem; /* list member of per-bucket list */ - /* * A tuple marked "dead" must not be returned by subsequent searches. * However, it won't be physically deleted from the cache until its @@ -158,13 +159,17 @@ typedef struct catctup */ typedef struct catclist { + /* + * Keep the dlist_node field first so that Valgrind understands the struct + * is reachable. + */ + dlist_node cache_elem; /* list member of per-catcache list */ + int cl_magic; /* for identifying CatCList entries */ #define CL_MAGIC 0x52765103 uint32 hash_value; /* hash value for lookup keys */ - dlist_node cache_elem; /* list member of per-catcache list */ - /* * Lookup keys for the entry, with the first nkeys elements being valid. * All by-reference are separately allocated. From 9f18fa9995628fef752d704d874eeed0bab815e5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:39:03 -0400 Subject: [PATCH 420/602] Reduce leakage during PL/pgSQL function compilation. format_procedure leaks memory, so run it in a short-lived context not the session-lifespan cache context for the PL/pgSQL function. parse_datatype called the core parser in the function's cache context, thus leaking potentially a lot of storage into that context. We were also being a bit careless with the TypeName structures made in that code path and others. Most of the time we don't need to retain the TypeName, so make sure it is made in the short-lived temp context, and copy it only if we do need to retain it. These are far from the only leaks in PL/pgSQL compilation, but they're the biggest as far as I've seen, and further improvement looks like it'd require delicate and bug-prone surgery. 
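A minimal sketch of the pattern these fixes apply, assuming a hypothetical helper name (the patch itself inlines this logic in plpgsql_compile_callback and parse_datatype); this is an illustration of the technique, not the patch's code:

#include "postgres.h"
#include "utils/memutils.h"
#include "utils/regproc.h"

/*
 * Sketch only: run a leak-prone call (format_procedure here) while the
 * given short-lived context is current, then copy just the final string
 * into the long-lived function context.  Whatever scratch data the call
 * leaks dies when tmp_cxt is reset.
 */
static char *
build_fn_signature(Oid fnoid, MemoryContext func_cxt, MemoryContext tmp_cxt)
{
	MemoryContext oldcxt = MemoryContextSwitchTo(tmp_cxt);
	char	   *sig = format_procedure(fnoid);

	MemoryContextSwitchTo(func_cxt);
	sig = pstrdup(sig);			/* only this copy survives */
	MemoryContextSwitchTo(oldcxt);
	return sig;
}

The same shape recurs throughout this series: do the messy work in storage that is about to be reclaimed, and pay one explicit copy for anything that must outlive it.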
Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/pl/plpgsql/src/pl_comp.c | 28 ++++++++++++++++++++++------ src/pl/plpgsql/src/pl_gram.y | 8 +++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index ee961425a5b7e..f6976689a6927 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -177,6 +177,7 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, yyscan_t scanner; Datum prosrcdatum; char *proc_source; + char *proc_signature; HeapTuple typeTup; Form_pg_type typeStruct; PLpgSQL_variable *var; @@ -223,6 +224,9 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, plpgsql_check_syntax = forValidator; plpgsql_curr_compile = function; + /* format_procedure leaks memory, so run it in temp context */ + proc_signature = format_procedure(fcinfo->flinfo->fn_oid); + /* * All the permanent output of compilation (e.g. parse tree) is kept in a * per-function memory context, so it can be reclaimed easily. @@ -237,7 +241,7 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, ALLOCSET_DEFAULT_SIZES); plpgsql_compile_tmp_cxt = MemoryContextSwitchTo(func_cxt); - function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); + function->fn_signature = pstrdup(proc_signature); MemoryContextSetIdentifier(func_cxt, function->fn_signature); function->fn_oid = fcinfo->flinfo->fn_oid; function->fn_input_collation = fcinfo->fncollation; @@ -1673,6 +1677,11 @@ plpgsql_parse_wordrowtype(char *ident) { Oid classOid; Oid typOid; + TypeName *typName; + MemoryContext oldCxt; + + /* Avoid memory leaks in long-term function context */ + oldCxt = MemoryContextSwitchTo(plpgsql_compile_tmp_cxt); /* * Look up the relation. Note that because relation rowtypes have the @@ -1695,9 +1704,12 @@ plpgsql_parse_wordrowtype(char *ident) errmsg("relation \"%s\" does not have a composite type", ident))); + typName = makeTypeName(ident); + + MemoryContextSwitchTo(oldCxt); + /* Build and return the row type struct */ - return plpgsql_build_datatype(typOid, -1, InvalidOid, - makeTypeName(ident)); + return plpgsql_build_datatype(typOid, -1, InvalidOid, typName); } /* ---------- @@ -1711,6 +1723,7 @@ plpgsql_parse_cwordrowtype(List *idents) Oid classOid; Oid typOid; RangeVar *relvar; + TypeName *typName; MemoryContext oldCxt; /* @@ -1733,11 +1746,12 @@ plpgsql_parse_cwordrowtype(List *idents) errmsg("relation \"%s\" does not have a composite type", relvar->relname))); + typName = makeTypeNameFromNameList(idents); + MemoryContextSwitchTo(oldCxt); /* Build and return the row type struct */ - return plpgsql_build_datatype(typOid, -1, InvalidOid, - makeTypeNameFromNameList(idents)); + return plpgsql_build_datatype(typOid, -1, InvalidOid, typName); } /* @@ -1952,6 +1966,8 @@ plpgsql_build_recfield(PLpgSQL_rec *rec, const char *fldname) * origtypname is the parsed form of what the user wrote as the type name. * It can be NULL if the type could not be a composite type, or if it was * identified by OID to begin with (e.g., it's a function argument type). + * origtypname is in short-lived storage and must be copied if we choose + * to incorporate it into the function's parse tree. 
*/ PLpgSQL_type * plpgsql_build_datatype(Oid typeOid, int32 typmod, @@ -2070,7 +2086,7 @@ build_datatype(HeapTuple typeTup, int32 typmod, errmsg("type %s is not composite", format_type_be(typ->typoid)))); - typ->origtypname = origtypname; + typ->origtypname = copyObject(origtypname); typ->tcache = typentry; typ->tupdesc_id = typentry->tupDesc_identifier; } diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 7b672ea5179a6..17568d82554d2 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -3853,6 +3853,7 @@ parse_datatype(const char *string, int location, yyscan_t yyscanner) int32 typmod; sql_error_callback_arg cbarg; ErrorContextCallback syntax_errcontext; + MemoryContext oldCxt; cbarg.location = location; cbarg.yyscanner = yyscanner; @@ -3862,9 +3863,14 @@ parse_datatype(const char *string, int location, yyscan_t yyscanner) syntax_errcontext.previous = error_context_stack; error_context_stack = &syntax_errcontext; - /* Let the main parser try to parse it under standard SQL rules */ + /* + * Let the main parser try to parse it under standard SQL rules. The + * parser leaks memory, so run it in temp context. + */ + oldCxt = MemoryContextSwitchTo(plpgsql_compile_tmp_cxt); typeName = typeStringToTypeName(string, NULL); typenameTypeIdAndMod(NULL, typeName, &type_id, &typmod); + MemoryContextSwitchTo(oldCxt); /* Restore former ereport callback */ error_context_stack = syntax_errcontext.previous; From 2c7b4ad24dda86a73d80df063e9a56c3ecb1e4bb Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:43:04 -0400 Subject: [PATCH 421/602] Suppress complaints about leaks in function cache loading. PL/pgSQL and SQL-function parsing leak some stuff into the long-lived function cache context. This isn't really a huge practical problem, since it's not a large amount of data and the cruft will be recovered if we have to re-parse the function. It's not clear that it's worth working any harder than the previous patch did to eliminate these leak complaints, so instead silence them with a suppression rule. This suppression rule also hides the fact that CachedFunction structs are intentionally leaked in some cases because we're unsure if any fn_extra pointers remain. That might be nice to do something about eventually, but it's not clear how. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/tools/valgrind.supp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/tools/valgrind.supp b/src/tools/valgrind.supp index 2ad5b81526d3f..fad20c8f70804 100644 --- a/src/tools/valgrind.supp +++ b/src/tools/valgrind.supp @@ -194,3 +194,24 @@ Memcheck:Addr8 fun:pg_numa_touch_mem_if_required } + + +# Memory-leak suppressions +# Note that a suppression rule will silence complaints about memory blocks +# allocated in matching places, but it won't prevent "indirectly lost" +# complaints about blocks that are only reachable via the suppressed blocks. + +# Suppress complaints about stuff leaked during function cache loading. +# Both the PL/pgSQL and SQL-function parsing processes generate some cruft +# within the function's cache context, which doesn't seem worth the trouble +# to get rid of. Moreover, there are cases where CachedFunction structs +# are intentionally leaked because we're unsure if any fn_extra pointers +# remain. +{ + hide_function_cache_leaks + Memcheck:Leak + match-leak-kinds: definite,possible,indirect + + ... 
+ fun:cached_function_compile +} From 7f6ededa764b287ba593a2bb7fd566df8053213e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:43:53 -0400 Subject: [PATCH 422/602] Suppress complaints about leaks in TS dictionary loading. Like the situation with function cache loading, text search dictionary loading functions tend to leak some cruft into the dictionary's long-lived cache context. To judge by the examples in the core regression tests, not very many bytes are at stake. Moreover, I don't see a way to prevent such leaks without changing the API for TS template initialization functions: right now they do not have to worry about making sure that their results are long-lived. Hence, I think we should install a suppression rule rather than trying to fix this completely. However, I did grab some low-hanging fruit: several places were leaking the result of get_tsearch_config_filename. This seems worth doing mostly because they are inconsistent with other dictionaries that were freeing it already. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/tsearch/dict_ispell.c | 18 ++++++++++++------ src/backend/tsearch/dict_synonym.c | 1 + src/backend/tsearch/dict_thesaurus.c | 7 ++++--- src/backend/utils/cache/ts_cache.c | 4 +++- src/tools/valgrind.supp | 12 ++++++++++++ 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index 63bd193a78a89..debfbf956cc1f 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -47,24 +47,30 @@ dispell_init(PG_FUNCTION_ARGS) if (strcmp(defel->defname, "dictfile") == 0) { + char *filename; + if (dictloaded) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple DictFile parameters"))); - NIImportDictionary(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "dict")); + filename = get_tsearch_config_filename(defGetString(defel), + "dict"); + NIImportDictionary(&(d->obj), filename); + pfree(filename); dictloaded = true; } else if (strcmp(defel->defname, "afffile") == 0) { + char *filename; + if (affloaded) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple AffFile parameters"))); - NIImportAffixes(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "affix")); + filename = get_tsearch_config_filename(defGetString(defel), + "affix"); + NIImportAffixes(&(d->obj), filename); + pfree(filename); affloaded = true; } else if (strcmp(defel->defname, "stopwords") == 0) diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index 0da5a9d686802..c2773eb01adee 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -199,6 +199,7 @@ dsynonym_init(PG_FUNCTION_ARGS) } tsearch_readline_end(&trst); + pfree(filename); d->len = cur; qsort(d->syn, d->len, sizeof(Syn), compareSyn); diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 1bebe36a6910e..1e6bbde1ca7d8 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -167,17 +167,17 @@ addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 p static void thesaurusRead(const char *filename, DictThesaurus *d) { + char *real_filename = get_tsearch_config_filename(filename, "ths"); tsearch_readline_state trst; uint32 idsubst = 0; bool useasis = false; char *line; - filename = get_tsearch_config_filename(filename, 
"ths"); - if (!tsearch_readline_begin(&trst, filename)) + if (!tsearch_readline_begin(&trst, real_filename)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("could not open thesaurus file \"%s\": %m", - filename))); + real_filename))); while ((line = tsearch_readline(&trst)) != NULL) { @@ -297,6 +297,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) d->nsubst = idsubst; tsearch_readline_end(&trst); + pfree(real_filename); } static TheLexeme * diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 18cccd778fd8c..e8ae53238d07a 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -321,7 +321,9 @@ lookup_ts_dictionary_cache(Oid dictId) /* * Init method runs in dictionary's private memory context, and we - * make sure the options are stored there too + * make sure the options are stored there too. This typically + * results in a small amount of memory leakage, but it's not worth + * complicating the API for tmplinit functions to avoid it. */ oldcontext = MemoryContextSwitchTo(entry->dictCtx); diff --git a/src/tools/valgrind.supp b/src/tools/valgrind.supp index fad20c8f70804..3880007dfb3bb 100644 --- a/src/tools/valgrind.supp +++ b/src/tools/valgrind.supp @@ -215,3 +215,15 @@ ... fun:cached_function_compile } + +# Suppress complaints about stuff leaked during TS dictionary loading. +# Not very much is typically lost there, and preventing it would +# require a risky API change for TS tmplinit functions. +{ + hide_ts_dictionary_leaks + Memcheck:Leak + match-leak-kinds: definite,possible,indirect + + ... + fun:lookup_ts_dictionary_cache +} From b102c8c4733cf76ff0635dc440ee8dd11487ed95 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:44:10 -0400 Subject: [PATCH 423/602] Silence complaints about leaks in PlanCacheComputeResultDesc. CompleteCachedPlan intentionally doesn't worry about small leaks from PlanCacheComputeResultDesc. However, Valgrind knows nothing of engineering tradeoffs and complains anyway. Silence it by doing things the hard way if USE_VALGRIND. I don't really love this patch, because it makes the handling of plansource->resultDesc different from the handling of query dependencies and search_path just above, which likewise are willing to accept small leaks into the cached plan's context. However, those cases aren't provoking Valgrind complaints. (Perhaps in a CLOBBER_CACHE_ALWAYS build, they would?) For the moment, this makes the src/pl/plpgsql tests leak-free according to Valgrind. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/utils/cache/plancache.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 0c506d320b137..6661d2c6b7391 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -463,8 +463,7 @@ CompleteCachedPlan(CachedPlanSource *plansource, /* * Save the final parameter types (or other parameter specification data) - * into the source_context, as well as our other parameters. Also save - * the result tuple descriptor. + * into the source_context, as well as our other parameters. 
*/ MemoryContextSwitchTo(source_context); @@ -480,9 +479,25 @@ CompleteCachedPlan(CachedPlanSource *plansource, plansource->parserSetupArg = parserSetupArg; plansource->cursor_options = cursor_options; plansource->fixed_result = fixed_result; - plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list); + /* + * Also save the result tuple descriptor. PlanCacheComputeResultDesc may + * leak some cruft; normally we just accept that to save a copy step, but + * in USE_VALGRIND mode be tidy by running it in the caller's context. + */ +#ifdef USE_VALGRIND + MemoryContextSwitchTo(oldcxt); + plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list); + if (plansource->resultDesc) + { + MemoryContextSwitchTo(source_context); + plansource->resultDesc = CreateTupleDescCopy(plansource->resultDesc); + MemoryContextSwitchTo(oldcxt); + } +#else + plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list); MemoryContextSwitchTo(oldcxt); +#endif plansource->is_complete = true; plansource->is_valid = true; From 4fbfdde58e4cd091f88737dffa241b08c23d8829 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 21:26:21 -0400 Subject: [PATCH 424/602] Avoid leakage of zero-length arrays in partition_bounds_copy(). If ndatums is zero, the code would allocate zero-length boundKinds and boundDatums chunks, which would have nothing pointing to them, leading to Valgrind complaints. Rearrange the code to avoid the useless pallocs, and also to not bother computing byval/typlen when they aren't used. I'm unsure why I didn't see this in my Valgrind testing back in May. This code hasn't changed since then, but maybe we added a regression test that reaches this edge case. Or possibly I just failed to notice the reports, which do say "0 bytes lost". Author: Tom Lane Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/partitioning/partbounds.c | 57 ++++++++++++++------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 4bdc2941efb21..822cf4ec451a4 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -1007,9 +1007,6 @@ partition_bounds_copy(PartitionBoundInfo src, int ndatums; int nindexes; int partnatts; - bool hash_part; - int natts; - Datum *boundDatums; dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData)); @@ -1023,7 +1020,7 @@ partition_bounds_copy(PartitionBoundInfo src, dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums); - if (src->kind != NULL) + if (src->kind != NULL && ndatums > 0) { PartitionRangeDatumKind *boundKinds; @@ -1058,36 +1055,40 @@ partition_bounds_copy(PartitionBoundInfo src, * For hash partitioning, datums array will have two elements - modulus * and remainder. */ - hash_part = (key->strategy == PARTITION_STRATEGY_HASH); - natts = hash_part ? 2 : partnatts; - boundDatums = palloc(ndatums * natts * sizeof(Datum)); - - for (i = 0; i < ndatums; i++) + if (ndatums > 0) { - int j; - - dest->datums[i] = &boundDatums[i * natts]; + bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); + int natts = hash_part ? 
2 : partnatts; + Datum *boundDatums = palloc(ndatums * natts * sizeof(Datum)); - for (i = 0; i < ndatums; i++) + { - int j; - - dest->datums[i] = &boundDatums[i * natts]; + int j; - for (j = 0; j < natts; j++) + dest->datums[i] = &boundDatums[i * natts]; + + for (j = 0; j < natts; j++) { - bool byval; - int typlen; + if (dest->kind == NULL || + dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) + { + bool byval; + int typlen; - if (hash_part) - { - typlen = sizeof(int32); /* Always int4 */ - byval = true; /* int4 is pass-by-value */ - } - else + if (hash_part) + { + typlen = sizeof(int32); /* Always int4 */ + byval = true; /* int4 is pass-by-value */ + } + else + { + byval = key->parttypbyval[j]; + typlen = key->parttyplen[j]; + } - byval = key->parttypbyval[j]; - typlen = key->parttyplen[j]; - } + dest->datums[i][j] = datumCopy(src->datums[i][j], + byval, typlen); + } + } } } From 5c8eda1f72a2b0a8c48ada9b872eb5ef581f7c81 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 3 Aug 2025 13:01:17 -0400 Subject: [PATCH 425/602] Take a little more care in set_backtrace(). Coverity complained that the "errtrace" string is leaked if we return early because backtrace_symbols fails. Another criticism that could be leveled at this is that not providing any hint of what happened is user-unfriendly. Fix that. The odds of a leak here are small, and typically it wouldn't matter anyway since the leak will be in ErrorContext which will soon get reset. So I'm not feeling a need to back-patch. --- src/backend/utils/error/elog.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 47af743990fe9..afce1a8e1f003 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -1128,12 +1128,15 @@ set_backtrace(ErrorData *edata, int num_skip) nframes = backtrace(buf, lengthof(buf)); strfrms = backtrace_symbols(buf, nframes); - if (strfrms == NULL) - return; - - for (int i = num_skip; i < nframes; i++) - appendStringInfo(&errtrace, "\n%s", strfrms[i]); - free(strfrms); + if (strfrms != NULL) + { + for (int i = num_skip; i < nframes; i++) + appendStringInfo(&errtrace, "\n%s", strfrms[i]); + free(strfrms); + } + else + appendStringInfoString(&errtrace, + "insufficient memory for backtrace generation"); } #else appendStringInfoString(&errtrace, From fd5a1a0c3e566f7fc860838084466a1c25944281 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Mon, 4 Aug 2025 04:02:47 +0000 Subject: [PATCH 426/602] Detect and report update_deleted conflicts. This enhancement builds upon the infrastructure introduced in commit 228c370868, which enables the preservation of deleted tuples and their origin information on the subscriber. This capability is crucial for handling concurrent transactions replicated from remote nodes. The update introduces support for detecting update_deleted conflicts during the application of update operations on the subscriber. When an update operation fails to locate the target row, typically because it has been concurrently deleted, we perform an additional table scan. This scan uses the SnapshotAny mechanism, and we perform this additional scan only when the retain_dead_tuples option is enabled for the relevant subscription.
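In outline, that decision reads like the following hedged sketch (an editorial illustration built from the helpers this patch adds below; the wrapper name is hypothetical, the retaindeadtuples field name is assumed from commit 228c370868, and the index-usability check is omitted for brevity):

#include "postgres.h"
#include "executor/executor.h"
#include "replication/conflict.h"
#include "replication/worker_internal.h"

/*
 * Sketch: classify a failed UPDATE lookup.  The costly SnapshotAny scan
 * runs only when the subscription retains dead tuples; finding a recently
 * dead matching tuple yields update_deleted instead of update_missing.
 */
static ConflictType
classify_missing_update(Relation localrel, Oid localidxoid,
						TupleTableSlot *remoteslot,
						TransactionId oldestxmin,
						TransactionId *delete_xid,
						RepOriginId *delete_origin,
						TimestampTz *delete_time)
{
	bool		found;

	if (!MySubscription->retaindeadtuples)	/* assumed field name */
		return CT_UPDATE_MISSING;

	if (OidIsValid(localidxoid))
		found = RelationFindDeletedTupleInfoByIndex(localrel, localidxoid,
													remoteslot, oldestxmin,
													delete_xid, delete_origin,
													delete_time);
	else
		found = RelationFindDeletedTupleInfoSeq(localrel, remoteslot,
												oldestxmin, delete_xid,
												delete_origin, delete_time);

	return found ? CT_UPDATE_DELETED : CT_UPDATE_MISSING;
}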
The goal of this scan is to locate the most recently deleted tuple, matching the old column values from the remote update, that has not yet been removed by VACUUM and is still visible according to our slot (i.e., its deletion is not older than conflict-detection-slot's xmin). If such a tuple is found, the system reports an update_deleted conflict, including the origin and transaction details responsible for the deletion. This provides the groundwork for a more robust and accurate conflict resolution process, preventing unexpected behavior by correctly identifying cases where a remote update clashes with a deletion from another origin. Author: Zhijie Hou Reviewed-by: shveta malik Reviewed-by: Nisha Moond Reviewed-by: Dilip Kumar Reviewed-by: Hayato Kuroda Reviewed-by: Amit Kapila Discussion: https://postgr.es/m/OS0PR01MB5716BE80DAEB0EE2A6A5D1F5949D2@OS0PR01MB5716.jpnprd01.prod.outlook.com --- doc/src/sgml/catalogs.sgml | 3 +- doc/src/sgml/logical-replication.sgml | 16 ++ doc/src/sgml/monitoring.sgml | 11 + doc/src/sgml/ref/create_subscription.sgml | 9 +- src/backend/catalog/system_views.sql | 1 + src/backend/executor/execReplication.c | 251 ++++++++++++++++++++- src/backend/replication/logical/conflict.c | 22 ++ src/backend/replication/logical/worker.c | 186 +++++++++++++-- src/backend/utils/adt/pgstatfuncs.c | 14 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 6 +- src/include/executor/executor.h | 14 +- src/include/replication/conflict.h | 3 + src/include/replication/worker_internal.h | 5 +- src/test/regress/expected/rules.out | 3 +- src/test/subscription/t/035_conflicts.pl | 66 +++++- 16 files changed, 566 insertions(+), 46 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 97f547b3cc4b2..da8a78825809f 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8087,7 +8087,8 @@ SCRAM-SHA-256$<iteration count>:&l subretaindeadtuples bool - If true, the information (e.g., dead tuples, commit timestamps, and + If true, the detection of is + enabled and the information (e.g., dead tuples, commit timestamps, and origins) on the subscriber that is useful for conflict detection is retained. diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index fcac55aefe665..a0761cfee3f6d 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -1804,6 +1804,22 @@ Publications: + + update_deleted + + + The tuple to be updated was concurrently deleted by another origin. The + update will simply be skipped in this scenario. Note that this conflict + can only be detected when + track_commit_timestamp + and retain_dead_tuples + are enabled. Note that if a tuple cannot be found due to the table being + truncated, only an update_missing conflict will + arise. Additionally, if the tuple was deleted by the same origin, an + update_missing conflict will arise. + + + update_missing diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 823afe1b30b22..fa78031ccbbf0 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -2223,6 +2223,17 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + confl_update_deleted bigint + + + Number of times the tuple to be updated was concurrently deleted by + another source during the application of changes. See + for details about this conflict.
confl_update_missing bigint diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index b8cd15f32806b..247c5bd260410 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -445,10 +445,11 @@ CREATE SUBSCRIPTION subscription_namefalse. - If set to true, a physical replication slot named - pg_conflict_detection will be - created on the subscriber to prevent the conflict information from - being removed. + If set to true, the detection of + is enabled, and a physical + replication slot named pg_conflict_detection will be + created on the subscriber to prevent the information for detecting + conflicts from being removed. diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index f6eca09ee153a..77c693f630e4b 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1399,6 +1399,7 @@ CREATE VIEW pg_stat_subscription_stats AS ss.confl_insert_exists, ss.confl_update_origin_differs, ss.confl_update_exists, + ss.confl_update_deleted, ss.confl_update_missing, ss.confl_delete_origin_differs, ss.confl_delete_missing, diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index f262e7a66f771..68184f5d671e2 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -14,12 +14,14 @@ #include "postgres.h" +#include "access/commit_ts.h" #include "access/genam.h" #include "access/gist.h" #include "access/relscan.h" #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" +#include "access/heapam.h" #include "catalog/pg_am_d.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -36,7 +38,7 @@ static bool tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2, - TypeCacheEntry **eq); + TypeCacheEntry **eq, Bitmapset *columns); /* * Setup a ScanKey for a search in the relation 'rel' for a tuple 'key' that @@ -221,7 +223,7 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, if (eq == NULL) eq = palloc0(sizeof(*eq) * outslot->tts_tupleDescriptor->natts); - if (!tuples_equal(outslot, searchslot, eq)) + if (!tuples_equal(outslot, searchslot, eq, NULL)) continue; } @@ -277,10 +279,13 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, /* * Compare the tuples in the slots by checking if they have equal values. + * + * If 'columns' is not null, only the columns specified within it will be + * considered for the equality check, ignoring all other columns. */ static bool tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2, - TypeCacheEntry **eq) + TypeCacheEntry **eq, Bitmapset *columns) { int attrnum; @@ -305,6 +310,14 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2, if (att->attisdropped || att->attgenerated) continue; + /* + * Ignore columns that are not listed for checking.
+ */ + if (columns && + !bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber, + columns)) + continue; + /* * If one value is NULL and other is not, then they are certainly not * equal @@ -380,7 +393,7 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, /* Try to find the tuple */ while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) { - if (!tuples_equal(scanslot, searchslot, eq)) + if (!tuples_equal(scanslot, searchslot, eq, NULL)) continue; found = true; @@ -455,6 +468,236 @@ BuildConflictIndexInfo(ResultRelInfo *resultRelInfo, Oid conflictindex) } } +/* + * If the tuple is recently dead and was deleted by a transaction with a newer + * commit timestamp than previously recorded, update the associated transaction + * ID, commit time, and origin. This helps ensure that conflict detection uses + * the most recent and relevant deletion metadata. + */ +static void +update_most_recent_deletion_info(TupleTableSlot *scanslot, + TransactionId oldestxmin, + TransactionId *delete_xid, + TimestampTz *delete_time, + RepOriginId *delete_origin) +{ + BufferHeapTupleTableSlot *hslot; + HeapTuple tuple; + Buffer buf; + bool recently_dead = false; + TransactionId xmax; + TimestampTz localts; + RepOriginId localorigin; + + hslot = (BufferHeapTupleTableSlot *) scanslot; + + tuple = ExecFetchSlotHeapTuple(scanslot, false, NULL); + buf = hslot->buffer; + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + /* + * We do not consider HEAPTUPLE_DEAD status because it indicates either + * tuples whose inserting transaction was aborted (meaning there is no + * commit timestamp or origin), or tuples deleted by a transaction older + * than oldestxmin, making it safe to ignore them during conflict + * detection (See comments atop worker.c for details). + */ + if (HeapTupleSatisfiesVacuum(tuple, oldestxmin, buf) == HEAPTUPLE_RECENTLY_DEAD) + recently_dead = true; + + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + + if (!recently_dead) + return; + + xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + if (!TransactionIdIsValid(xmax)) + return; + + /* Select the dead tuple with the most recent commit timestamp */ + if (TransactionIdGetCommitTsData(xmax, &localts, &localorigin) && + TimestampDifferenceExceeds(*delete_time, localts, 0)) + { + *delete_xid = xmax; + *delete_time = localts; + *delete_origin = localorigin; + } +} + +/* + * Searches the relation 'rel' for the most recently deleted tuple that matches + * the values in 'searchslot' and is not yet removable by VACUUM. The function + * returns the transaction ID, origin, and commit timestamp of the transaction + * that deleted this tuple. + * + * 'oldestxmin' acts as a cutoff transaction ID. Tuples deleted by transactions + * with IDs >= 'oldestxmin' are considered recently dead and are eligible for + * conflict detection. + * + * Instead of stopping at the first match, we scan all matching dead tuples to + * identify the most recent deletion. This is crucial because only the latest + * deletion is relevant for resolving conflicts. + * + * For example, consider a scenario on the subscriber where a row is deleted, + * re-inserted, and then deleted again only on the subscriber: + * + * - (pk, 1) - deleted at 9:00, + * - (pk, 1) - deleted at 9:02, + * + * Now, a remote update arrives: (pk, 1) -> (pk, 2), timestamped at 9:01. + * + * If we mistakenly return the older deletion (9:00), the system may wrongly + * apply the remote update using a last-update-wins strategy. Instead, we must + * recognize the more recent deletion at 9:02 and skip the update. See + * comments atop worker.c for details. Note, as of now, conflict resolution + * is not implemented. Consequently, the system may incorrectly report the + * older tuple as the conflicted one, leading to misleading results. + * + * The commit timestamp of the deleting transaction is used to determine which + * tuple was deleted most recently. + */ +bool +RelationFindDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, + TransactionId oldestxmin, + TransactionId *delete_xid, + RepOriginId *delete_origin, + TimestampTz *delete_time) +{ + TupleTableSlot *scanslot; + TableScanDesc scan; + TypeCacheEntry **eq; + Bitmapset *indexbitmap; + TupleDesc desc PG_USED_FOR_ASSERTS_ONLY = RelationGetDescr(rel); + + Assert(equalTupleDescs(desc, searchslot->tts_tupleDescriptor)); + + *delete_xid = InvalidTransactionId; + *delete_origin = InvalidRepOriginId; + *delete_time = 0; + + /* + * If the relation has a replica identity key or a primary key that is + * unusable for locating deleted tuples (see + * IsIndexUsableForFindingDeletedTuple), a full table scan becomes + * necessary. In such cases, comparing the entire tuple is not required, + * since the remote tuple might not include all column values. Instead, + * the indexed columns alone are sufficient to identify the target tuple + * (see logicalrep_rel_mark_updatable). + */ + indexbitmap = RelationGetIndexAttrBitmap(rel, + INDEX_ATTR_BITMAP_IDENTITY_KEY); + + /* fallback to PK if no replica identity */ + if (!indexbitmap) + indexbitmap = RelationGetIndexAttrBitmap(rel, + INDEX_ATTR_BITMAP_PRIMARY_KEY); + + eq = palloc0(sizeof(*eq) * searchslot->tts_tupleDescriptor->natts); + + /* + * Start a heap scan using SnapshotAny to identify dead tuples that are + * not visible under a standard MVCC snapshot. Tuples from transactions + * not yet committed or those just committed prior to the scan are + * excluded in update_most_recent_deletion_info(). + */ + scan = table_beginscan(rel, SnapshotAny, 0, NULL); + scanslot = table_slot_create(rel, NULL); + + table_rescan(scan, NULL); + + /* Try to find the tuple */ + while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) + { + if (!tuples_equal(scanslot, searchslot, eq, indexbitmap)) + continue; + + update_most_recent_deletion_info(scanslot, oldestxmin, delete_xid, + delete_time, delete_origin); + } + + table_endscan(scan); + ExecDropSingleTupleTableSlot(scanslot); + + return *delete_time != 0; +} + +/* + * Similar to RelationFindDeletedTupleInfoSeq() but using index scan to locate + * the deleted tuple. + */ +bool +RelationFindDeletedTupleInfoByIndex(Relation rel, Oid idxoid, + TupleTableSlot *searchslot, + TransactionId oldestxmin, + TransactionId *delete_xid, + RepOriginId *delete_origin, + TimestampTz *delete_time) +{ + Relation idxrel; + ScanKeyData skey[INDEX_MAX_KEYS]; + int skey_attoff; + IndexScanDesc scan; + TupleTableSlot *scanslot; + TypeCacheEntry **eq = NULL; + bool isIdxSafeToSkipDuplicates; + TupleDesc desc PG_USED_FOR_ASSERTS_ONLY = RelationGetDescr(rel); + + Assert(equalTupleDescs(desc, searchslot->tts_tupleDescriptor)); + Assert(OidIsValid(idxoid)); + + *delete_xid = InvalidTransactionId; + *delete_time = 0; + *delete_origin = InvalidRepOriginId; + + isIdxSafeToSkipDuplicates = (GetRelationIdentityOrPK(rel) == idxoid); + + scanslot = table_slot_create(rel, NULL); + + idxrel = index_open(idxoid, RowExclusiveLock); + + /* Build scan key.
*/ + skey_attoff = build_replindex_scan_key(skey, rel, idxrel, searchslot); + + /* + * Start an index scan using SnapshotAny to identify dead tuples that are + * not visible under a standard MVCC snapshot. Tuples from transactions + * not yet committed or those just committed prior to the scan are + * excluded in update_most_recent_deletion_info(). + */ + scan = index_beginscan(rel, idxrel, SnapshotAny, NULL, skey_attoff, 0); + + index_rescan(scan, skey, skey_attoff, NULL, 0); + + /* Try to find the tuple */ + while (index_getnext_slot(scan, ForwardScanDirection, scanslot)) + { + /* + * Avoid expensive equality check if the index is primary key or + * replica identity index. + */ + if (!isIdxSafeToSkipDuplicates) + { + if (eq == NULL) + eq = palloc0(sizeof(*eq) * scanslot->tts_tupleDescriptor->natts); + + if (!tuples_equal(scanslot, searchslot, eq, NULL)) + continue; + } + + update_most_recent_deletion_info(scanslot, oldestxmin, delete_xid, + delete_time, delete_origin); + } + + index_endscan(scan); + + index_close(idxrel, NoLock); + + ExecDropSingleTupleTableSlot(scanslot); + + return *delete_time != 0; +} + /* * Find the tuple that violates the passed unique index (conflictindex). * diff --git a/src/backend/replication/logical/conflict.c b/src/backend/replication/logical/conflict.c index 97c4e26b58654..2fd3e8bbda50b 100644 --- a/src/backend/replication/logical/conflict.c +++ b/src/backend/replication/logical/conflict.c @@ -29,6 +29,7 @@ static const char *const ConflictTypeNames[] = { [CT_UPDATE_EXISTS] = "update_exists", [CT_UPDATE_MISSING] = "update_missing", [CT_DELETE_ORIGIN_DIFFERS] = "delete_origin_differs", + [CT_UPDATE_DELETED] = "update_deleted", [CT_DELETE_MISSING] = "delete_missing", [CT_MULTIPLE_UNIQUE_CONFLICTS] = "multiple_unique_conflicts" }; @@ -176,6 +177,7 @@ errcode_apply_conflict(ConflictType type) case CT_UPDATE_ORIGIN_DIFFERS: case CT_UPDATE_MISSING: case CT_DELETE_ORIGIN_DIFFERS: + case CT_UPDATE_DELETED: case CT_DELETE_MISSING: return errcode(ERRCODE_T_R_SERIALIZATION_FAILURE); } @@ -261,6 +263,26 @@ errdetail_apply_conflict(EState *estate, ResultRelInfo *relinfo, break; + case CT_UPDATE_DELETED: + if (localts) + { + if (localorigin == InvalidRepOriginId) + appendStringInfo(&err_detail, _("The row to be updated was deleted locally in transaction %u at %s."), + localxmin, timestamptz_to_str(localts)); + else if (replorigin_by_oid(localorigin, true, &origin_name)) + appendStringInfo(&err_detail, _("The row to be updated was deleted by a different origin \"%s\" in transaction %u at %s."), + origin_name, localxmin, timestamptz_to_str(localts)); + + /* The origin that modified this row has been removed. 
*/
+				else
+					appendStringInfo(&err_detail, _("The row to be updated was deleted by a non-existent origin in transaction %u at %s."),
+									 localxmin, timestamptz_to_str(localts));
+			}
+			else
+				appendStringInfo(&err_detail, _("The row to be updated was deleted."));
+
+			break;
+
 		case CT_UPDATE_MISSING:
 			appendStringInfoString(&err_detail, _("Could not find the row to be updated."));
 			break;
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index b59221c4d0636..89e241c839280 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -138,9 +138,9 @@
  * Each apply worker that enabled retain_dead_tuples option maintains a
  * non-removable transaction ID (oldest_nonremovable_xid) in shared memory to
  * prevent dead rows from being removed prematurely when the apply worker still
- * needs them to detect conflicts reliably. This helps to retain the required
- * commit_ts module information, which further helps to detect
- * update_origin_differs and delete_origin_differs conflicts reliably, as
+ * needs them to detect update_deleted conflicts. Additionally, this helps to
+ * retain the required commit_ts module information, which further helps to
+ * detect update_origin_differs and delete_origin_differs conflicts reliably, as
  * otherwise, vacuum freeze could remove the required information.
  *
  * The logical replication launcher manages an internal replication slot named
@@ -185,10 +185,10 @@
  * transactions that occurred concurrently with the tuple DELETE, any
  * subsequent UPDATE from a remote node should have a later timestamp. In such
  * cases, it is acceptable to detect an update_missing scenario and convert the
- * UPDATE to an INSERT when applying it. But, detecting concurrent remote
- * transactions with earlier timestamps than the DELETE is necessary, as the
- * UPDATEs in remote transactions should be ignored if their timestamp is
- * earlier than that of the dead tuples.
+ * UPDATE to an INSERT when applying it. But, for concurrent remote
+ * transactions with earlier timestamps than the DELETE, detecting
+ * update_deleted is necessary, as the UPDATEs in remote transactions should be
+ * ignored if their timestamp is earlier than that of the dead tuples.
  *
  * Note that advancing the non-removable transaction ID is not supported if the
  * publisher is also a physical standby. This is because the logical walsender
@@ -576,6 +576,12 @@ static bool FindReplTupleInLocalRel(ApplyExecutionData *edata, Relation localrel,
 									Oid localidxoid,
 									TupleTableSlot *remoteslot,
 									TupleTableSlot **localslot);
+static bool FindDeletedTupleInLocalRel(Relation localrel,
+									   Oid localidxoid,
+									   TupleTableSlot *remoteslot,
+									   TransactionId *delete_xid,
+									   RepOriginId *delete_origin,
+									   TimestampTz *delete_time);
 static void apply_handle_tuple_routing(ApplyExecutionData *edata,
 									   TupleTableSlot *remoteslot,
 									   LogicalRepTupleData *newtup,
@@ -2912,17 +2918,31 @@ apply_handle_update_internal(ApplyExecutionData *edata,
 	}
 	else
 	{
+		ConflictType type;
 		TupleTableSlot *newslot = localslot;
 
+		/*
+		 * Detecting whether the tuple was recently deleted or never existed
+		 * is crucial to avoid misleading the user during conflict handling.
+		 */
+		if (FindDeletedTupleInLocalRel(localrel, localindexoid, remoteslot,
+									   &conflicttuple.xmin,
+									   &conflicttuple.origin,
+									   &conflicttuple.ts) &&
+			conflicttuple.origin != replorigin_session_origin)
+			type = CT_UPDATE_DELETED;
+		else
+			type = CT_UPDATE_MISSING;
+
 		/* Store the new tuple for conflict reporting */
 		slot_store_data(newslot, relmapentry, newtup);
 
 		/*
-		 * The tuple to be updated could not be found. Do nothing except for
-		 * emitting a log message.
+		 * The tuple to be updated could not be found or was deleted. Do
+		 * nothing except for emitting a log message.
 		 */
-		ReportApplyConflict(estate, relinfo, LOG, CT_UPDATE_MISSING,
-							remoteslot, newslot, list_make1(&conflicttuple));
+		ReportApplyConflict(estate, relinfo, LOG, type, remoteslot, newslot,
+							list_make1(&conflicttuple));
 	}
 
 	/* Cleanup. */
@@ -3142,6 +3162,112 @@ FindReplTupleInLocalRel(ApplyExecutionData *edata, Relation localrel,
 	return found;
 }
 
+/*
+ * Determine whether the index can reliably locate the deleted tuple in the
+ * local relation.
+ *
+ * An index may exclude deleted tuples if it was re-indexed or re-created during
+ * change application. Therefore, an index is considered usable only if the
+ * conflict detection slot.xmin (conflict_detection_xmin) is greater than the
+ * index tuple's xmin. This ensures that any tuples deleted prior to the index
+ * creation or re-indexing are not relevant for conflict detection in the
+ * current apply worker.
+ *
+ * Note that indexes may also be excluded if they were modified by other DDL
+ * operations, such as ALTER INDEX. However, this is acceptable, as the
+ * likelihood of such DDL changes coinciding with the need to scan dead
+ * tuples for update_deleted detection is low.
+ */
+static bool
+IsIndexUsableForFindingDeletedTuple(Oid localindexoid,
+									TransactionId conflict_detection_xmin)
+{
+	HeapTuple	index_tuple;
+	TransactionId index_xmin;
+
+	index_tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(localindexoid));
+
+	if (!HeapTupleIsValid(index_tuple))	/* should not happen */
+		elog(ERROR, "cache lookup failed for index %u", localindexoid);
+
+	/*
+	 * No need to check for a frozen transaction ID, as
+	 * TransactionIdPrecedes() manages it internally, treating it as falling
+	 * behind the conflict_detection_xmin.
+	 */
+	index_xmin = HeapTupleHeaderGetXmin(index_tuple->t_data);
+
+	ReleaseSysCache(index_tuple);
+
+	return TransactionIdPrecedes(index_xmin, conflict_detection_xmin);
+}
+
+/*
+ * Attempts to locate a deleted tuple in the local relation that matches the
+ * values of the tuple received from the publication side (in 'remoteslot').
+ * The search is performed using either the replica identity index, the primary
+ * key, another available index, or a sequential scan if necessary.
+ *
+ * Returns true if the deleted tuple is found. If found, the transaction ID,
+ * origin, and commit timestamp of the deletion are stored in '*delete_xid',
+ * '*delete_origin', and '*delete_time' respectively.
+ */
+static bool
+FindDeletedTupleInLocalRel(Relation localrel, Oid localidxoid,
+						   TupleTableSlot *remoteslot,
+						   TransactionId *delete_xid, RepOriginId *delete_origin,
+						   TimestampTz *delete_time)
+{
+	TransactionId oldestxmin;
+	ReplicationSlot *slot;
+
+	/*
+	 * Return false if either dead tuples are not retained or commit timestamp
+	 * data is not available.
+	 */
+	if (!MySubscription->retaindeadtuples || !track_commit_timestamp)
+		return false;
+
+	/*
+	 * For conflict detection, we use the conflict slot's xmin value instead
+	 * of invoking GetOldestNonRemovableTransactionId(). The slot.xmin acts as
+	 * a threshold to identify tuples that were recently deleted. These tuples
+	 * are not visible to concurrent transactions, but we log an
+	 * update_deleted conflict if such a tuple matches the remote update being
+	 * applied.
+	 *
+	 * Although GetOldestNonRemovableTransactionId() can return a value older
+	 * than the slot's xmin, for our current purpose it is acceptable to treat
+	 * tuples deleted by transactions prior to slot.xmin as update_missing
+	 * conflicts.
+	 *
+	 * Ideally, we would use oldest_nonremovable_xid, which is directly
+	 * maintained by the leader apply worker. However, this value is not
+	 * available to table synchronization or parallel apply workers, making
+	 * slot.xmin a practical alternative in those contexts.
+	 */
+	slot = SearchNamedReplicationSlot(CONFLICT_DETECTION_SLOT, true);
+
+	Assert(slot);
+
+	SpinLockAcquire(&slot->mutex);
+	oldestxmin = slot->data.xmin;
+	SpinLockRelease(&slot->mutex);
+
+	Assert(TransactionIdIsValid(oldestxmin));
+
+	if (OidIsValid(localidxoid) &&
+		IsIndexUsableForFindingDeletedTuple(localidxoid, oldestxmin))
+		return RelationFindDeletedTupleInfoByIndex(localrel, localidxoid,
+												   remoteslot, oldestxmin,
+												   delete_xid, delete_origin,
+												   delete_time);
+	else
+		return RelationFindDeletedTupleInfoSeq(localrel, remoteslot,
+											   oldestxmin, delete_xid,
+											   delete_origin, delete_time);
+}
+
 /*
  * This handles insert, update, delete on a partitioned table.
  */
@@ -3260,18 +3386,35 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
 												remoteslot_part, &localslot);
 				if (!found)
 				{
+					ConflictType type;
 					TupleTableSlot *newslot = localslot;
 
+					/*
+					 * Detecting whether the tuple was recently deleted or
+					 * never existed is crucial to avoid misleading the user
+					 * during conflict handling.
+					 */
+					if (FindDeletedTupleInLocalRel(partrel,
+												   part_entry->localindexoid,
+												   remoteslot_part,
+												   &conflicttuple.xmin,
+												   &conflicttuple.origin,
+												   &conflicttuple.ts) &&
+						conflicttuple.origin != replorigin_session_origin)
+						type = CT_UPDATE_DELETED;
+					else
+						type = CT_UPDATE_MISSING;
+
 					/* Store the new tuple for conflict reporting */
 					slot_store_data(newslot, part_entry, newtup);
 
 					/*
-					 * The tuple to be updated could not be found. Do nothing
-					 * except for emitting a log message.
+					 * The tuple to be updated could not be found or was
+					 * deleted. Do nothing except for emitting a log message.
 					 */
 					ReportApplyConflict(estate, partrelinfo, LOG,
-										CT_UPDATE_MISSING, remoteslot_part,
-										newslot, list_make1(&conflicttuple));
+										type, remoteslot_part, newslot,
+										list_make1(&conflicttuple));
 
 					return;
 				}
@@ -4172,8 +4315,8 @@ can_advance_nonremovable_xid(RetainDeadTuplesData *rdt_data)
 {
 	/*
 	 * It is sufficient to manage non-removable transaction ID for a
-	 * subscription by the main apply worker to detect conflicts reliably even
-	 * for table sync or parallel apply workers.
+	 * subscription by the main apply worker to detect update_deleted reliably
+	 * even for table sync or parallel apply workers.
 	 */
 	if (!am_leader_apply_worker())
 		return false;
@@ -4374,10 +4517,11 @@ wait_for_local_flush(RetainDeadTuplesData *rdt_data)
 	 * We expect the publisher and subscriber clocks to be in sync using time
 	 * sync service like NTP. Otherwise, we will advance this worker's
 	 * oldest_nonremovable_xid prematurely, leading to the removal of rows
-	 * required to detect conflicts reliably.
This check primarily addresses - * scenarios where the publisher's clock falls behind; if the publisher's - * clock is ahead, subsequent transactions will naturally bear later - * commit timestamps, conforming to the design outlined atop worker.c. + * required to detect update_deleted reliably. This check primarily + * addresses scenarios where the publisher's clock falls behind; if the + * publisher's clock is ahead, subsequent transactions will naturally bear + * later commit timestamps, conforming to the design outlined atop + * worker.c. * * XXX Consider waiting for the publisher's clock to catch up with the * subscriber's before proceeding to the next phase. diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 1c12ddbae493c..c756c2bebaaa0 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -2171,7 +2171,7 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS) Datum pg_stat_get_subscription_stats(PG_FUNCTION_ARGS) { -#define PG_STAT_GET_SUBSCRIPTION_STATS_COLS 11 +#define PG_STAT_GET_SUBSCRIPTION_STATS_COLS 12 Oid subid = PG_GETARG_OID(0); TupleDesc tupdesc; Datum values[PG_STAT_GET_SUBSCRIPTION_STATS_COLS] = {0}; @@ -2197,15 +2197,17 @@ pg_stat_get_subscription_stats(PG_FUNCTION_ARGS) INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "confl_update_exists", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 7, "confl_update_missing", + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "confl_update_deleted", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 8, "confl_delete_origin_differs", + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "confl_update_missing", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 9, "confl_delete_missing", + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "confl_delete_origin_differs", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 10, "confl_multiple_unique_conflicts", + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "confl_delete_missing", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 11, "stats_reset", + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "confl_multiple_unique_conflicts", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 5173d422d468a..750a9d8a09b25 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202507231 +#define CATALOG_VERSION_NO 202508041 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 3ee8fed7e537f..118d6da1ace0e 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5688,9 +5688,9 @@ { oid => '6231', descr => 'statistics: information about subscription stats', proname => 'pg_stat_get_subscription_stats', provolatile => 's', proparallel => 'r', prorettype => 'record', proargtypes => 'oid', - proallargtypes => '{oid,oid,int8,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}', - proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{subid,subid,apply_error_count,sync_error_count,confl_insert_exists,confl_update_origin_differs,confl_update_exists,confl_update_missing,confl_delete_origin_differs,confl_delete_missing,confl_multiple_unique_conflicts,stats_reset}', + proallargtypes => '{oid,oid,int8,int8,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}', + 
proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{subid,subid,apply_error_count,sync_error_count,confl_insert_exists,confl_update_origin_differs,confl_update_exists,confl_update_deleted,confl_update_missing,confl_delete_origin_differs,confl_delete_missing,confl_multiple_unique_conflicts,stats_reset}', prosrc => 'pg_stat_get_subscription_stats' }, { oid => '6118', descr => 'statistics: information about subscription', proname => 'pg_stat_get_subscription', prorows => '10', proisstrict => 'f', diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 104b059544dd3..a71502efeed75 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -14,6 +14,7 @@ #ifndef EXECUTOR_H #define EXECUTOR_H +#include "datatype/timestamp.h" #include "executor/execdesc.h" #include "fmgr.h" #include "nodes/lockoptions.h" @@ -759,7 +760,18 @@ extern bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid, TupleTableSlot *outslot); extern bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot); - +extern bool RelationFindDeletedTupleInfoSeq(Relation rel, + TupleTableSlot *searchslot, + TransactionId oldestxmin, + TransactionId *delete_xid, + RepOriginId *delete_origin, + TimestampTz *delete_time); +extern bool RelationFindDeletedTupleInfoByIndex(Relation rel, Oid idxoid, + TupleTableSlot *searchslot, + TransactionId oldestxmin, + TransactionId *delete_xid, + RepOriginId *delete_origin, + TimestampTz *delete_time); extern void ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo, EState *estate, TupleTableSlot *slot); extern void ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, diff --git a/src/include/replication/conflict.h b/src/include/replication/conflict.h index 6c59125f25657..ff3cb8416ecff 100644 --- a/src/include/replication/conflict.h +++ b/src/include/replication/conflict.h @@ -32,6 +32,9 @@ typedef enum /* The updated row value violates unique constraint */ CT_UPDATE_EXISTS, + /* The row to be updated was concurrently deleted by a different origin */ + CT_UPDATE_DELETED, + /* The row to be updated is missing */ CT_UPDATE_MISSING, diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 0c7b8440a61e3..7c0204dd6f4ce 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -87,8 +87,9 @@ typedef struct LogicalRepWorker bool parallel_apply; /* - * The changes made by this and later transactions must be retained to - * ensure reliable conflict detection during the apply phase. + * Changes made by this transaction and subsequent ones must be preserved. + * This ensures that update_deleted conflicts can be accurately detected + * during the apply phase of logical replication by this worker. * * The logical replication launcher manages an internal replication slot * named "pg_conflict_detection". 
It asynchronously collects this ID to
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index dce8c672b40fe..6509fda77a994 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2179,13 +2179,14 @@ pg_stat_subscription_stats| SELECT ss.subid,
     ss.confl_insert_exists,
     ss.confl_update_origin_differs,
     ss.confl_update_exists,
+    ss.confl_update_deleted,
     ss.confl_update_missing,
     ss.confl_delete_origin_differs,
     ss.confl_delete_missing,
     ss.confl_multiple_unique_conflicts,
     ss.stats_reset
    FROM pg_subscription s,
-    LATERAL pg_stat_get_subscription_stats(s.oid) ss(subid, apply_error_count, sync_error_count, confl_insert_exists, confl_update_origin_differs, confl_update_exists, confl_update_missing, confl_delete_origin_differs, confl_delete_missing, confl_multiple_unique_conflicts, stats_reset);
+    LATERAL pg_stat_get_subscription_stats(s.oid) ss(subid, apply_error_count, sync_error_count, confl_insert_exists, confl_update_origin_differs, confl_update_exists, confl_update_deleted, confl_update_missing, confl_delete_origin_differs, confl_delete_missing, confl_multiple_unique_conflicts, stats_reset);
 pg_stat_sys_indexes| SELECT relid,
     indexrelid,
     schemaname,
diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl
index 976d53a870e5e..36aeb14c563af 100644
--- a/src/test/subscription/t/035_conflicts.pl
+++ b/src/test/subscription/t/035_conflicts.pl
@@ -150,7 +150,9 @@
 # Setup a bidirectional logical replication between node_A & node_B
 ###############################################################################
-# Initialize nodes.
+# Initialize nodes. Enable track_commit_timestamp on both nodes to detect
+# the conflict when attempting to update a row that was previously modified by
+# a different origin.
 # node_A. Increase the log_min_messages setting to DEBUG2 to debug test
 # failures. Disable autovacuum to avoid generating xid that could affect the
 my $node_A = $node_publisher;
 $node_A->append_conf(
 	'postgresql.conf',
-	qq{autovacuum = off
+	qq{track_commit_timestamp = on
+	autovacuum = off
 	log_min_messages = 'debug2'});
 $node_A->restart;
@@ -270,6 +273,8 @@
 ###############################################################################
 # Check that dead tuples on node A cannot be cleaned by VACUUM until the
 # concurrent transactions on Node B have been applied and flushed on Node A.
+# Also, check that an update_deleted conflict is detected when updating a row
+# that was deleted by a different origin.
############################################################################### # Insert a record @@ -288,6 +293,8 @@ "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'" ); +my $log_location = -s $node_B->logfile; + $node_B->safe_psql('postgres', "UPDATE tab SET b = 3 WHERE a = 1;"); $node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 1;"); @@ -299,10 +306,30 @@ qr/1 are dead but not yet removable/, 'the deleted column is non-removable'); +# Ensure the DELETE is replayed on Node B +$node_A->wait_for_catchup($subname_BA); + +# Check the conflict detected on Node B +my $logfile = slurp_file($node_B->logfile(), $log_location); +ok( $logfile =~ + qr/conflict detected on relation "public.tab": conflict=delete_origin_differs.* +.*DETAIL:.* Deleting the row that was modified locally in transaction [0-9]+ at .* +.*Existing local tuple \(1, 3\); replica identity \(a\)=\(1\)/, + 'delete target row was modified in tab'); + +$log_location = -s $node_A->logfile; + $node_A->safe_psql( 'postgres', "ALTER SUBSCRIPTION $subname_AB ENABLE;"); $node_B->wait_for_catchup($subname_AB); +$logfile = slurp_file($node_A->logfile(), $log_location); +ok( $logfile =~ + qr/conflict detected on relation "public.tab": conflict=update_deleted.* +.*DETAIL:.* The row to be updated was deleted locally in transaction [0-9]+ at .* +.*Remote tuple \(1, 3\); replica identity \(a\)=\(1\)/, + 'update target row was deleted in tab'); + # Remember the next transaction ID to be assigned my $next_xid = $node_A->safe_psql('postgres', "SELECT txid_current() + 1;"); @@ -324,6 +351,41 @@ qr/1 removed, 1 remain, 0 are dead but not yet removable/, 'the deleted column is removed'); +############################################################################### +# Ensure that the deleted tuple needed to detect an update_deleted conflict is +# accessible via a sequential table scan. +############################################################################### + +# Drop the primary key from tab on node A and set REPLICA IDENTITY to FULL to +# enforce sequential scanning of the table. +$node_A->safe_psql('postgres', "ALTER TABLE tab REPLICA IDENTITY FULL"); +$node_B->safe_psql('postgres', "ALTER TABLE tab REPLICA IDENTITY FULL"); +$node_A->safe_psql('postgres', "ALTER TABLE tab DROP CONSTRAINT tab_pkey;"); + +# Disable the logical replication from node B to node A +$node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE"); + +# Wait for the apply worker to stop +$node_A->poll_query_until('postgres', + "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'" +); + +$node_B->safe_psql('postgres', "UPDATE tab SET b = 4 WHERE a = 2;"); +$node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 2;"); + +$log_location = -s $node_A->logfile; + +$node_A->safe_psql( + 'postgres', "ALTER SUBSCRIPTION $subname_AB ENABLE;"); +$node_B->wait_for_catchup($subname_AB); + +$logfile = slurp_file($node_A->logfile(), $log_location); +ok( $logfile =~ + qr/conflict detected on relation "public.tab": conflict=update_deleted.* +.*DETAIL:.* The row to be updated was deleted locally in transaction [0-9]+ at .* +.*Remote tuple \(2, 4\); replica identity full \(2, 2\)/, + 'update target row was deleted in tab'); + ############################################################################### # Check that the replication slot pg_conflict_detection is dropped after # removing all the subscriptions. 
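
The per-subscription counter added above is exposed through the
pg_stat_subscription_stats view (see the rules.out change earlier in this
patch). A minimal sketch of how the new conflict type could be monitored,
assuming a subscription created with the retain_dead_tuples option and
track_commit_timestamp = on, as in the TAP test:

    -- Per-subscription conflict counters; a non-zero confl_update_deleted
    -- means a remote UPDATE targeted a row that was recently deleted locally.
    SELECT subid, confl_update_deleted, confl_update_missing
      FROM pg_stat_subscription_stats;

Each detected conflict is also reported in the server log as
conflict=update_deleted, together with the deleting transaction's ID, origin,
and commit timestamp, as exercised by the test above.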
From bca9a1900c87df86dd10d227910050cf85000c53 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 4 Aug 2025 17:43:22 +1200 Subject: [PATCH 427/602] Fix incorrect comment regarding mod_since_analyze Author: Yugo Nagata Discussion: https://postgr.es/m/20250804140120.280c2d6a9d2ea687cd167743@sraoss.co.jp --- src/backend/commands/analyze.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 7111d5d5334f2..40d66537ad730 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -690,8 +690,8 @@ do_analyze_rel(Relation onerel, const VacuumParams params, * only do it for inherited stats. (We're never called for not-inherited * stats on partitioned tables anyway.) * - * Reset the changes_since_analyze counter only if we analyzed all - * columns; otherwise, there is still work for auto-analyze to do. + * Reset the mod_since_analyze counter only if we analyzed all columns; + * otherwise, there is still work for auto-analyze to do. */ if (!inh) pgstat_report_analyze(onerel, totalrows, totaldeadrows, From 126665289fa8e0f7b30165674983f079a5896d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Mon, 4 Aug 2025 13:26:45 +0200 Subject: [PATCH 428/602] doc: mention unusability of dropped CHECK to verify NOT NULL It's possible to use a CHECK (col IS NOT NULL) constraint to skip scanning a table for nulls when adding a NOT NULL constraint on the same column. However, if the CHECK constraint is dropped on the same command that the NOT NULL is added, this fails, i.e., makes the NOT NULL addition slow. The best we can do about it at this stage is to document this so that users aren't taken by surprise. (In Postgres 18 you can directly add the NOT NULL constraint as NOT VALID instead, so there's no longer much use for the CHECK constraint, therefore no point in building mechanism to support the case better.) Reported-by: Andrew Reviewed-by: David G. Johnston Discussion: https://postgr.es/m/175385113607.786.16774570234342968908@wrigleys.postgresql.org --- doc/src/sgml/ref/alter_table.sgml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 1e4f26c13f650..541e093a519d7 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -240,9 +240,10 @@ WITH ( MODULUS numeric_literal, REM provided none of the records in the table contain a NULL value for the column. Ordinarily this is checked during the ALTER TABLE by scanning the - entire table; however, if a valid CHECK constraint is - found which proves no NULL can exist, then the - table scan is skipped. + entire table, unless NOT VALID is specified; + however, if a valid CHECK constraint exists + (and is not dropped in the same command) which proves no + NULL can exist, then the table scan is skipped. If a column has an invalid not-null constraint, SET NOT NULL validates it. From 4614d53d4ef4d2249df45adedd85da8129feee94 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Mon, 4 Aug 2025 20:51:42 +0900 Subject: [PATCH 429/602] Avoid unexpected shutdown when sync_replication_slots is enabled. Previously, enabling sync_replication_slots while wal_level was not set to logical could cause the server to shut down. This was because the postmaster performed a configuration check before launching the slot synchronization worker and raised an ERROR if the settings were incompatible. 
Since ERROR is treated as FATAL in the postmaster, this resulted in the entire server shutting down unexpectedly. This commit changes the postmaster to log that message with a LOG-level instead of raising an ERROR, allowing the server to continue running even with the misconfiguration. Back-patch to v17, where slot synchronization was introduced. Reported-by: Hugo DUBOIS Author: Fujii Masao Reviewed-by: Hugo DUBOIS Reviewed-by: Shveta Malik Discussion: https://postgr.es/m/CAH0PTU_pc3oHi__XESF9ZigCyzai1Mo3LsOdFyQA4aUDkm01RA@mail.gmail.com Backpatch-through: 17 --- src/backend/replication/logical/slotsync.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 2f0c08b8fbd33..3773844011358 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -1059,14 +1059,14 @@ ValidateSlotSyncParams(int elevel) { /* * Logical slot sync/creation requires wal_level >= logical. - * - * Since altering the wal_level requires a server restart, so error out in - * this case regardless of elevel provided by caller. */ if (wal_level < WAL_LEVEL_LOGICAL) - ereport(ERROR, + { + ereport(elevel, errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("replication slot synchronization requires \"wal_level\" >= \"logical\"")); + return false; + } /* * A physical replication slot(primary_slot_name) is required on the From 07684443b1e03cd56a6a9dee589f5de91e3f9a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Mon, 4 Aug 2025 14:03:01 +0200 Subject: [PATCH 430/602] Rename XLogData protocol message to WALData This name is only used as documentation, and using this name is consistent with its byte being a 'w'. Renaming it would also make the use of a symbolic name based on the word "WAL" rather than the obsolete "XLog" term more consistent, per future commits along the lines of 37c7a7eeb6d1, 4a68d5008869, f4b54e1ed985. Discussion: https://postgr.es/m/aIECfYfevCUpenBT@nathan --- doc/src/sgml/protocol.sgml | 8 ++++---- src/bin/pg_basebackup/pg_recvlogical.c | 4 ++-- src/bin/pg_basebackup/receivelog.c | 18 +++++++++--------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index e56eac8fd0fa0..cc5c8dc574ce5 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2555,8 +2555,8 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" - - XLogData (B) + + WALData (B) @@ -2604,11 +2604,11 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" - A single WAL record is never split across two XLogData messages. + A single WAL record is never split across two WALData messages. When a WAL record crosses a WAL page boundary, and is therefore already split using continuation records, it can be split at the page boundary. In other words, the first main WAL record and its - continuation records can be sent in different XLogData messages. + continuation records can be sent in different WALData messages. diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c index 8a5dd24e6c9ad..0e9d2e2394731 100644 --- a/src/bin/pg_basebackup/pg_recvlogical.c +++ b/src/bin/pg_basebackup/pg_recvlogical.c @@ -517,7 +517,7 @@ StreamLogicalLog(void) } /* - * Read the header of the XLogData message, enclosed in the CopyData + * Read the header of the WALData message, enclosed in the CopyData * message. 
We only need the WAL location field (dataStart), the rest * of the header is ignored. */ @@ -605,7 +605,7 @@ StreamLogicalLog(void) /* * We're doing a client-initiated clean exit and have sent CopyDone to * the server. Drain any messages, so we don't miss a last-minute - * ErrorResponse. The walsender stops generating XLogData records once + * ErrorResponse. The walsender stops generating WALData records once * it sees CopyDone, so expect this to finish quickly. After CopyDone, * it's too late for sendFeedback(), even if this were to take a long * time. Hence, use synchronous-mode PQgetCopyData(). diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index d6b7f117fa3bb..f2b54d3c50171 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -38,8 +38,8 @@ static int CopyStreamReceive(PGconn *conn, long timeout, pgsocket stop_socket, char **buffer); static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, XLogRecPtr blockpos, TimestampTz *last_status); -static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, - XLogRecPtr *blockpos); +static bool ProcessWALDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, + XLogRecPtr *blockpos); static PGresult *HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf, XLogRecPtr blockpos, XLogRecPtr *stoppos); static bool CheckCopyStreamStop(PGconn *conn, StreamCtl *stream, XLogRecPtr blockpos); @@ -831,7 +831,7 @@ HandleCopyStream(PGconn *conn, StreamCtl *stream, } else if (copybuf[0] == 'w') { - if (!ProcessXLogDataMsg(conn, stream, copybuf, r, &blockpos)) + if (!ProcessWALDataMsg(conn, stream, copybuf, r, &blockpos)) goto error; /* @@ -1041,11 +1041,11 @@ ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, } /* - * Process XLogData message. + * Process WALData message. */ static bool -ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, - XLogRecPtr *blockpos) +ProcessWALDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, + XLogRecPtr *blockpos) { int xlogoff; int bytes_left; @@ -1054,13 +1054,13 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, /* * Once we've decided we don't want to receive any more, just ignore any - * subsequent XLogData messages. + * subsequent WALData messages. */ if (!(still_sending)) return true; /* - * Read the header of the XLogData message, enclosed in the CopyData + * Read the header of the WALData message, enclosed in the CopyData * message. We only need the WAL location field (dataStart), the rest of * the header is ignored. */ @@ -1162,7 +1162,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, return false; } still_sending = false; - return true; /* ignore the rest of this XLogData packet */ + return true; /* ignore the rest of this WALData packet */ } } } From 6ae268cf284c5a706455e164f8879bd721296535 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 4 Aug 2025 09:08:10 +0200 Subject: [PATCH 431/602] Improve prep_buildtree When prep_buildtree is used to prepare a build tree when the source directory already contains another build tree, then it will produce the directory structure of the first build tree in the second one. For example, if there is postgresql/ postgresql/build1/ and a new build tree postgresql/build2/ is prepared, then this will produce postgresql/build2/build1/ because it just copies all subdirectories of the source tree. 
This is not harmful, but it's pretty stupid and can be confusing, and it slows down prep_buildtree when there are many build trees. When prep_buildtree was first created, it was more common for the build tree to be outside the source tree, in which case this is not a problem. But now with the arrival of meson, it appears to be more common (and also the way it is documented in the PostgreSQL documentation) to have the build tree inside the source tree. (To be clear: This change does not affect meson at all. But it would be an issue for example if you have a meson build tree and a configure build tree under the same source tree.) To fix this, change the "find" command to process only those top-level directories that we know about (namely config, contrib, doc, src). (I alternatively looked for ways to ignore directories that looked like build directories, but that seemed extremely complicated.) With that, we can also remove the code that ignores directories related to source-control management. In passing, also remove the workaround for handling prebuilt docs, since that has been obsolete since commit 54fac0e5050. Reviewed-by: Nazir Bilal Yavuz Discussion: https://www.postgresql.org/message-id/flat/8b96b07f-1f48-46e9-b26e-01b2c9e4ac8d%40eisentraut.org --- config/prep_buildtree | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/config/prep_buildtree b/config/prep_buildtree index a0eabd3dee288..e148535ac112e 100644 --- a/config/prep_buildtree +++ b/config/prep_buildtree @@ -22,18 +22,14 @@ sourcetree=`cd $1 && pwd` buildtree=`cd ${2:-'.'} && pwd` -# We must not auto-create the subdirectories holding built documentation. -# If we did, it would interfere with installation of prebuilt docs from -# the source tree, if a VPATH build is done from a distribution tarball. -# See bug #5595. -for item in `find "$sourcetree" -type d \( \( -name CVS -prune \) -o \( -name .git -prune \) -o -print \) | grep -v "$sourcetree/doc/src/sgml/\+"`; do +for item in `find "$sourcetree"/config "$sourcetree"/contrib "$sourcetree"/doc "$sourcetree"/src -type d -print`; do subdir=`expr "$item" : "$sourcetree\(.*\)"` if test ! -d "$buildtree/$subdir"; then mkdir -p "$buildtree/$subdir" || exit 1 fi done -for item in `find "$sourcetree" -name Makefile -print -o -name GNUmakefile -print | grep -v "$sourcetree/doc/src/sgml/images/"`; do +for item in "$sourcetree"/Makefile `find "$sourcetree"/config "$sourcetree"/contrib "$sourcetree"/doc "$sourcetree"/src -name Makefile -print -o -name GNUmakefile -print`; do filename=`expr "$item" : "$sourcetree\(.*\)"` if test ! -f "${item}.in"; then if cmp "$item" "$buildtree/$filename" >/dev/null 2>&1; then : ; else From 4e23c9ef65accde7eb3e56aa28d50ae5cf79b64b Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Mon, 4 Aug 2025 08:56:48 -0400 Subject: [PATCH 432/602] Split func.sgml into more manageable pieces func.sgml has grown over the years to the point where it is very difficult to manage. This commit splits out each sect1 piece into its own file, which is then included in the main file, so that the built documentation should be identical to the pre-split documentation. All these new files are placed in a new "func" subdirectory, and the previous func.sgml is removed. 
Done using scripts developed by: Author: jian he Discussion: https://postgr.es/m/CACJufxFgAh1--EMwOjMuANe=VTmjkNaZjH+AzSe04-8ZCGiESA@mail.gmail.com --- doc/src/sgml/filelist.sgml | 5 +- doc/src/sgml/func.sgml | 32075 ------------------- doc/src/sgml/func/allfiles.sgml | 40 + doc/src/sgml/func/func-admin.sgml | 2962 ++ doc/src/sgml/func/func-aggregate.sgml | 1418 + doc/src/sgml/func/func-array.sgml | 646 + doc/src/sgml/func/func-binarystring.sgml | 854 + doc/src/sgml/func/func-bitstring.sgml | 358 + doc/src/sgml/func/func-comparison.sgml | 638 + doc/src/sgml/func/func-comparisons.sgml | 336 + doc/src/sgml/func/func-conditional.sgml | 283 + doc/src/sgml/func/func-datetime.sgml | 2200 ++ doc/src/sgml/func/func-enum.sgml | 121 + doc/src/sgml/func/func-event-triggers.sgml | 332 + doc/src/sgml/func/func-formatting.sgml | 1193 + doc/src/sgml/func/func-geometry.sgml | 1261 + doc/src/sgml/func/func-info.sgml | 3790 +++ doc/src/sgml/func/func-json.sgml | 3945 +++ doc/src/sgml/func/func-logical.sgml | 146 + doc/src/sgml/func/func-matching.sgml | 2471 ++ doc/src/sgml/func/func-math.sgml | 1615 + doc/src/sgml/func/func-merge-support.sgml | 78 + doc/src/sgml/func/func-net.sgml | 592 + doc/src/sgml/func/func-range.sgml | 1053 + doc/src/sgml/func/func-sequence.sgml | 195 + doc/src/sgml/func/func-srf.sgml | 306 + doc/src/sgml/func/func-statistics.sgml | 85 + doc/src/sgml/func/func-string.sgml | 1818 ++ doc/src/sgml/func/func-subquery.sgml | 349 + doc/src/sgml/func/func-textsearch.sgml | 1046 + doc/src/sgml/func/func-trigger.sgml | 135 + doc/src/sgml/func/func-uuid.sgml | 188 + doc/src/sgml/func/func-window.sgml | 284 + doc/src/sgml/func/func-xml.sgml | 1283 + doc/src/sgml/func/func.sgml | 84 + 35 files changed, 32109 insertions(+), 32076 deletions(-) delete mode 100644 doc/src/sgml/func.sgml create mode 100644 doc/src/sgml/func/allfiles.sgml create mode 100644 doc/src/sgml/func/func-admin.sgml create mode 100644 doc/src/sgml/func/func-aggregate.sgml create mode 100644 doc/src/sgml/func/func-array.sgml create mode 100644 doc/src/sgml/func/func-binarystring.sgml create mode 100644 doc/src/sgml/func/func-bitstring.sgml create mode 100644 doc/src/sgml/func/func-comparison.sgml create mode 100644 doc/src/sgml/func/func-comparisons.sgml create mode 100644 doc/src/sgml/func/func-conditional.sgml create mode 100644 doc/src/sgml/func/func-datetime.sgml create mode 100644 doc/src/sgml/func/func-enum.sgml create mode 100644 doc/src/sgml/func/func-event-triggers.sgml create mode 100644 doc/src/sgml/func/func-formatting.sgml create mode 100644 doc/src/sgml/func/func-geometry.sgml create mode 100644 doc/src/sgml/func/func-info.sgml create mode 100644 doc/src/sgml/func/func-json.sgml create mode 100644 doc/src/sgml/func/func-logical.sgml create mode 100644 doc/src/sgml/func/func-matching.sgml create mode 100644 doc/src/sgml/func/func-math.sgml create mode 100644 doc/src/sgml/func/func-merge-support.sgml create mode 100644 doc/src/sgml/func/func-net.sgml create mode 100644 doc/src/sgml/func/func-range.sgml create mode 100644 doc/src/sgml/func/func-sequence.sgml create mode 100644 doc/src/sgml/func/func-srf.sgml create mode 100644 doc/src/sgml/func/func-statistics.sgml create mode 100644 doc/src/sgml/func/func-string.sgml create mode 100644 doc/src/sgml/func/func-subquery.sgml create mode 100644 doc/src/sgml/func/func-textsearch.sgml create mode 100644 doc/src/sgml/func/func-trigger.sgml create mode 100644 doc/src/sgml/func/func-uuid.sgml create mode 100644 doc/src/sgml/func/func-window.sgml create mode 100644 
doc/src/sgml/func/func-xml.sgml create mode 100644 doc/src/sgml/func/func.sgml diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index bcde3cfd0374a..ac66fcbdb5727 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -17,7 +17,10 @@ - + + +%allfiles_func; + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml deleted file mode 100644 index 74a16af04ad3b..0000000000000 --- a/doc/src/sgml/func.sgml +++ /dev/null @@ -1,32075 +0,0 @@ - - - - Functions and Operators - - - function - - - - operator - - - - PostgreSQL provides a large number of - functions and operators for the built-in data types. This chapter - describes most of them, although additional special-purpose functions - appear in relevant sections of the manual. Users can also - define their own functions and operators, as described in - . The - psql commands \df and - \do can be used to list all - available functions and operators, respectively. - - - - The notation used throughout this chapter to describe the argument and - result data types of a function or operator is like this: - -repeat ( text, integer ) text - - which says that the function repeat takes one text and - one integer argument and returns a result of type text. The right arrow - is also used to indicate the result of an example, thus: - -repeat('Pg', 4) PgPgPgPg - - - - - If you are concerned about portability then note that most of - the functions and operators described in this chapter, with the - exception of the most trivial arithmetic and comparison operators - and some explicitly marked functions, are not specified by the - SQL standard. Some of this extended functionality - is present in other SQL database management - systems, and in many cases this functionality is compatible and - consistent between the various implementations. - - - - - Logical Operators - - - operator - logical - - - - Boolean - operators - operators, logical - - - - The usual logical operators are available: - - - AND (operator) - - - - OR (operator) - - - - NOT (operator) - - - - conjunction - - - - disjunction - - - - negation - - - -boolean AND boolean boolean -boolean OR boolean boolean -NOT boolean boolean - - - SQL uses a three-valued logic system with true, - false, and null, which represents unknown. - Observe the following truth tables: - - - - - - a - b - a AND b - a OR b - - - - - - TRUE - TRUE - TRUE - TRUE - - - - TRUE - FALSE - FALSE - TRUE - - - - TRUE - NULL - NULL - TRUE - - - - FALSE - FALSE - FALSE - FALSE - - - - FALSE - NULL - FALSE - NULL - - - - NULL - NULL - NULL - NULL - - - - - - - - - - a - NOT a - - - - - - TRUE - FALSE - - - - FALSE - TRUE - - - - NULL - NULL - - - - - - - - The operators AND and OR are - commutative, that is, you can switch the left and right operands - without affecting the result. (However, it is not guaranteed that - the left operand is evaluated before the right operand. See for more information about the - order of evaluation of subexpressions.) - - - - - Comparison Functions and Operators - - - comparison - operators - - - - The usual comparison operators are available, as shown in . 
- - - - Comparison Operators - - - - Operator - Description - - - - - - - datatype < datatype - boolean - - Less than - - - - - datatype > datatype - boolean - - Greater than - - - - - datatype <= datatype - boolean - - Less than or equal to - - - - - datatype >= datatype - boolean - - Greater than or equal to - - - - - datatype = datatype - boolean - - Equal - - - - - datatype <> datatype - boolean - - Not equal - - - - - datatype != datatype - boolean - - Not equal - - - -
- - - - <> is the standard SQL notation for not - equal. != is an alias, which is converted - to <> at a very early stage of parsing. - Hence, it is not possible to implement != - and <> operators that do different things. - - - - - These comparison operators are available for all built-in data types - that have a natural ordering, including numeric, string, and date/time - types. In addition, arrays, composite types, and ranges can be compared - if their component data types are comparable. - - - - It is usually possible to compare values of related data - types as well; for example integer > - bigint will work. Some cases of this sort are implemented - directly by cross-type comparison operators, but if no - such operator is available, the parser will coerce the less-general type - to the more-general type and apply the latter's comparison operator. - - - - As shown above, all comparison operators are binary operators that - return values of type boolean. Thus, expressions like - 1 < 2 < 3 are not valid (because there is - no < operator to compare a Boolean value with - 3). Use the BETWEEN predicates - shown below to perform range tests. - - - - There are also some comparison predicates, as shown in . These behave much like - operators, but have special syntax mandated by the SQL standard. - - - - Comparison Predicates - - - - - Predicate - - - Description - - - Example(s) - - - - - - - - datatype BETWEEN datatype AND datatype - boolean - - - Between (inclusive of the range endpoints). - - - 2 BETWEEN 1 AND 3 - t - - - 2 BETWEEN 3 AND 1 - f - - - - - - datatype NOT BETWEEN datatype AND datatype - boolean - - - Not between (the negation of BETWEEN). - - - 2 NOT BETWEEN 1 AND 3 - f - - - - - - datatype BETWEEN SYMMETRIC datatype AND datatype - boolean - - - Between, after sorting the two endpoint values. - - - 2 BETWEEN SYMMETRIC 3 AND 1 - t - - - - - - datatype NOT BETWEEN SYMMETRIC datatype AND datatype - boolean - - - Not between, after sorting the two endpoint values. - - - 2 NOT BETWEEN SYMMETRIC 3 AND 1 - f - - - - - - datatype IS DISTINCT FROM datatype - boolean - - - Not equal, treating null as a comparable value. - - - 1 IS DISTINCT FROM NULL - t (rather than NULL) - - - NULL IS DISTINCT FROM NULL - f (rather than NULL) - - - - - - datatype IS NOT DISTINCT FROM datatype - boolean - - - Equal, treating null as a comparable value. - - - 1 IS NOT DISTINCT FROM NULL - f (rather than NULL) - - - NULL IS NOT DISTINCT FROM NULL - t (rather than NULL) - - - - - - datatype IS NULL - boolean - - - Test whether value is null. - - - 1.5 IS NULL - f - - - - - - datatype IS NOT NULL - boolean - - - Test whether value is not null. - - - 'null' IS NOT NULL - t - - - - - - datatype ISNULL - boolean - - - Test whether value is null (nonstandard syntax). - - - - - - datatype NOTNULL - boolean - - - Test whether value is not null (nonstandard syntax). - - - - - - boolean IS TRUE - boolean - - - Test whether boolean expression yields true. - - - true IS TRUE - t - - - NULL::boolean IS TRUE - f (rather than NULL) - - - - - - boolean IS NOT TRUE - boolean - - - Test whether boolean expression yields false or unknown. - - - true IS NOT TRUE - f - - - NULL::boolean IS NOT TRUE - t (rather than NULL) - - - - - - boolean IS FALSE - boolean - - - Test whether boolean expression yields false. - - - true IS FALSE - f - - - NULL::boolean IS FALSE - f (rather than NULL) - - - - - - boolean IS NOT FALSE - boolean - - - Test whether boolean expression yields true or unknown. 
- - - true IS NOT FALSE - t - - - NULL::boolean IS NOT FALSE - t (rather than NULL) - - - - - - boolean IS UNKNOWN - boolean - - - Test whether boolean expression yields unknown. - - - true IS UNKNOWN - f - - - NULL::boolean IS UNKNOWN - t (rather than NULL) - - - - - - boolean IS NOT UNKNOWN - boolean - - - Test whether boolean expression yields true or false. - - - true IS NOT UNKNOWN - t - - - NULL::boolean IS NOT UNKNOWN - f (rather than NULL) - - - - -
- - - - BETWEEN - - - BETWEEN SYMMETRIC - - The BETWEEN predicate simplifies range tests: - -a BETWEEN x AND y - - is equivalent to - -a >= x AND a <= y - - Notice that BETWEEN treats the endpoint values as included - in the range. - BETWEEN SYMMETRIC is like BETWEEN - except there is no requirement that the argument to the left of - AND be less than or equal to the argument on the right. - If it is not, those two arguments are automatically swapped, so that - a nonempty range is always implied. - - - - The various variants of BETWEEN are implemented in - terms of the ordinary comparison operators, and therefore will work for - any data type(s) that can be compared. - - - - - The use of AND in the BETWEEN - syntax creates an ambiguity with the use of AND as a - logical operator. To resolve this, only a limited set of expression - types are allowed as the second argument of a BETWEEN - clause. If you need to write a more complex sub-expression - in BETWEEN, write parentheses around the - sub-expression. - - - - - - IS DISTINCT FROM - - - IS NOT DISTINCT FROM - - Ordinary comparison operators yield null (signifying unknown), - not true or false, when either input is null. For example, - 7 = NULL yields null, as does 7 <> NULL. When - this behavior is not suitable, use the - IS NOT DISTINCT FROM predicates: - -a IS DISTINCT FROM b -a IS NOT DISTINCT FROM b - - For non-null inputs, IS DISTINCT FROM is - the same as the <> operator. However, if both - inputs are null it returns false, and if only one input is - null it returns true. Similarly, IS NOT DISTINCT - FROM is identical to = for non-null - inputs, but it returns true when both inputs are null, and false when only - one input is null. Thus, these predicates effectively act as though null - were a normal data value, rather than unknown. - - - - - IS NULL - - - IS NOT NULL - - - ISNULL - - - NOTNULL - - To check whether a value is or is not null, use the predicates: - -expression IS NULL -expression IS NOT NULL - - or the equivalent, but nonstandard, predicates: - -expression ISNULL -expression NOTNULL - - null valuecomparing - - - - Do not write - expression = NULL - because NULL is not equal to - NULL. (The null value represents an unknown value, - and it is not known whether two unknown values are equal.) - - - - - Some applications might expect that - expression = NULL - returns true if expression evaluates to - the null value. It is highly recommended that these applications - be modified to comply with the SQL standard. However, if that - cannot be done the - configuration variable is available. If it is enabled, - PostgreSQL will convert x = - NULL clauses to x IS NULL. - - - - - If the expression is row-valued, then - IS NULL is true when the row expression itself is null - or when all the row's fields are null, while - IS NOT NULL is true when the row expression itself is non-null - and all the row's fields are non-null. Because of this behavior, - IS NULL and IS NOT NULL do not always return - inverse results for row-valued expressions; in particular, a row-valued - expression that contains both null and non-null fields will return false - for both tests. 
For example: - - -SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same'); - -SELECT ROW(table.*) IS NULL FROM table; -- detect all-null rows - -SELECT ROW(table.*) IS NOT NULL FROM table; -- detect all-non-null rows - -SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in rows - - - In some cases, it may be preferable to - write row IS DISTINCT FROM NULL - or row IS NOT DISTINCT FROM NULL, - which will simply check whether the overall row value is null without any - additional tests on the row fields. - - - - - IS TRUE - - - IS NOT TRUE - - - IS FALSE - - - IS NOT FALSE - - - IS UNKNOWN - - - IS NOT UNKNOWN - - Boolean values can also be tested using the predicates - -boolean_expression IS TRUE -boolean_expression IS NOT TRUE -boolean_expression IS FALSE -boolean_expression IS NOT FALSE -boolean_expression IS UNKNOWN -boolean_expression IS NOT UNKNOWN - - These will always return true or false, never a null value, even when the - operand is null. - A null input is treated as the logical value unknown. - Notice that IS UNKNOWN and IS NOT UNKNOWN are - effectively the same as IS NULL and - IS NOT NULL, respectively, except that the input - expression must be of Boolean type. - - - - Some comparison-related functions are also available, as shown in . - - - - Comparison Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - num_nonnulls - - num_nonnulls ( VARIADIC "any" ) - integer - - - Returns the number of non-null arguments. - - - num_nonnulls(1, NULL, 2) - 2 - - - - - - num_nulls - - num_nulls ( VARIADIC "any" ) - integer - - - Returns the number of null arguments. - - - num_nulls(1, NULL, 2) - 1 - - - - -
- -
- - - Mathematical Functions and Operators - - - Mathematical operators are provided for many - PostgreSQL types. For types without - standard mathematical conventions - (e.g., date/time types) we - describe the actual behavior in subsequent sections. - - - - shows the mathematical - operators that are available for the standard numeric types. - Unless otherwise noted, operators shown as - accepting numeric_type are available for all - the types smallint, integer, - bigint, numeric, real, - and double precision. - Operators shown as accepting integral_type - are available for the types smallint, integer, - and bigint. - Except where noted, each form of an operator returns the same data type - as its argument(s). Calls involving multiple argument data types, such - as integer + numeric, - are resolved by using the type appearing later in these lists. - - - - Mathematical Operators - - - - - - Operator - - - Description - - - Example(s) - - - - - - - - numeric_type + numeric_type - numeric_type - - - Addition - - - 2 + 3 - 5 - - - - - - + numeric_type - numeric_type - - - Unary plus (no operation) - - - + 3.5 - 3.5 - - - - - - numeric_type - numeric_type - numeric_type - - - Subtraction - - - 2 - 3 - -1 - - - - - - - numeric_type - numeric_type - - - Negation - - - - (-4) - 4 - - - - - - numeric_type * numeric_type - numeric_type - - - Multiplication - - - 2 * 3 - 6 - - - - - - numeric_type / numeric_type - numeric_type - - - Division (for integral types, division truncates the result towards - zero) - - - 5.0 / 2 - 2.5000000000000000 - - - 5 / 2 - 2 - - - (-5) / 2 - -2 - - - - - - numeric_type % numeric_type - numeric_type - - - Modulo (remainder); available for smallint, - integer, bigint, and numeric - - - 5 % 4 - 1 - - - - - - numeric ^ numeric - numeric - - - double precision ^ double precision - double precision - - - Exponentiation - - - 2 ^ 3 - 8 - - - Unlike typical mathematical practice, multiple uses of - ^ will associate left to right by default: - - - 2 ^ 3 ^ 3 - 512 - - - 2 ^ (3 ^ 3) - 134217728 - - - - - - |/ double precision - double precision - - - Square root - - - |/ 25.0 - 5 - - - - - - ||/ double precision - double precision - - - Cube root - - - ||/ 64.0 - 4 - - - - - - @ numeric_type - numeric_type - - - Absolute value - - - @ -5.0 - 5.0 - - - - - - integral_type & integral_type - integral_type - - - Bitwise AND - - - 91 & 15 - 11 - - - - - - integral_type | integral_type - integral_type - - - Bitwise OR - - - 32 | 3 - 35 - - - - - - integral_type # integral_type - integral_type - - - Bitwise exclusive OR - - - 17 # 5 - 20 - - - - - - ~ integral_type - integral_type - - - Bitwise NOT - - - ~1 - -2 - - - - - - integral_type << integer - integral_type - - - Bitwise shift left - - - 1 << 4 - 16 - - - - - - integral_type >> integer - integral_type - - - Bitwise shift right - - - 8 >> 2 - 2 - - - - - -
- - - shows the available - mathematical functions. - Many of these functions are provided in multiple forms with different - argument types. - Except where noted, any given form of a function returns the same - data type as its argument(s); cross-type cases are resolved in the - same way as explained above for operators. - The functions working with double precision data are mostly - implemented on top of the host system's C library; accuracy and behavior in - boundary cases can therefore vary depending on the host system. - - - - Mathematical Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - abs - - abs ( numeric_type ) - numeric_type - - - Absolute value - - - abs(-17.4) - 17.4 - - - - - - - cbrt - - cbrt ( double precision ) - double precision - - - Cube root - - - cbrt(64.0) - 4 - - - - - - - ceil - - ceil ( numeric ) - numeric - - - ceil ( double precision ) - double precision - - - Nearest integer greater than or equal to argument - - - ceil(42.2) - 43 - - - ceil(-42.8) - -42 - - - - - - - ceiling - - ceiling ( numeric ) - numeric - - - ceiling ( double precision ) - double precision - - - Nearest integer greater than or equal to argument (same - as ceil) - - - ceiling(95.3) - 96 - - - - - - - degrees - - degrees ( double precision ) - double precision - - - Converts radians to degrees - - - degrees(0.5) - 28.64788975654116 - - - - - - - div - - div ( y numeric, - x numeric ) - numeric - - - Integer quotient of y/x - (truncates towards zero) - - - div(9, 4) - 2 - - - - - - - erf - - erf ( double precision ) - double precision - - - Error function - - - erf(1.0) - 0.8427007929497149 - - - - - - - erfc - - erfc ( double precision ) - double precision - - - Complementary error function (1 - erf(x), without - loss of precision for large inputs) - - - erfc(1.0) - 0.15729920705028513 - - - - - - - exp - - exp ( numeric ) - numeric - - - exp ( double precision ) - double precision - - - Exponential (e raised to the given power) - - - exp(1.0) - 2.7182818284590452 - - - - - - - factorial - - factorial ( bigint ) - numeric - - - Factorial - - - factorial(5) - 120 - - - - - - - floor - - floor ( numeric ) - numeric - - - floor ( double precision ) - double precision - - - Nearest integer less than or equal to argument - - - floor(42.8) - 42 - - - floor(-42.8) - -43 - - - - - - - gamma - - gamma ( double precision ) - double precision - - - Gamma function - - - gamma(0.5) - 1.772453850905516 - - - gamma(6) - 120 - - - - - - - gcd - - gcd ( numeric_type, numeric_type ) - numeric_type - - - Greatest common divisor (the largest positive number that divides both - inputs with no remainder); returns 0 if both inputs - are zero; available for integer, bigint, - and numeric - - - gcd(1071, 462) - 21 - - - - - - - lcm - - lcm ( numeric_type, numeric_type ) - numeric_type - - - Least common multiple (the smallest strictly positive number that is - an integral multiple of both inputs); returns 0 if - either input is zero; available for integer, - bigint, and numeric - - - lcm(1071, 462) - 23562 - - - - - - - lgamma - - lgamma ( double precision ) - double precision - - - Natural logarithm of the absolute value of the gamma function - - - lgamma(1000) - 5905.220423209181 - - - - - - - ln - - ln ( numeric ) - numeric - - - ln ( double precision ) - double precision - - - Natural logarithm - - - ln(2.0) - 0.6931471805599453 - - - - - - - log - - log ( numeric ) - numeric - - - log ( double precision ) - double precision - - - Base 10 logarithm - - - log(100) - 2 - - - - - - 
- log10 - - log10 ( numeric ) - numeric - - - log10 ( double precision ) - double precision - - - Base 10 logarithm (same as log) - - - log10(1000) - 3 - - - - - - log ( b numeric, - x numeric ) - numeric - - - Logarithm of x to base b - - - log(2.0, 64.0) - 6.0000000000000000 - - - - - - - min_scale - - min_scale ( numeric ) - integer - - - Minimum scale (number of fractional decimal digits) needed - to represent the supplied value precisely - - - min_scale(8.4100) - 2 - - - - - - - mod - - mod ( y numeric_type, - x numeric_type ) - numeric_type - - - Remainder of y/x; - available for smallint, integer, - bigint, and numeric - - - mod(9, 4) - 1 - - - - - - - pi - - pi ( ) - double precision - - - Approximate value of π - - - pi() - 3.141592653589793 - - - - - - - power - - power ( a numeric, - b numeric ) - numeric - - - power ( a double precision, - b double precision ) - double precision - - - a raised to the power of b - - - power(9, 3) - 729 - - - - - - - radians - - radians ( double precision ) - double precision - - - Converts degrees to radians - - - radians(45.0) - 0.7853981633974483 - - - - - - - round - - round ( numeric ) - numeric - - - round ( double precision ) - double precision - - - Rounds to nearest integer. For numeric, ties are - broken by rounding away from zero. For double precision, - the tie-breaking behavior is platform dependent, but - round to nearest even is the most common rule. - - - round(42.4) - 42 - - - - - - round ( v numeric, s integer ) - numeric - - - Rounds v to s decimal - places. Ties are broken by rounding away from zero. - - - round(42.4382, 2) - 42.44 - - - round(1234.56, -1) - 1230 - - - - - - - scale - - scale ( numeric ) - integer - - - Scale of the argument (the number of decimal digits in the fractional part) - - - scale(8.4100) - 4 - - - - - - - sign - - sign ( numeric ) - numeric - - - sign ( double precision ) - double precision - - - Sign of the argument (-1, 0, or +1) - - - sign(-8.4) - -1 - - - - - - - sqrt - - sqrt ( numeric ) - numeric - - - sqrt ( double precision ) - double precision - - - Square root - - - sqrt(2) - 1.4142135623730951 - - - - - - - trim_scale - - trim_scale ( numeric ) - numeric - - - Reduces the value's scale (number of fractional decimal digits) by - removing trailing zeroes - - - trim_scale(8.4100) - 8.41 - - - - - - - trunc - - trunc ( numeric ) - numeric - - - trunc ( double precision ) - double precision - - - Truncates to integer (towards zero) - - - trunc(42.8) - 42 - - - trunc(-42.8) - -42 - - - - - - trunc ( v numeric, s integer ) - numeric - - - Truncates v to s - decimal places - - - trunc(42.4382, 2) - 42.43 - - - - - - - width_bucket - - width_bucket ( operand numeric, low numeric, high numeric, count integer ) - integer - - - width_bucket ( operand double precision, low double precision, high double precision, count integer ) - integer - - - Returns the number of the bucket in - which operand falls in a histogram - having count equal-width buckets spanning the - range low to high. - The buckets have inclusive lower bounds and exclusive upper bounds. - Returns 0 for an input less - than low, - or count+1 for an input - greater than or equal to high. - If low > high, - the behavior is mirror-reversed, with bucket 1 - now being the one just below low, and the - inclusive bounds now being on the upper side. 
- - - width_bucket(5.35, 0.024, 10.06, 5) - 3 - - - width_bucket(9, 10, 0, 10) - 2 - - - - - - width_bucket ( operand anycompatible, thresholds anycompatiblearray ) - integer - - - Returns the number of the bucket in - which operand falls given an array listing the - inclusive lower bounds of the buckets. - Returns 0 for an input less than the first lower - bound. operand and the array elements can be - of any type having standard comparison operators. - The thresholds array must be - sorted, smallest first, or unexpected results will be - obtained. - - - width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[]) - 2 - - - - -
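As a small sketch of width_bucket in practice, the following bins values from a hypothetical list of scores into five equal-width buckets spanning 0 to 100:

SELECT score, width_bucket(score, 0, 100, 5) AS bucket
FROM (VALUES (12), (47), (99)) AS t(score);
-- 12 -> bucket 1, 47 -> bucket 3, 99 -> bucket 5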
- - - shows functions for - generating random numbers. - - - - Random Functions - - - - - - Function - - - Description - - - Example(s) - - - - - - - - - random - - random ( ) - double precision - - - Returns a random value in the range 0.0 <= x < 1.0 - - - random() - 0.897124072839091 - - - - - - - random - - random ( min integer, max integer ) - integer - - - random ( min bigint, max bigint ) - bigint - - - random ( min numeric, max numeric ) - numeric - - - Returns a random value in the range - min <= x <= max. - For type numeric, the result will have the same number of - fractional decimal digits as min or - max, whichever has more. - - - random(1, 10) - 7 - - - random(-0.499, 0.499) - 0.347 - - - - - - - random_normal - - - random_normal ( - mean double precision - , stddev double precision ) - double precision - - - Returns a random value from the normal distribution with the given - parameters; mean defaults to 0.0 - and stddev defaults to 1.0 - - - random_normal(0.0, 1.0) - 0.051285419 - - - - - - - setseed - - setseed ( double precision ) - void - - - Sets the seed for subsequent random() and - random_normal() calls; - argument must be between -1.0 and 1.0, inclusive - - - setseed(0.12345) - - - - -
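For example, re-seeding makes a sequence of random values repeatable within a session (this sketch assumes a server version that has the two-argument form of random shown above):

SELECT setseed(0.42);
SELECT random(), random(1, 6);   -- note the values

SELECT setseed(0.42);
SELECT random(), random(1, 6);   -- the same values again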
- - - The random() and random_normal() - functions listed in use a - deterministic pseudo-random number generator. - It is fast but not suitable for cryptographic - applications; see the module for a more - secure alternative. - If setseed() is called, the series of results of - subsequent calls to these functions in the current session - can be repeated by re-issuing setseed() with the same - argument. - Without any prior setseed() call in the same - session, the first call to any of these functions obtains a seed - from a platform-dependent source of random bits. - - - - shows the - available trigonometric functions. Each of these functions comes in - two variants, one that measures angles in radians and one that - measures angles in degrees. - - - - Trigonometric Functions - - - - - - Function - - - Description - - - Example(s) - - - - - - - - - acos - - acos ( double precision ) - double precision - - - Inverse cosine, result in radians - - - acos(1) - 0 - - - - - - - acosd - - acosd ( double precision ) - double precision - - - Inverse cosine, result in degrees - - - acosd(0.5) - 60 - - - - - - - asin - - asin ( double precision ) - double precision - - - Inverse sine, result in radians - - - asin(1) - 1.5707963267948966 - - - - - - - asind - - asind ( double precision ) - double precision - - - Inverse sine, result in degrees - - - asind(0.5) - 30 - - - - - - - atan - - atan ( double precision ) - double precision - - - Inverse tangent, result in radians - - - atan(1) - 0.7853981633974483 - - - - - - - atand - - atand ( double precision ) - double precision - - - Inverse tangent, result in degrees - - - atand(1) - 45 - - - - - - - atan2 - - atan2 ( y double precision, - x double precision ) - double precision - - - Inverse tangent of - y/x, - result in radians - - - atan2(1, 0) - 1.5707963267948966 - - - - - - - atan2d - - atan2d ( y double precision, - x double precision ) - double precision - - - Inverse tangent of - y/x, - result in degrees - - - atan2d(1, 0) - 90 - - - - - - - cos - - cos ( double precision ) - double precision - - - Cosine, argument in radians - - - cos(0) - 1 - - - - - - - cosd - - cosd ( double precision ) - double precision - - - Cosine, argument in degrees - - - cosd(60) - 0.5 - - - - - - - cot - - cot ( double precision ) - double precision - - - Cotangent, argument in radians - - - cot(0.5) - 1.830487721712452 - - - - - - - cotd - - cotd ( double precision ) - double precision - - - Cotangent, argument in degrees - - - cotd(45) - 1 - - - - - - - sin - - sin ( double precision ) - double precision - - - Sine, argument in radians - - - sin(1) - 0.8414709848078965 - - - - - - - sind - - sind ( double precision ) - double precision - - - Sine, argument in degrees - - - sind(30) - 0.5 - - - - - - - tan - - tan ( double precision ) - double precision - - - Tangent, argument in radians - - - tan(1) - 1.5574077246549023 - - - - - - - tand - - tand ( double precision ) - double precision - - - Tangent, argument in degrees - - - tand(45) - 1 - - - - -
- - - - Another way to work with angles measured in degrees is to use the unit - transformation functions radians() - and degrees() shown earlier. - However, using the degree-based trigonometric functions is preferred, - as that way avoids round-off error for special cases such - as sind(30). - - - - - shows the - available hyperbolic functions. - - - - Hyperbolic Functions - - - - - - Function - - - Description - - - Example(s) - - - - - - - - - sinh - - sinh ( double precision ) - double precision - - - Hyperbolic sine - - - sinh(1) - 1.1752011936438014 - - - - - - - cosh - - cosh ( double precision ) - double precision - - - Hyperbolic cosine - - - cosh(0) - 1 - - - - - - - tanh - - tanh ( double precision ) - double precision - - - Hyperbolic tangent - - - tanh(1) - 0.7615941559557649 - - - - - - - asinh - - asinh ( double precision ) - double precision - - - Inverse hyperbolic sine - - - asinh(1) - 0.881373587019543 - - - - - - - acosh - - acosh ( double precision ) - double precision - - - Inverse hyperbolic cosine - - - acosh(1) - 0 - - - - - - - atanh - - atanh ( double precision ) - double precision - - - Inverse hyperbolic tangent - - - atanh(0.5) - 0.5493061443340548 - - - - -
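For example, the degree-based variants avoid round-off for the special angles mentioned above (the exact error of the radian form is platform dependent):

SELECT sind(30);           -- exact
Result: 0.5

SELECT sin(radians(30));   -- typically carries a small round-off error
Result: 0.49999999999999994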
- - - - String Functions and Operators - - - This section describes functions and operators for examining and - manipulating string values. Strings in this context include values - of the types character, character varying, - and text. Except where noted, these functions and operators - are declared to accept and return type text. They will - interchangeably accept character varying arguments. - Values of type character will be converted - to text before the function or operator is applied, resulting - in stripping any trailing spaces in the character value. - - - - SQL defines some string functions that use - key words, rather than commas, to separate - arguments. Details are in - . - PostgreSQL also provides versions of these functions - that use the regular function invocation syntax - (see ). - - - - - The string concatenation operator (||) will accept - non-string input, so long as at least one input is of string type, as shown - in . For other cases, inserting an - explicit coercion to text can be used to have non-string input - accepted. - - - - - <acronym>SQL</acronym> String Functions and Operators - - - - - Function/Operator - - - Description - - - Example(s) - - - - - - - - - character string - concatenation - - text || text - text - - - Concatenates the two strings. - - - 'Post' || 'greSQL' - PostgreSQL - - - - - - text || anynonarray - text - - - anynonarray || text - text - - - Converts the non-string input to text, then concatenates the two - strings. (The non-string input cannot be of an array type, because - that would create ambiguity with the array || - operators. If you want to concatenate an array's text equivalent, - cast it to text explicitly.) - - - 'Value: ' || 42 - Value: 42 - - - - - - - btrim - - btrim ( string text - , characters text ) - text - - - Removes the longest string containing only characters - in characters (a space by default) - from the start and end of string. - - - btrim('xyxtrimyyx', 'xyz') - trim - - - - - - - normalized - - - Unicode normalization - - text IS NOT form NORMALIZED - boolean - - - Checks whether the string is in the specified Unicode normalization - form. The optional form key word specifies the - form: NFC (the default), NFD, - NFKC, or NFKD. This expression can - only be used when the server encoding is UTF8. Note - that checking for normalization using this expression is often faster - than normalizing possibly already normalized strings. - - - U&'\0061\0308bc' IS NFD NORMALIZED - t - - - - - - - bit_length - - bit_length ( text ) - integer - - - Returns number of bits in the string (8 - times the octet_length). - - - bit_length('jose') - 32 - - - - - - - char_length - - - character string - length - - - length - of a character string - character string, length - - char_length ( text ) - integer - - - - character_length - - character_length ( text ) - integer - - - Returns number of characters in the string. - - - char_length('josé') - 4 - - - - - - - lower - - lower ( text ) - text - - - Converts the string to all lower case, according to the rules of the - database's locale. - - - lower('TOM') - tom - - - - - - - lpad - - lpad ( string text, - length integer - , fill text ) - text - - - Extends the string to length - length by prepending the characters - fill (a space by default). If the - string is already longer than - length then it is truncated (on the right). 
- - - lpad('hi', 5, 'xy') - xyxhi - - - - - - - ltrim - - ltrim ( string text - , characters text ) - text - - - Removes the longest string containing only characters in - characters (a space by default) from the start of - string. - - - ltrim('zzzytest', 'xyz') - test - - - - - - - normalize - - - Unicode normalization - - normalize ( text - , form ) - text - - - Converts the string to the specified Unicode - normalization form. The optional form key word - specifies the form: NFC (the default), - NFD, NFKC, or - NFKD. This function can only be used when the - server encoding is UTF8. - - - normalize(U&'\0061\0308bc', NFC) - U&'\00E4bc' - - - - - - - octet_length - - octet_length ( text ) - integer - - - Returns number of bytes in the string. - - - octet_length('josé') - 5 (if server encoding is UTF8) - - - - - - - octet_length - - octet_length ( character ) - integer - - - Returns number of bytes in the string. Since this version of the - function accepts type character directly, it will not - strip trailing spaces. - - - octet_length('abc '::character(4)) - 4 - - - - - - - overlay - - overlay ( string text PLACING newsubstring text FROM start integer FOR count integer ) - text - - - Replaces the substring of string that starts at - the start'th character and extends - for count characters - with newsubstring. - If count is omitted, it defaults to the length - of newsubstring. - - - overlay('Txxxxas' placing 'hom' from 2 for 4) - Thomas - - - - - - - position - - position ( substring text IN string text ) - integer - - - Returns first starting index of the specified - substring within - string, or zero if it's not present. - - - position('om' in 'Thomas') - 3 - - - - - - - rpad - - rpad ( string text, - length integer - , fill text ) - text - - - Extends the string to length - length by appending the characters - fill (a space by default). If the - string is already longer than - length then it is truncated. - - - rpad('hi', 5, 'xy') - hixyx - - - - - - - rtrim - - rtrim ( string text - , characters text ) - text - - - Removes the longest string containing only characters in - characters (a space by default) from the end of - string. - - - rtrim('testxxzx', 'xyz') - test - - - - - - - substring - - substring ( string text FROM start integer FOR count integer ) - text - - - Extracts the substring of string starting at - the start'th character if that is specified, - and stopping after count characters if that is - specified. Provide at least one of start - and count. - - - substring('Thomas' from 2 for 3) - hom - - - substring('Thomas' from 3) - omas - - - substring('Thomas' for 2) - Th - - - - - - substring ( string text FROM pattern text ) - text - - - Extracts the first substring matching POSIX regular expression; see - . - - - substring('Thomas' from '...$') - mas - - - - - - substring ( string text SIMILAR pattern text ESCAPE escape text ) - text - - - substring ( string text FROM pattern text FOR escape text ) - text - - - Extracts the first substring matching SQL regular expression; - see . The first form has - been specified since SQL:2003; the second form was only in SQL:1999 - and should be considered obsolete. - - - substring('Thomas' similar '%#"o_a#"_' escape '#') - oma - - - - - - - trim - - trim ( LEADING | TRAILING | BOTH - characters text FROM - string text ) - text - - - Removes the longest string containing only characters in - characters (a space by default) from the - start, end, or both ends (BOTH is the default) - of string. 
- - - trim(both 'xyz' from 'yxTomxx') - Tom - - - - - - trim ( LEADING | TRAILING | BOTH FROM - string text , - characters text ) - text - - - This is a non-standard syntax for trim(). - - - trim(both from 'yxTomxx', 'xyz') - Tom - - - - - - - unicode_assigned - - unicode_assigned ( text ) - boolean - - - Returns true if all characters in the string are - assigned Unicode codepoints; false otherwise. This - function can only be used when the server encoding is - UTF8. - - - - - - - upper - - upper ( text ) - text - - - Converts the string to all upper case, according to the rules of the - database's locale. - - - upper('tom') - TOM - - - - -
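These functions compose in the usual way; for example, trimming noise characters before padding to a fixed width, or extracting a fixed-position field:

SELECT lpad(btrim('  42  '), 5, '0');
Result: 00042

SELECT substring('2024-05-26' from 6 for 2);
Result: 05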
- - - Additional string manipulation functions and operators are available - and are listed in . (Some of - these are used internally to implement - the SQL-standard string functions listed in - .) - There are also pattern-matching operators, which are described in - , and operators for full-text - search, which are described in . - - - - Other String Functions and Operators - - - - - Function/Operator - - - Description - - - Example(s) - - - - - - - - - character string - prefix test - - text ^@ text - boolean - - - Returns true if the first string starts with the second string - (equivalent to the starts_with() function). - - - 'alphabet' ^@ 'alph' - t - - - - - - - ascii - - ascii ( text ) - integer - - - Returns the numeric code of the first character of the argument. - In UTF8 encoding, returns the Unicode code point - of the character. In other multibyte encodings, the argument must - be an ASCII character. - - - ascii('x') - 120 - - - - - - - chr - - chr ( integer ) - text - - - Returns the character with the given code. In UTF8 - encoding the argument is treated as a Unicode code point. In other - multibyte encodings the argument must designate - an ASCII character. chr(0) is - disallowed because text data types cannot store that character. - - - chr(65) - A - - - - - - - concat - - concat ( val1 "any" - , val2 "any" , ... ) - text - - - Concatenates the text representations of all the arguments. - NULL arguments are ignored. - - - concat('abcde', 2, NULL, 22) - abcde222 - - - - - - - concat_ws - - concat_ws ( sep text, - val1 "any" - , val2 "any" , ... ) - text - - - Concatenates all but the first argument, with separators. The first - argument is used as the separator string, and should not be NULL. - Other NULL arguments are ignored. - - - concat_ws(',', 'abcde', 2, NULL, 22) - abcde,2,22 - - - - - - - format - - format ( formatstr text - , formatarg "any" , ... ) - text - - - Formats arguments according to a format string; - see . - This function is similar to the C function sprintf. - - - format('Hello %s, %1$s', 'World') - Hello World, World - - - - - - - initcap - - initcap ( text ) - text - - - Converts the first letter of each word to upper case and the - rest to lower case. When using the libc locale - provider, words are sequences of alphanumeric characters separated - by non-alphanumeric characters; when using the ICU locale provider, - words are separated according to - Unicode Standard Annex #29. - - - initcap('hi THOMAS') - Hi Thomas - - - - - - - casefold - - casefold ( text ) - text - - - Performs case folding of the input string according to the collation. - Case folding is similar to case conversion, but the purpose of case - folding is to facilitate case-insensitive matching of strings, - whereas the purpose of case conversion is to convert to a particular - cased form. This function can only be used when the server encoding - is UTF8. - - - Ordinarily, case folding simply converts to lowercase, but there may - be exceptions depending on the collation. For instance, some - characters have more than two lowercase variants, or fold to uppercase. - - - Case folding may change the length of the string. For instance, in - the PG_UNICODE_FAST collation, ß - (U+00DF) folds to ss. - - - casefold can be used for Unicode Default Caseless - Matching. It does not always preserve the normalized form of the - input string (see ). - - - The libc provider doesn't support case folding, so - casefold is identical to . 
- - - - - - - left - - left ( string text, - n integer ) - text - - - Returns first n characters in the - string, or when n is negative, returns - all but last |n| characters. - - - left('abcde', 2) - ab - - - - - - - length - - length ( text ) - integer - - - Returns the number of characters in the string. - - - length('jose') - 4 - - - - - - - md5 - - md5 ( text ) - text - - - Computes the MD5 hash of - the argument, with the result written in hexadecimal. - - - md5('abc') - 900150983cd24fb0&zwsp;d6963f7d28e17f72 - - - - - - - parse_ident - - parse_ident ( qualified_identifier text - , strict_mode boolean DEFAULT true ) - text[] - - - Splits qualified_identifier into an array of - identifiers, removing any quoting of individual identifiers. By - default, extra characters after the last identifier are considered an - error; but if the second parameter is false, then such - extra characters are ignored. (This behavior is useful for parsing - names for objects like functions.) Note that this function does not - truncate over-length identifiers. If you want truncation you can cast - the result to name[]. - - - parse_ident('"SomeSchema".someTable') - {SomeSchema,sometable} - - - - - - - pg_client_encoding - - pg_client_encoding ( ) - name - - - Returns current client encoding name. - - - pg_client_encoding() - UTF8 - - - - - - - quote_ident - - quote_ident ( text ) - text - - - Returns the given string suitably quoted to be used as an identifier - in an SQL statement string. - Quotes are added only if necessary (i.e., if the string contains - non-identifier characters or would be case-folded). - Embedded quotes are properly doubled. - See also . - - - quote_ident('Foo bar') - "Foo bar" - - - - - - - quote_literal - - quote_literal ( text ) - text - - - Returns the given string suitably quoted to be used as a string literal - in an SQL statement string. - Embedded single-quotes and backslashes are properly doubled. - Note that quote_literal returns null on null - input; if the argument might be null, - quote_nullable is often more suitable. - See also . - - - quote_literal(E'O\'Reilly') - 'O''Reilly' - - - - - - quote_literal ( anyelement ) - text - - - Converts the given value to text and then quotes it as a literal. - Embedded single-quotes and backslashes are properly doubled. - - - quote_literal(42.5) - '42.5' - - - - - - - quote_nullable - - quote_nullable ( text ) - text - - - Returns the given string suitably quoted to be used as a string literal - in an SQL statement string; or, if the argument - is null, returns NULL. - Embedded single-quotes and backslashes are properly doubled. - See also . - - - quote_nullable(NULL) - NULL - - - - - - quote_nullable ( anyelement ) - text - - - Converts the given value to text and then quotes it as a literal; - or, if the argument is null, returns NULL. - Embedded single-quotes and backslashes are properly doubled. - - - quote_nullable(42.5) - '42.5' - - - - - - - regexp_count - - regexp_count ( string text, pattern text - , start integer - , flags text ) - integer - - - Returns the number of times the POSIX regular - expression pattern matches in - the string; see - . - - - regexp_count('123456789012', '\d\d\d', 2) - 3 - - - - - - - regexp_instr - - regexp_instr ( string text, pattern text - , start integer - , N integer - , endoption integer - , flags text - , subexpr integer ) - integer - - - Returns the position within string where - the N'th match of the POSIX regular - expression pattern occurs, or zero if there is - no such match; see . 
- - - regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i') - 3 - - - regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i', 2) - 5 - - - - - - - regexp_like - - regexp_like ( string text, pattern text - , flags text ) - boolean - - - Checks whether a match of the POSIX regular - expression pattern occurs - within string; see - . - - - regexp_like('Hello World', 'world$', 'i') - t - - - - - - - regexp_match - - regexp_match ( string text, pattern text , flags text ) - text[] - - - Returns substrings within the first match of the POSIX regular - expression pattern to - the string; see - . - - - regexp_match('foobarbequebaz', '(bar)(beque)') - {bar,beque} - - - - - - - regexp_matches - - regexp_matches ( string text, pattern text , flags text ) - setof text[] - - - Returns substrings within the first match of the POSIX regular - expression pattern to - the string, or substrings within all - such matches if the g flag is used; - see . - - - regexp_matches('foobarbequebaz', 'ba.', 'g') - - - {bar} - {baz} - - - - - - - - regexp_replace - - regexp_replace ( string text, pattern text, replacement text - , flags text ) - text - - - Replaces the substring that is the first match to the POSIX - regular expression pattern, or all such - matches if the g flag is used; see - . - - - regexp_replace('Thomas', '.[mN]a.', 'M') - ThM - - - - - - regexp_replace ( string text, pattern text, replacement text, - start integer - , N integer - , flags text ) - text - - - Replaces the substring that is the N'th - match to the POSIX regular expression pattern, - or all such matches if N is zero, with the - search beginning at the start'th character - of string. If N is - omitted, it defaults to 1. See - . - - - regexp_replace('Thomas', '.', 'X', 3, 2) - ThoXas - - - regexp_replace(string=>'hello world', pattern=>'l', replacement=>'XX', start=>1, "N"=>2) - helXXo world - - - - - - - regexp_split_to_array - - regexp_split_to_array ( string text, pattern text , flags text ) - text[] - - - Splits string using a POSIX regular - expression as the delimiter, producing an array of results; see - . - - - regexp_split_to_array('hello world', '\s+') - {hello,world} - - - - - - - regexp_split_to_table - - regexp_split_to_table ( string text, pattern text , flags text ) - setof text - - - Splits string using a POSIX regular - expression as the delimiter, producing a set of results; see - . - - - regexp_split_to_table('hello world', '\s+') - - - hello - world - - - - - - - - regexp_substr - - regexp_substr ( string text, pattern text - , start integer - , N integer - , flags text - , subexpr integer ) - text - - - Returns the substring within string that - matches the N'th occurrence of the POSIX - regular expression pattern, - or NULL if there is no such match; see - . - - - regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i') - CDEF - - - regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i', 2) - EF - - - - - - - repeat - - repeat ( string text, number integer ) - text - - - Repeats string the specified - number of times. - - - repeat('Pg', 4) - PgPgPgPg - - - - - - - replace - - replace ( string text, - from text, - to text ) - text - - - Replaces all occurrences in string of - substring from with - substring to. - - - replace('abcdefabcdef', 'cd', 'XX') - abXXefabXXef - - - - - - - reverse - - reverse ( text ) - text - - - Reverses the order of the characters in the string. 
- - - reverse('abcde') - edcba - - - - - - - right - - right ( string text, - n integer ) - text - - - Returns last n characters in the string, - or when n is negative, returns all but - first |n| characters. - - - right('abcde', 2) - de - - - - - - - split_part - - split_part ( string text, - delimiter text, - n integer ) - text - - - Splits string at occurrences - of delimiter and returns - the n'th field (counting from one), - or when n is negative, returns - the |n|'th-from-last field. - - - split_part('abc~@~def~@~ghi', '~@~', 2) - def - - - split_part('abc,def,ghi,jkl', ',', -2) - ghi - - - - - - - starts_with - - starts_with ( string text, prefix text ) - boolean - - - Returns true if string starts - with prefix. - - - starts_with('alphabet', 'alph') - t - - - - - - - string_to_array - - string_to_array ( string text, delimiter text , null_string text ) - text[] - - - Splits the string at occurrences - of delimiter and forms the resulting fields - into a text array. - If delimiter is NULL, - each character in the string will become a - separate element in the array. - If delimiter is an empty string, then - the string is treated as a single field. - If null_string is supplied and is - not NULL, fields matching that string are - replaced by NULL. - See also array_to_string. - - - string_to_array('xx~~yy~~zz', '~~', 'yy') - {xx,NULL,zz} - - - - - - - string_to_table - - string_to_table ( string text, delimiter text , null_string text ) - setof text - - - Splits the string at occurrences - of delimiter and returns the resulting fields - as a set of text rows. - If delimiter is NULL, - each character in the string will become a - separate row of the result. - If delimiter is an empty string, then - the string is treated as a single field. - If null_string is supplied and is - not NULL, fields matching that string are - replaced by NULL. - - - string_to_table('xx~^~yy~^~zz', '~^~', 'yy') - - - xx - NULL - zz - - - - - - - - strpos - - strpos ( string text, substring text ) - integer - - - Returns first starting index of the specified substring - within string, or zero if it's not present. - (Same as position(substring in - string), but note the reversed - argument order.) - - - strpos('high', 'ig') - 2 - - - - - - - substr - - substr ( string text, start integer , count integer ) - text - - - Extracts the substring of string starting at - the start'th character, - and extending for count characters if that is - specified. (Same - as substring(string - from start - for count).) - - - substr('alphabet', 3) - phabet - - - substr('alphabet', 3, 2) - ph - - - - - - - to_ascii - - to_ascii ( string text ) - text - - - to_ascii ( string text, - encoding name ) - text - - - to_ascii ( string text, - encoding integer ) - text - - - Converts string to ASCII - from another encoding, which may be identified by name or number. - If encoding is omitted the database encoding - is assumed (which in practice is the only useful case). - The conversion consists primarily of dropping accents. - Conversion is only supported - from LATIN1, LATIN2, - LATIN9, and WIN1250 encodings. - (See the module for another, more flexible - solution.) - - - to_ascii('Karél') - Karel - - - - - - - to_bin - - to_bin ( integer ) - text - - - to_bin ( bigint ) - text - - - Converts the number to its equivalent two's complement binary - representation. 
- - - to_bin(2147483647) - 1111111111111111111111111111111 - - - to_bin(-1234) - 11111111111111111111101100101110 - - - - - - - to_hex - - to_hex ( integer ) - text - - - to_hex ( bigint ) - text - - - Converts the number to its equivalent two's complement hexadecimal - representation. - - - to_hex(2147483647) - 7fffffff - - - to_hex(-1234) - fffffb2e - - - - - - - to_oct - - to_oct ( integer ) - text - - - to_oct ( bigint ) - text - - - Converts the number to its equivalent two's complement octal - representation. - - - to_oct(2147483647) - 17777777777 - - - to_oct(-1234) - 37777775456 - - - - - - - translate - - translate ( string text, - from text, - to text ) - text - - - Replaces each character in string that - matches a character in the from set with the - corresponding character in the to - set. If from is longer than - to, occurrences of the extra characters in - from are deleted. - - - translate('12345', '143', 'ax') - a2x5 - - - - - - - unistr - - unistr ( text ) - text - - - Evaluate escaped Unicode characters in the argument. Unicode characters - can be specified as - \XXXX (4 hexadecimal - digits), \+XXXXXX (6 - hexadecimal digits), - \uXXXX (4 hexadecimal - digits), or \UXXXXXXXX - (8 hexadecimal digits). To specify a backslash, write two - backslashes. All other characters are taken literally. - - - - If the server encoding is not UTF-8, the Unicode code point identified - by one of these escape sequences is converted to the actual server - encoding; an error is reported if that's not possible. - - - - This function provides a (non-standard) alternative to string - constants with Unicode escapes (see ). - - - - unistr('d\0061t\+000061') - data - - - unistr('d\u0061t\U00000061') - data - - - - - -
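A couple of composed examples using the functions above:

SELECT split_part('postgresql.conf', '.', -1);   -- last dot-separated field
Result: conf

SELECT concat_ws(' ', initcap('hello'), upper('world'));
Result: Hello WORLD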
- - - The concat, concat_ws and - format functions are variadic, so it is possible to - pass the values to be concatenated or formatted as an array marked with - the VARIADIC keyword (see ). The array's elements are - treated as if they were separate ordinary arguments to the function. - If the variadic array argument is NULL, concat - and concat_ws return NULL, but - format treats a NULL as a zero-element array. - - - - See also the aggregate function string_agg in - , and the functions for - converting between strings and the bytea type in - . - - - - <function>format</function> - - - format - - - - The function format produces output formatted according to - a format string, in a style similar to the C function - sprintf. - - - - -format(formatstr text , formatarg "any" , ... ) - - formatstr is a format string that specifies how the - result should be formatted. Text in the format string is copied - directly to the result, except where format specifiers are - used. Format specifiers act as placeholders in the string, defining how - subsequent function arguments should be formatted and inserted into the - result. Each formatarg argument is converted to text - according to the usual output rules for its data type, and then formatted - and inserted into the result string according to the format specifier(s). - - - - Format specifiers are introduced by a % character and have - the form - -%[position][flags][width]type - - where the component fields are: - - - - position (optional) - - - A string of the form n$ where - n is the index of the argument to print. - Index 1 means the first argument after - formatstr. If the position is - omitted, the default is to use the next argument in sequence. - - - - - - flags (optional) - - - Additional options controlling how the format specifier's output is - formatted. Currently the only supported flag is a minus sign - (-) which will cause the format specifier's output to be - left-justified. This has no effect unless the width - field is also specified. - - - - - - width (optional) - - - Specifies the minimum number of characters to use to - display the format specifier's output. The output is padded on the - left or right (depending on the - flag) with spaces as - needed to fill the width. A too-small width does not cause - truncation of the output, but is simply ignored. The width may be - specified using any of the following: a positive integer; an - asterisk (*) to use the next function argument as the - width; or a string of the form *n$ to - use the nth function argument as the width. - - - - If the width comes from a function argument, that argument is - consumed before the argument that is used for the format specifier's - value. If the width argument is negative, the result is left - aligned (as if the - flag had been specified) within a - field of length abs(width). - - - - - - type (required) - - - The type of format conversion to use to produce the format - specifier's output. The following types are supported: - - - - s formats the argument value as a simple - string. A null value is treated as an empty string. - - - - - I treats the argument value as an SQL - identifier, double-quoting it if necessary. - It is an error for the value to be null (equivalent to - quote_ident). - - - - - L quotes the argument value as an SQL literal. - A null value is displayed as the string NULL, without - quotes (equivalent to quote_nullable). 
- - - - - - - - - - - In addition to the format specifiers described above, the special sequence - %% may be used to output a literal % character. - - - - Here are some examples of the basic format conversions: - - -SELECT format('Hello %s', 'World'); -Result: Hello World - -SELECT format('Testing %s, %s, %s, %%', 'one', 'two', 'three'); -Result: Testing one, two, three, % - -SELECT format('INSERT INTO %I VALUES(%L)', 'Foo bar', E'O\'Reilly'); -Result: INSERT INTO "Foo bar" VALUES('O''Reilly') - -SELECT format('INSERT INTO %I VALUES(%L)', 'locations', 'C:\Program Files'); -Result: INSERT INTO locations VALUES('C:\Program Files') - - - - - Here are examples using width fields - and the - flag: - - -SELECT format('|%10s|', 'foo'); -Result: | foo| - -SELECT format('|%-10s|', 'foo'); -Result: |foo | - -SELECT format('|%*s|', 10, 'foo'); -Result: | foo| - -SELECT format('|%*s|', -10, 'foo'); -Result: |foo | - -SELECT format('|%-*s|', 10, 'foo'); -Result: |foo | - -SELECT format('|%-*s|', -10, 'foo'); -Result: |foo | - - - - - These examples show use of position fields: - - -SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three'); -Result: Testing three, two, one - -SELECT format('|%*2$s|', 'foo', 10, 'bar'); -Result: | bar| - -SELECT format('|%1$*2$s|', 'foo', 10, 'bar'); -Result: | foo| - - - - - Unlike the standard C function sprintf, - PostgreSQL's format function allows format - specifiers with and without position fields to be mixed - in the same format string. A format specifier without a - position field always uses the next argument after the - last argument consumed. - In addition, the format function does not require all - function arguments to be used in the format string. - For example: - - -SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); -Result: Testing three, two, three - - - - - The %I and %L format specifiers are particularly - useful for safely constructing dynamic SQL statements. See - . - - - -
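As a sketch of that pattern, the following DO block builds statements with format, letting %I and %L handle the quoting (the table name here is purely illustrative):

DO $$
BEGIN
  -- %I double-quotes the identifier, %L single-quotes the literal
  EXECUTE format('CREATE TABLE %I (note text)', 'My Table');
  EXECUTE format('INSERT INTO %I VALUES (%L)', 'My Table', E'O\'Reilly');
END
$$;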
- - - - Binary String Functions and Operators - - - binary data - functions - - - - This section describes functions and operators for examining and - manipulating binary strings, that is values of type bytea. - Many of these are equivalent, in purpose and syntax, to the - text-string functions described in the previous section. - - - - SQL defines some string functions that use - key words, rather than commas, to separate - arguments. Details are in - . - PostgreSQL also provides versions of these functions - that use the regular function invocation syntax - (see ). - - - - <acronym>SQL</acronym> Binary String Functions and Operators - - - - - Function/Operator - - - Description - - - Example(s) - - - - - - - - - binary string - concatenation - - bytea || bytea - bytea - - - Concatenates the two binary strings. - - - '\x123456'::bytea || '\x789a00bcde'::bytea - \x123456789a00bcde - - - - - - - bit_length - - bit_length ( bytea ) - integer - - - Returns number of bits in the binary string (8 - times the octet_length). - - - bit_length('\x123456'::bytea) - 24 - - - - - - - btrim - - btrim ( bytes bytea, - bytesremoved bytea ) - bytea - - - Removes the longest string containing only bytes appearing in - bytesremoved from the start and end of - bytes. - - - btrim('\x1234567890'::bytea, '\x9012'::bytea) - \x345678 - - - - - - - ltrim - - ltrim ( bytes bytea, - bytesremoved bytea ) - bytea - - - Removes the longest string containing only bytes appearing in - bytesremoved from the start of - bytes. - - - ltrim('\x1234567890'::bytea, '\x9012'::bytea) - \x34567890 - - - - - - - octet_length - - octet_length ( bytea ) - integer - - - Returns number of bytes in the binary string. - - - octet_length('\x123456'::bytea) - 3 - - - - - - - overlay - - overlay ( bytes bytea PLACING newsubstring bytea FROM start integer FOR count integer ) - bytea - - - Replaces the substring of bytes that starts at - the start'th byte and extends - for count bytes - with newsubstring. - If count is omitted, it defaults to the length - of newsubstring. - - - overlay('\x1234567890'::bytea placing '\002\003'::bytea from 2 for 3) - \x12020390 - - - - - - - position - - position ( substring bytea IN bytes bytea ) - integer - - - Returns first starting index of the specified - substring within - bytes, or zero if it's not present. - - - position('\x5678'::bytea in '\x1234567890'::bytea) - 3 - - - - - - - rtrim - - rtrim ( bytes bytea, - bytesremoved bytea ) - bytea - - - Removes the longest string containing only bytes appearing in - bytesremoved from the end of - bytes. - - - rtrim('\x1234567890'::bytea, '\x9012'::bytea) - \x12345678 - - - - - - - substring - - substring ( bytes bytea FROM start integer FOR count integer ) - bytea - - - Extracts the substring of bytes starting at - the start'th byte if that is specified, - and stopping after count bytes if that is - specified. Provide at least one of start - and count. - - - substring('\x1234567890'::bytea from 3 for 2) - \x5678 - - - - - - - trim - - trim ( LEADING | TRAILING | BOTH - bytesremoved bytea FROM - bytes bytea ) - bytea - - - Removes the longest string containing only bytes appearing in - bytesremoved from the start, - end, or both ends (BOTH is the default) - of bytes. - - - trim('\x9012'::bytea from '\x1234567890'::bytea) - \x345678 - - - - - - trim ( LEADING | TRAILING | BOTH FROM - bytes bytea, - bytesremoved bytea ) - bytea - - - This is a non-standard syntax for trim(). - - - trim(both from '\x1234567890'::bytea, '\x9012'::bytea) - \x345678 - - - - -
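For example:

SELECT '\x1234'::bytea || '\x5678'::bytea;
Result: \x12345678

SELECT position('\x56'::bytea in '\x123456'::bytea);
Result: 3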
- - - Additional binary string manipulation functions are available and - are listed in . Some - of them are used internally to implement the - SQL-standard string functions listed in . - - - - Other Binary String Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - bit_count - - - popcount - bit_count - - bit_count ( bytes bytea ) - bigint - - - Returns the number of bits set in the binary string (also known as - popcount). - - - bit_count('\x1234567890'::bytea) - 15 - - - - - - - crc32 - - crc32 ( bytea ) - bigint - - - Computes the CRC-32 value of the binary string. - - - crc32('abc'::bytea) - 891568578 - - - - - - - crc32c - - crc32c ( bytea ) - bigint - - - Computes the CRC-32C value of the binary string. - - - crc32c('abc'::bytea) - 910901175 - - - - - - - get_bit - - get_bit ( bytes bytea, - n bigint ) - integer - - - Extracts n'th bit - from binary string. - - - get_bit('\x1234567890'::bytea, 30) - 1 - - - - - - - get_byte - - get_byte ( bytes bytea, - n integer ) - integer - - - Extracts n'th byte - from binary string. - - - get_byte('\x1234567890'::bytea, 4) - 144 - - - - - - - length - - - binary string - length - - - length - of a binary string - binary strings, length - - length ( bytea ) - integer - - - Returns the number of bytes in the binary string. - - - length('\x1234567890'::bytea) - 5 - - - - - - length ( bytes bytea, - encoding name ) - integer - - - Returns the number of characters in the binary string, assuming - that it is text in the given encoding. - - - length('jose'::bytea, 'UTF8') - 4 - - - - - - - md5 - - md5 ( bytea ) - text - - - Computes the MD5 hash of - the binary string, with the result written in hexadecimal. - - - md5('Th\000omas'::bytea) - 8ab2d3c9689aaf18&zwsp;b4958c334c82d8b1 - - - - - - - reverse - - reverse ( bytea ) - bytea - - - Reverses the order of the bytes in the binary string. - - - reverse('\xabcd'::bytea) - \xcdab - - - - - - - set_bit - - set_bit ( bytes bytea, - n bigint, - newvalue integer ) - bytea - - - Sets n'th bit in - binary string to newvalue. - - - set_bit('\x1234567890'::bytea, 30, 0) - \x1234563890 - - - - - - - set_byte - - set_byte ( bytes bytea, - n integer, - newvalue integer ) - bytea - - - Sets n'th byte in - binary string to newvalue. - - - set_byte('\x1234567890'::bytea, 4, 64) - \x1234567840 - - - - - - - sha224 - - sha224 ( bytea ) - bytea - - - Computes the SHA-224 hash - of the binary string. - - - sha224('abc'::bytea) - \x23097d223405d8228642a477bda2&zwsp;55b32aadbce4bda0b3f7e36c9da7 - - - - - - - sha256 - - sha256 ( bytea ) - bytea - - - Computes the SHA-256 hash - of the binary string. - - - sha256('abc'::bytea) - \xba7816bf8f01cfea414140de5dae2223&zwsp;b00361a396177a9cb410ff61f20015ad - - - - - - - sha384 - - sha384 ( bytea ) - bytea - - - Computes the SHA-384 hash - of the binary string. - - - sha384('abc'::bytea) - \xcb00753f45a35e8bb5a03d699ac65007&zwsp;272c32ab0eded1631a8b605a43ff5bed&zwsp;8086072ba1e7cc2358baeca134c825a7 - - - - - - - sha512 - - sha512 ( bytea ) - bytea - - - Computes the SHA-512 hash - of the binary string. - - - sha512('abc'::bytea) - \xddaf35a193617abacc417349ae204131&zwsp;12e6fa4e89a97ea20a9eeee64b55d39a&zwsp;2192992a274fc1a836ba3c23a3feebbd&zwsp;454d4423643ce80e2a9ac94fa54ca49f - - - - - - - substr - - substr ( bytes bytea, start integer , count integer ) - bytea - - - Extracts the substring of bytes starting at - the start'th byte, - and extending for count bytes if that is - specified. (Same - as substring(bytes - from start - for count).) 
- - - substr('\x1234567890'::bytea, 3, 2) - \x5678 - - - - -
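For example, reading and writing individual bytes (as the note below explains, the first byte is numbered 0):

SELECT get_byte('\x1234'::bytea, 0);
Result: 18

SELECT set_byte('\x1234'::bytea, 1, 255);
Result: \x12ff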
- - - Functions get_byte and set_byte - number the first byte of a binary string as byte 0. - Functions get_bit and set_bit - number bits from the right within each byte; for example bit 0 is the least - significant bit of the first byte, and bit 15 is the most significant bit - of the second byte. - - - - For historical reasons, the function md5 - returns a hex-encoded value of type text whereas the SHA-2 - functions return type bytea. Use the functions - encode - and decode to - convert between the two. For example write encode(sha256('abc'), - 'hex') to get a hex-encoded text representation, - or decode(md5('abc'), 'hex') to get - a bytea value. - - - - - character string - converting to binary string - - - binary string - converting to character string - - Functions for converting strings between different character sets - (encodings), and for representing arbitrary binary data in textual - form, are shown in - . For these - functions, an argument or result of type text is expressed - in the database's default encoding, while arguments or results of - type bytea are in an encoding named by another argument. - - - - Text/Binary String Conversion Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - convert - - convert ( bytes bytea, - src_encoding name, - dest_encoding name ) - bytea - - - Converts a binary string representing text in - encoding src_encoding - to a binary string in encoding dest_encoding - (see for - available conversions). - - - convert('text_in_utf8', 'UTF8', 'LATIN1') - \x746578745f696e5f75746638 - - - - - - - convert_from - - convert_from ( bytes bytea, - src_encoding name ) - text - - - Converts a binary string representing text in - encoding src_encoding - to text in the database encoding - (see for - available conversions). - - - convert_from('text_in_utf8', 'UTF8') - text_in_utf8 - - - - - - - convert_to - - convert_to ( string text, - dest_encoding name ) - bytea - - - Converts a text string (in the database encoding) to a - binary string encoded in encoding dest_encoding - (see for - available conversions). - - - convert_to('some_text', 'UTF8') - \x736f6d655f74657874 - - - - - - - encode - - encode ( bytes bytea, - format text ) - text - - - Encodes binary data into a textual representation; supported - format values are: - base64, - escape, - hex. - - - encode('123\000\001', 'base64') - MTIzAAE= - - - - - - - decode - - decode ( string text, - format text ) - bytea - - - Decodes binary data from a textual representation; supported - format values are the same as - for encode. - - - decode('MTIzAAE=', 'base64') - \x3132330001 - - - - -
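For example, a round trip through base64, one of the textual formats described next:

SELECT encode('some text'::bytea, 'base64');
Result: c29tZSB0ZXh0

SELECT convert_from(decode('c29tZSB0ZXh0', 'base64'), 'UTF8');
Result: some text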
- - - The encode and decode - functions support the following textual formats: - - - - base64 - - base64 format - - - - The base64 format is that - of RFC - 2045 Section 6.8. As per the RFC, encoded lines are - broken at 76 characters. However instead of the MIME CRLF - end-of-line marker, only a newline is used for end-of-line. - The decode function ignores carriage-return, - newline, space, and tab characters. Otherwise, an error is - raised when decode is supplied invalid - base64 data — including when trailing padding is incorrect. - - - - - - escape - - escape format - - - - The escape format converts zero bytes and - bytes with the high bit set into octal escape sequences - (\nnn), and it doubles - backslashes. Other byte values are represented literally. - The decode function will raise an error if a - backslash is not followed by either a second backslash or three - octal digits; it accepts other byte values unchanged. - - - - - - hex - - hex format - - - - The hex format represents each 4 bits of - data as one hexadecimal digit, 0 - through f, writing the higher-order digit of - each byte first. The encode function outputs - the a-f hex digits in lower - case. Because the smallest unit of data is 8 bits, there are - always an even number of characters returned - by encode. - The decode function - accepts the a-f characters in - either upper or lower case. An error is raised - when decode is given invalid hex data - — including when given an odd number of characters. - - - - - - - - In addition, it is possible to cast integral values to and from type - bytea. Casting an integer to bytea produces - 2, 4, or 8 bytes, depending on the width of the integer type. The result - is the two's complement representation of the integer, with the most - significant byte first. Some examples: - -1234::smallint::bytea \x04d2 -cast(1234 as bytea) \x000004d2 -cast(-1234 as bytea) \xfffffb2e -'\x8000'::bytea::smallint -32768 -'\x8000'::bytea::integer 32768 - - Casting a bytea to an integer will raise an error if the - length of the bytea exceeds the width of the integer type. - - - - See also the aggregate function string_agg in - and the large object functions - in . - -
- - - - Bit String Functions and Operators - - - bit strings - functions - - - - This section describes functions and operators for examining and - manipulating bit strings, that is values of the types - bit and bit varying. (While only - type bit is mentioned in these tables, values of - type bit varying can be used interchangeably.) - Bit strings support the usual comparison operators shown in - , as well as the - operators shown in . - - - - Bit String Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - bit || bit - bit - - - Concatenation - - - B'10001' || B'011' - 10001011 - - - - - - bit & bit - bit - - - Bitwise AND (inputs must be of equal length) - - - B'10001' & B'01101' - 00001 - - - - - - bit | bit - bit - - - Bitwise OR (inputs must be of equal length) - - - B'10001' | B'01101' - 11101 - - - - - - bit # bit - bit - - - Bitwise exclusive OR (inputs must be of equal length) - - - B'10001' # B'01101' - 11100 - - - - - - ~ bit - bit - - - Bitwise NOT - - - ~ B'10001' - 01110 - - - - - - bit << integer - bit - - - Bitwise shift left - (string length is preserved) - - - B'10001' << 3 - 01000 - - - - - - bit >> integer - bit - - - Bitwise shift right - (string length is preserved) - - - B'10001' >> 2 - 00100 - - - - -
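For example, a bitwise AND against a mask can test whether a particular flag bit is set (bit strings of equal length support the usual comparison operators):

SELECT (B'10100' & B'00100') <> B'00000' AS flag_set;
Result: t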
- - - Some of the functions available for binary strings are also available - for bit strings, as shown in . - - - - Bit String Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - bit_count - - bit_count ( bit ) - bigint - - - Returns the number of bits set in the bit string (also known as - popcount). - - - bit_count(B'10111') - 4 - - - - - - - bit_length - - bit_length ( bit ) - integer - - - Returns number of bits in the bit string. - - - bit_length(B'10111') - 5 - - - - - - - length - - - bit string - length - - length ( bit ) - integer - - - Returns number of bits in the bit string. - - - length(B'10111') - 5 - - - - - - - octet_length - - octet_length ( bit ) - integer - - - Returns number of bytes in the bit string. - - - octet_length(B'1011111011') - 2 - - - - - - - overlay - - overlay ( bits bit PLACING newsubstring bit FROM start integer FOR count integer ) - bit - - - Replaces the substring of bits that starts at - the start'th bit and extends - for count bits - with newsubstring. - If count is omitted, it defaults to the length - of newsubstring. - - - overlay(B'01010101010101010' placing B'11111' from 2 for 3) - 0111110101010101010 - - - - - - - position - - position ( substring bit IN bits bit ) - integer - - - Returns first starting index of the specified substring - within bits, or zero if it's not present. - - - position(B'010' in B'000001101011') - 8 - - - - - - - substring - - substring ( bits bit FROM start integer FOR count integer ) - bit - - - Extracts the substring of bits starting at - the start'th bit if that is specified, - and stopping after count bits if that is - specified. Provide at least one of start - and count. - - - substring(B'110010111111' from 3 for 2) - 00 - - - - - - - get_bit - - get_bit ( bits bit, - n integer ) - integer - - - Extracts n'th bit - from bit string; the first (leftmost) bit is bit 0. - - - get_bit(B'101010101010101010', 6) - 1 - - - - - - - set_bit - - set_bit ( bits bit, - n integer, - newvalue integer ) - bit - - - Sets n'th bit in - bit string to newvalue; - the first (leftmost) bit is bit 0. - - - set_bit(B'101010101010101010', 6, 0) - 101010001010101010 - - - - -
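For example (recall that the first, leftmost bit is bit 0):

SELECT get_bit(B'101010', 3);
Result: 0

SELECT set_bit(B'00000', 2, 1);
Result: 00100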
- - - In addition, it is possible to cast integral values to and from type - bit. - Casting an integer to bit(n) copies the rightmost - n bits. Casting an integer to a bit string width wider - than the integer itself will sign-extend on the left. - Some examples: - -44::bit(10) 0000101100 -44::bit(3) 100 -cast(-44 as bit(12)) 111111010100 -'1110'::bit(4)::integer 14 - - Note that casting to just bit means casting to - bit(1), and so will deliver only the least significant - bit of the integer. - -
- - - - Pattern Matching - - - pattern matching - - - - There are three separate approaches to pattern matching provided - by PostgreSQL: the traditional - SQL LIKE operator, the - more recent SIMILAR TO operator (added in - SQL:1999), and POSIX-style regular - expressions. Aside from the basic does this string match - this pattern? operators, functions are available to extract - or replace matching substrings and to split a string at matching - locations. - - - - - If you have pattern matching needs that go beyond this, - consider writing a user-defined function in Perl or Tcl. - - - - - - While most regular-expression searches can be executed very quickly, - regular expressions can be contrived that take arbitrary amounts of - time and memory to process. Be wary of accepting regular-expression - search patterns from hostile sources. If you must do so, it is - advisable to impose a statement timeout. - - - - Searches using SIMILAR TO patterns have the same - security hazards, since SIMILAR TO provides many - of the same capabilities as POSIX-style regular - expressions. - - - - LIKE searches, being much simpler than the other - two options, are safer to use with possibly-hostile pattern sources. - - - - - SIMILAR TO and POSIX-style regular - expressions do not support nondeterministic collations. If required, use - LIKE or apply a different collation to the expression - to work around this limitation. - - - - <function>LIKE</function> - - - LIKE - - - -string LIKE pattern ESCAPE escape-character -string NOT LIKE pattern ESCAPE escape-character - - - - The LIKE expression returns true if the - string matches the supplied - pattern. (As - expected, the NOT LIKE expression returns - false if LIKE returns true, and vice versa. - An equivalent expression is - NOT (string LIKE - pattern).) - - - - If pattern does not contain percent - signs or underscores, then the pattern only represents the string - itself; in that case LIKE acts like the - equals operator. An underscore (_) in - pattern stands for (matches) any single - character; a percent sign (%) matches any sequence - of zero or more characters. - - - - Some examples: - -'abc' LIKE 'abc' true -'abc' LIKE 'a%' true -'abc' LIKE '_b_' true -'abc' LIKE 'c' false - - - - - LIKE pattern matching supports nondeterministic - collations (see ), such as - case-insensitive collations or collations that, say, ignore punctuation. - So with a case-insensitive collation, one could have: - -'AbC' LIKE 'abc' COLLATE case_insensitive true -'AbC' LIKE 'a%' COLLATE case_insensitive true - - With collations that ignore certain characters or in general that consider - strings of different lengths equal, the semantics can become a bit more - complicated. Consider these examples: - -'.foo.' LIKE 'foo' COLLATE ign_punct true -'.foo.' LIKE 'f_o' COLLATE ign_punct true -'.foo.' LIKE '_oo' COLLATE ign_punct false - - The way the matching works is that the pattern is partitioned into - sequences of wildcards and non-wildcard strings (wildcards being - _ and %). For example, the pattern - f_o is partitioned into f, _, o, the - pattern _oo is partitioned into _, - oo. The input string matches the pattern if it can be - partitioned in such a way that the wildcards match one character or any - number of characters respectively and the non-wildcard partitions are - equal under the applicable collation. So for example, '.foo.' - LIKE 'f_o' COLLATE ign_punct is true because one can partition - .foo. 
into .f, o, o., and then - '.f' = 'f' COLLATE ign_punct, 'o' - matches the _ wildcard, and 'o.' = 'o' COLLATE - ign_punct. But '.foo.' LIKE '_oo' COLLATE - ign_punct is false because .foo. cannot be - partitioned in a way that the first character is any character and the - rest of the string compares equal to oo. (Note that - the single-character wildcard always matches exactly one character, - independent of the collation. So in this example, the - _ would match ., but then the rest - of the input string won't match the rest of the pattern.) - - - - LIKE pattern matching always covers the entire - string. Therefore, if it's desired to match a sequence anywhere within - a string, the pattern must start and end with a percent sign. - - - - To match a literal underscore or percent sign without matching - other characters, the respective character in - pattern must be - preceded by the escape character. The default escape - character is the backslash but a different one can be selected by - using the ESCAPE clause. To match the escape - character itself, write two escape characters. - - - - - If you have turned off, - any backslashes you write in literal string constants will need to be - doubled. See for more information. - - - - - It's also possible to select no escape character by writing - ESCAPE ''. This effectively disables the - escape mechanism, which makes it impossible to turn off the - special meaning of underscore and percent signs in the pattern. - - - - According to the SQL standard, omitting ESCAPE - means there is no escape character (rather than defaulting to a - backslash), and a zero-length ESCAPE value is - disallowed. PostgreSQL's behavior in - this regard is therefore slightly nonstandard. - - - - The key word ILIKE can be used instead of - LIKE to make the match case-insensitive according to the - active locale. (But this does not support nondeterministic collations.) - This is not in the SQL standard but is a - PostgreSQL extension. - - - - The operator ~~ is equivalent to - LIKE, and ~~* corresponds to - ILIKE. There are also - !~~ and !~~* operators that - represent NOT LIKE and NOT - ILIKE, respectively. All of these operators are - PostgreSQL-specific. You may see these - operator names in EXPLAIN output and similar - places, since the parser actually translates LIKE - et al. to these operators. - - - - The phrases LIKE, ILIKE, - NOT LIKE, and NOT ILIKE are - generally treated as operators - in PostgreSQL syntax; for example they can - be used in expression - operator ANY - (subquery) constructs, although - an ESCAPE clause cannot be included there. In some - obscure cases it may be necessary to use the underlying operator names - instead. - - - - Also see the starts-with operator ^@ and the - corresponding starts_with() function, which are - useful in cases where simply matching the beginning of a string is - needed. - - - - - - <function>SIMILAR TO</function> Regular Expressions - - - regular expression - - - - - SIMILAR TO - - - substring - - - -string SIMILAR TO pattern ESCAPE escape-character -string NOT SIMILAR TO pattern ESCAPE escape-character - - - - The SIMILAR TO operator returns true or - false depending on whether its pattern matches the given string. - It is similar to LIKE, except that it - interprets the pattern using the SQL standard's definition of a - regular expression. SQL regular expressions are a curious cross - between LIKE notation and common (POSIX) regular - expression notation. 
- - - - Like LIKE, the SIMILAR TO - operator succeeds only if its pattern matches the entire string; - this is unlike common regular expression behavior where the pattern - can match any part of the string. - Also like - LIKE, SIMILAR TO uses - _ and % as wildcard characters denoting - any single character and any string, respectively (these are - comparable to . and .* in POSIX regular - expressions). - - - - In addition to these facilities borrowed from LIKE, - SIMILAR TO supports these pattern-matching - metacharacters borrowed from POSIX regular expressions: - - - - - | denotes alternation (either of two alternatives). - - - - - * denotes repetition of the previous item zero - or more times. - - - - - + denotes repetition of the previous item one - or more times. - - - - - ? denotes repetition of the previous item zero - or one time. - - - - - {m} denotes repetition - of the previous item exactly m times. - - - - - {m,} denotes repetition - of the previous item m or more times. - - - - - {m,n} - denotes repetition of the previous item at least m and - not more than n times. - - - - - Parentheses () can be used to group items into - a single logical item. - - - - - A bracket expression [...] specifies a character - class, just as in POSIX regular expressions. - - - - - Notice that the period (.) is not a metacharacter - for SIMILAR TO. - - - - As with LIKE, a backslash disables the special - meaning of any of these metacharacters. A different escape character - can be specified with ESCAPE, or the escape - capability can be disabled by writing ESCAPE ''. - - - - According to the SQL standard, omitting ESCAPE - means there is no escape character (rather than defaulting to a - backslash), and a zero-length ESCAPE value is - disallowed. PostgreSQL's behavior in - this regard is therefore slightly nonstandard. - - - - Another nonstandard extension is that following the escape character - with a letter or digit provides access to the escape sequences - defined for POSIX regular expressions; see - , - , and - below. - - - - Some examples: - -'abc' SIMILAR TO 'abc' true -'abc' SIMILAR TO 'a' false -'abc' SIMILAR TO '%(b|d)%' true -'abc' SIMILAR TO '(b|c)%' false -'-abc-' SIMILAR TO '%\mabc\M%' true -'xabcy' SIMILAR TO '%\mabc\M%' false - - - - - The substring function with three parameters - provides extraction of a substring that matches an SQL - regular expression pattern. The function can be written according - to standard SQL syntax: - -substring(string similar pattern escape escape-character) - - or using the now obsolete SQL:1999 syntax: - -substring(string from pattern for escape-character) - - or as a plain three-argument function: - -substring(string, pattern, escape-character) - - As with SIMILAR TO, the - specified pattern must match the entire data string, or else the - function fails and returns null. To indicate the part of the - pattern for which the matching data sub-string is of interest, - the pattern should contain - two occurrences of the escape character followed by a double quote - ("). - The text matching the portion of the pattern - between these separators is returned when the match is successful. - - - - The escape-double-quote separators actually - divide substring's pattern into three independent - regular expressions; for example, a vertical bar (|) - in any of the three sections affects only that section. 
Also, the first - and third of these regular expressions are defined to match the smallest - possible amount of text, not the largest, when there is any ambiguity - about how much of the data string matches which pattern. (In POSIX - parlance, the first and third regular expressions are forced to be - non-greedy.) - - - - As an extension to the SQL standard, PostgreSQL - allows there to be just one escape-double-quote separator, in which case - the third regular expression is taken as empty; or no separators, in which - case the first and third regular expressions are taken as empty. - - - - Some examples, with #" delimiting the return string: - -substring('foobar' similar '%#"o_b#"%' escape '#') oob -substring('foobar' similar '#"o_b#"%' escape '#') NULL - - - - - - <acronym>POSIX</acronym> Regular Expressions - - - regular expression - pattern matching - - - substring - - - regexp_count - - - regexp_instr - - - regexp_like - - - regexp_match - - - regexp_matches - - - regexp_replace - - - regexp_split_to_table - - - regexp_split_to_array - - - regexp_substr - - - - lists the available - operators for pattern matching using POSIX regular expressions. - - - - Regular Expression Match Operators - - - - - - Operator - - - Description - - - Example(s) - - - - - - - - text ~ text - boolean - - - String matches regular expression, case sensitively - - - 'thomas' ~ 't.*ma' - t - - - - - - text ~* text - boolean - - - String matches regular expression, case-insensitively - - - 'thomas' ~* 'T.*ma' - t - - - - - - text !~ text - boolean - - - String does not match regular expression, case sensitively - - - 'thomas' !~ 't.*max' - t - - - - - - text !~* text - boolean - - - String does not match regular expression, case-insensitively - - - 'thomas' !~* 'T.*ma' - f - - - - -
- - - POSIX regular expressions provide a more - powerful means for pattern matching than the LIKE and - SIMILAR TO operators. - Many Unix tools such as egrep, - sed, or awk use a pattern - matching language that is similar to the one described here. - - - - A regular expression is a character sequence that is an - abbreviated definition of a set of strings (a regular - set). A string is said to match a regular expression - if it is a member of the regular set described by the regular - expression. As with LIKE, pattern characters - match string characters exactly unless they are special characters - in the regular expression language — but regular expressions use - different special characters than LIKE does. - Unlike LIKE patterns, a - regular expression is allowed to match anywhere within a string, unless - the regular expression is explicitly anchored to the beginning or - end of the string. - - - - Some examples: - -'abcd' ~ 'bc' true -'abcd' ~ 'a.c' true — dot matches any character -'abcd' ~ 'a.*d' true — * repeats the preceding pattern item -'abcd' ~ '(b|x)' true — | means OR, parentheses group -'abcd' ~ '^a' true — ^ anchors to start of string -'abcd' ~ '^(b|c)' false — would match except for anchoring - - - - - The POSIX pattern language is described in much - greater detail below. - - - - The substring function with two parameters, - substring(string from - pattern), provides extraction of a - substring - that matches a POSIX regular expression pattern. It returns null if - there is no match, otherwise the first portion of the text that matched the - pattern. But if the pattern contains any parentheses, the portion - of the text that matched the first parenthesized subexpression (the - one whose left parenthesis comes first) is - returned. You can put parentheses around the whole expression - if you want to use parentheses within it without triggering this - exception. If you need parentheses in the pattern before the - subexpression you want to extract, see the non-capturing parentheses - described below. - - - - Some examples: - -substring('foobar' from 'o.b') oob -substring('foobar' from 'o(.)b') o - - - - - The regexp_count function counts the number of - places where a POSIX regular expression pattern matches a string. - It has the syntax - regexp_count(string, - pattern - , start - , flags - ). - pattern is searched for - in string, normally from the beginning of - the string, but if the start parameter is - provided then beginning from that character index. - The flags parameter is an optional text - string containing zero or more single-letter flags that change the - function's behavior. For example, including i in - flags specifies case-insensitive matching. - Supported flags are described in - . - - - - Some examples: - -regexp_count('ABCABCAXYaxy', 'A.') 3 -regexp_count('ABCABCAXYaxy', 'A.', 1, 'i') 4 - - - - - The regexp_instr function returns the starting or - ending position of the N'th match of a - POSIX regular expression pattern to a string, or zero if there is no - such match. It has the syntax - regexp_instr(string, - pattern - , start - , N - , endoption - , flags - , subexpr - ). - pattern is searched for - in string, normally from the beginning of - the string, but if the start parameter is - provided then beginning from that character index. - If N is specified - then the N'th match of the pattern - is located, otherwise the first match is located. 
- If the endoption parameter is omitted or - specified as zero, the function returns the position of the first - character of the match. Otherwise, endoption - must be one, and the function returns the position of the character - following the match. - The flags parameter is an optional text - string containing zero or more single-letter flags that change the - function's behavior. Supported flags are described - in . - For a pattern containing parenthesized - subexpressions, subexpr is an integer - indicating which subexpression is of interest: the result identifies - the position of the substring matching that subexpression. - Subexpressions are numbered in the order of their leading parentheses. - When subexpr is omitted or zero, the result - identifies the position of the whole match regardless of - parenthesized subexpressions. - - - - Some examples: - -regexp_instr('number of your street, town zip, FR', '[^,]+', 1, 2) - 23 -regexp_instr(string=>'ABCDEFGHI', pattern=>'(c..)(...)', start=>1, "N"=>1, endoption=>0, flags=>'i', subexpr=>2) - 6 - - - - - The regexp_like function checks whether a match - of a POSIX regular expression pattern occurs within a string, - returning boolean true or false. It has the syntax - regexp_like(string, - pattern - , flags ). - The flags parameter is an optional text - string containing zero or more single-letter flags that change the - function's behavior. Supported flags are described - in . - This function has the same results as the ~ - operator if no flags are specified. If only the i - flag is specified, it has the same results as - the ~* operator. - - - - Some examples: - -regexp_like('Hello World', 'world') false -regexp_like('Hello World', 'world', 'i') true - - - - - The regexp_match function returns a text array of - matching substring(s) within the first match of a POSIX - regular expression pattern to a string. It has the syntax - regexp_match(string, - pattern , flags ). - If there is no match, the result is NULL. - If a match is found, and the pattern contains no - parenthesized subexpressions, then the result is a single-element text - array containing the substring matching the whole pattern. - If a match is found, and the pattern contains - parenthesized subexpressions, then the result is a text array - whose n'th element is the substring matching - the n'th parenthesized subexpression of - the pattern (not counting non-capturing - parentheses; see below for details). - The flags parameter is an optional text string - containing zero or more single-letter flags that change the function's - behavior. Supported flags are described - in . - - - - Some examples: - -SELECT regexp_match('foobarbequebaz', 'bar.*que'); - regexp_match --------------- - {barbeque} -(1 row) - -SELECT regexp_match('foobarbequebaz', '(bar)(beque)'); - regexp_match --------------- - {bar,beque} -(1 row) - - - - - - In the common case where you just want the whole matching substring - or NULL for no match, the best solution is to - use regexp_substr(). - However, regexp_substr() only exists - in PostgreSQL version 15 and up. When - working in older versions, you can extract the first element - of regexp_match()'s result, for example: - -SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1]; - regexp_match --------------- - barbeque -(1 row) - - - - - - The regexp_matches function returns a set of text arrays - of matching substring(s) within matches of a POSIX regular - expression pattern to a string. It has the same syntax as - regexp_match. 
- This function returns no rows if there is no match, one row if there is - a match and the g flag is not given, or N - rows if there are N matches and the g flag - is given. Each returned row is a text array containing the whole - matched substring or the substrings matching parenthesized - subexpressions of the pattern, just as described above - for regexp_match. - regexp_matches accepts all the flags shown - in , plus - the g flag which commands it to return all matches, not - just the first one. - - - - Some examples: - -SELECT regexp_matches('foo', 'not there'); - regexp_matches ----------------- -(0 rows) - -SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g'); - regexp_matches ----------------- - {bar,beque} - {bazil,barf} -(2 rows) - - - - - - In most cases regexp_matches() should be used with - the g flag, since if you only want the first match, it's - easier and more efficient to use regexp_match(). - However, regexp_match() only exists - in PostgreSQL version 10 and up. When working in older - versions, a common trick is to place a regexp_matches() - call in a sub-select, for example: - -SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab; - - This produces a text array if there's a match, or NULL if - not, the same as regexp_match() would do. Without the - sub-select, this query would produce no output at all for table rows - without a match, which is typically not the desired behavior. - - - - - The regexp_replace function provides substitution of - new text for substrings that match POSIX regular expression patterns. - It has the syntax - regexp_replace(string, - pattern, replacement - , flags ) - or - regexp_replace(string, - pattern, replacement, - start - , N - , flags ). - The source string is returned unchanged if - there is no match to the pattern. If there is a - match, the string is returned with the - replacement string substituted for the matching - substring. The replacement string can contain - \n, where n is 1 - through 9, to indicate that the source substring matching the - n'th parenthesized subexpression of the pattern should be - inserted, and it can contain \& to indicate that the - substring matching the entire pattern should be inserted. Write - \\ if you need to put a literal backslash in the replacement - text. - pattern is searched for - in string, normally from the beginning of - the string, but if the start parameter is - provided then beginning from that character index. - By default, only the first match of the pattern is replaced. - If N is specified and is greater than zero, - then the N'th match of the pattern - is replaced. - If the g flag is given, or - if N is specified and is zero, then all - matches at or after the start position are - replaced. (The g flag is ignored - when N is specified.) - The flags parameter is an optional text - string containing zero or more single-letter flags that change the - function's behavior. Supported flags (though - not g) are - described in . 
- - - - Some examples: - -regexp_replace('foobarbaz', 'b..', 'X') - fooXbaz -regexp_replace('foobarbaz', 'b..', 'X', 'g') - fooXX -regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g') - fooXarYXazY -regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i') - X PXstgrXSQL fXnctXXn -regexp_replace(string=>'A PostgreSQL function', pattern=>'a|e|i|o|u', replacement=>'X', start=>1, "N"=>3, flags=>'i') - A PostgrXSQL function - - - - - The regexp_split_to_table function splits a string using a POSIX - regular expression pattern as a delimiter. It has the syntax - regexp_split_to_table(string, pattern - , flags ). - If there is no match to the pattern, the function returns the - string. If there is at least one match, for each match it returns - the text from the end of the last match (or the beginning of the string) - to the beginning of the match. When there are no more matches, it - returns the text from the end of the last match to the end of the string. - The flags parameter is an optional text string containing - zero or more single-letter flags that change the function's behavior. - regexp_split_to_table supports the flags described in - . - - - - The regexp_split_to_array function behaves the same as - regexp_split_to_table, except that regexp_split_to_array - returns its result as an array of text. It has the syntax - regexp_split_to_array(string, pattern - , flags ). - The parameters are the same as for regexp_split_to_table. - - - - Some examples: - -SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo; - foo -------- - the - quick - brown - fox - jumps - over - the - lazy - dog -(9 rows) - -SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+'); - regexp_split_to_array ------------------------------------------------ - {the,quick,brown,fox,jumps,over,the,lazy,dog} -(1 row) - -SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; - foo ------ - t - h - e - q - u - i - c - k - b - r - o - w - n - f - o - x -(16 rows) - - - - - As the last example demonstrates, the regexp split functions ignore - zero-length matches that occur at the start or end of the string - or immediately after a previous match. This is contrary to the strict - definition of regexp matching that is implemented by - the other regexp functions, but is usually the most convenient behavior - in practice. Other software systems such as Perl use similar definitions. - - - - The regexp_substr function returns the substring - that matches a POSIX regular expression pattern, - or NULL if there is no match. It has the syntax - regexp_substr(string, - pattern - , start - , N - , flags - , subexpr - ). - pattern is searched for - in string, normally from the beginning of - the string, but if the start parameter is - provided then beginning from that character index. - If N is specified - then the N'th match of the pattern - is returned, otherwise the first match is returned. - The flags parameter is an optional text - string containing zero or more single-letter flags that change the - function's behavior. Supported flags are described - in . - For a pattern containing parenthesized - subexpressions, subexpr is an integer - indicating which subexpression is of interest: the result is the - substring matching that subexpression. - Subexpressions are numbered in the order of their leading parentheses. - When subexpr is omitted or zero, the result - is the whole match regardless of parenthesized subexpressions. 
- - - - Some examples: - -regexp_substr('number of your street, town zip, FR', '[^,]+', 1, 2) - town zip -regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) - FGH - - - - - - - Regular Expression Details - - - PostgreSQL's regular expressions are implemented - using a software package written by Henry Spencer. Much of - the description of regular expressions below is copied verbatim from his - manual. - - - - Regular expressions (REs), as defined in - POSIX 1003.2, come in two forms: - extended REs or EREs - (roughly those of egrep), and - basic REs or BREs - (roughly those of ed). - PostgreSQL supports both forms, and - also implements some extensions - that are not in the POSIX standard, but have become widely used - due to their availability in programming languages such as Perl and Tcl. - REs using these non-POSIX extensions are called - advanced REs or AREs - in this documentation. AREs are almost an exact superset of EREs, - but BREs have several notational incompatibilities (as well as being - much more limited). - We first describe the ARE and ERE forms, noting features that apply - only to AREs, and then describe how BREs differ. - - - - - PostgreSQL always initially presumes that a regular - expression follows the ARE rules. However, the more limited ERE or - BRE rules can be chosen by prepending an embedded option - to the RE pattern, as described in . - This can be useful for compatibility with applications that expect - exactly the POSIX 1003.2 rules. - - - - - A regular expression is defined as one or more - branches, separated by - |. It matches anything that matches one of the - branches. - - - - A branch is zero or more quantified atoms or - constraints, concatenated. - It matches a match for the first, followed by a match for the second, etc.; - an empty branch matches the empty string. - - - - A quantified atom is an atom possibly followed - by a single quantifier. - Without a quantifier, it matches a match for the atom. - With a quantifier, it can match some number of matches of the atom. - An atom can be any of the possibilities - shown in . - The possible quantifiers and their meanings are shown in - . - - - - A constraint matches an empty string, but matches only when - specific conditions are met. A constraint can be used where an atom - could be used, except it cannot be followed by a quantifier. - The simple constraints are shown in - ; - some more constraints are described later. - - - - - Regular Expression Atoms - - - - - Atom - Description - - - - - - (re) - (where re is any regular expression) - matches a match for - re, with the match noted for possible reporting - - - - (?:re) - as above, but the match is not noted for reporting - (a non-capturing set of parentheses) - (AREs only) - - - - . - matches any single character - - - - [chars] - a bracket expression, - matching any one of the chars (see - for more detail) - - - - \k - (where k is a non-alphanumeric character) - matches that character taken as an ordinary character, - e.g., \\ matches a backslash character - - - - \c - where c is alphanumeric - (possibly followed by other characters) - is an escape, see - (AREs only; in EREs and BREs, this matches c) - - - - { - when followed by a character other than a digit, - matches the left-brace character {; - when followed by a digit, it is the beginning of a - bound (see below) - - - - x - where x is a single character with no other - significance, matches that character - - - -
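For example, combining several of these atom types (a non-capturing group, a bracket expression, a capturing group, the . atom, and a backslash-quoted non-alphanumeric character):

regexp_match('abc123', '(?:[a-z]+)([0-9]+)')   {123}
'xyz' ~ 'x.z'                                  true
'3.14' ~ '3\.14'                               true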
- - - An RE cannot end with a backslash (\). - - - - - If you have turned off, - any backslashes you write in literal string constants will need to be - doubled. See for more information. - - - - - Regular Expression Quantifiers - - - - - Quantifier - Matches - - - - - - * - a sequence of 0 or more matches of the atom - - - - + - a sequence of 1 or more matches of the atom - - - - ? - a sequence of 0 or 1 matches of the atom - - - - {m} - a sequence of exactly m matches of the atom - - - - {m,} - a sequence of m or more matches of the atom - - - - - {m,n} - a sequence of m through n - (inclusive) matches of the atom; m cannot exceed - n - - - - *? - non-greedy version of * - - - - +? - non-greedy version of + - - - - ?? - non-greedy version of ? - - - - {m}? - non-greedy version of {m} - - - - {m,}? - non-greedy version of {m,} - - - - - {m,n}? - non-greedy version of {m,n} - - - -
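For example, contrasting a greedy bound with its non-greedy form (greediness is discussed in detail further below):

substring('aaaa' from 'a{2,3}')    aaa
substring('aaaa' from 'a{2,3}?')   aa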
- - - The forms using {...} - are known as bounds. - The numbers m and n within a bound are - unsigned decimal integers with permissible values from 0 to 255 inclusive. - - - - Non-greedy quantifiers (available in AREs only) match the - same possibilities as their corresponding normal (greedy) - counterparts, but prefer the smallest number rather than the largest - number of matches. - See for more detail. - - - - - A quantifier cannot immediately follow another quantifier, e.g., - ** is invalid. - A quantifier cannot - begin an expression or subexpression or follow - ^ or |. - - - - - Regular Expression Constraints - - - - - Constraint - Description - - - - - - ^ - matches at the beginning of the string - - - - $ - matches at the end of the string - - - - (?=re) - positive lookahead matches at any point - where a substring matching re begins - (AREs only) - - - - (?!re) - negative lookahead matches at any point - where no substring matching re begins - (AREs only) - - - - (?<=re) - positive lookbehind matches at any point - where a substring matching re ends - (AREs only) - - - - (?<!re) - negative lookbehind matches at any point - where no substring matching re ends - (AREs only) - - - -
- - - Lookahead and lookbehind constraints cannot contain back - references (see ), - and all parentheses within them are considered non-capturing. - -
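For example, using the lookahead and lookbehind constraints:

'postgresql' ~ 'post(?=gres)'                    true
'postmaster' ~ 'post(?=gres)'                    false
regexp_match('price: 42 EUR', '(?<=: )[0-9]+')   {42}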
- - - Bracket Expressions - - - A bracket expression is a list of - characters enclosed in []. It normally matches - any single character from the list (but see below). If the list - begins with ^, it matches any single character - not from the rest of the list. - If two characters - in the list are separated by -, this is - shorthand for the full range of characters between those two - (inclusive) in the collating sequence, - e.g., [0-9] in ASCII matches - any decimal digit. It is illegal for two ranges to share an - endpoint, e.g., a-c-e. Ranges are very - collating-sequence-dependent, so portable programs should avoid - relying on them. - - - - To include a literal ] in the list, make it the - first character (after ^, if that is used). To - include a literal -, make it the first or last - character, or the second endpoint of a range. To use a literal - - as the first endpoint of a range, enclose it - in [. and .] to make it a - collating element (see below). With the exception of these characters, - some combinations using [ - (see next paragraphs), and escapes (AREs only), all other special - characters lose their special significance within a bracket expression. - In particular, \ is not special when following - ERE or BRE rules, though it is special (as introducing an escape) - in AREs. - - - - Within a bracket expression, a collating element (a character, a - multiple-character sequence that collates as if it were a single - character, or a collating-sequence name for either) enclosed in - [. and .] stands for the - sequence of characters of that collating element. The sequence is - treated as a single element of the bracket expression's list. This - allows a bracket - expression containing a multiple-character collating element to - match more than one character, e.g., if the collating sequence - includes a ch collating element, then the RE - [[.ch.]]*c matches the first five characters of - chchcc. - - - - - PostgreSQL currently does not support multi-character collating - elements. This information describes possible future behavior. - - - - - Within a bracket expression, a collating element enclosed in - [= and =] is an equivalence - class, standing for the sequences of characters of all collating - elements equivalent to that one, including itself. (If there are - no other equivalent collating elements, the treatment is as if the - enclosing delimiters were [. and - .].) For example, if o and - ^ are the members of an equivalence class, then - [[=o=]], [[=^=]], and - [o^] are all synonymous. An equivalence class - cannot be an endpoint of a range. - - - - Within a bracket expression, the name of a character class - enclosed in [: and :] stands - for the list of all characters belonging to that class. A character - class cannot be used as an endpoint of a range. - The POSIX standard defines these character class - names: - alnum (letters and numeric digits), - alpha (letters), - blank (space and tab), - cntrl (control characters), - digit (numeric digits), - graph (printable characters except space), - lower (lower-case letters), - print (printable characters including space), - punct (punctuation), - space (any white space), - upper (upper-case letters), - and xdigit (hexadecimal digits). - The behavior of these standard character classes is generally - consistent across platforms for characters in the 7-bit ASCII set. 
- Whether a given non-ASCII character is considered to belong to one - of these classes depends on the collation - that is used for the regular-expression function or operator - (see ), or by default on the - database's LC_CTYPE locale setting (see - ). The classification of non-ASCII - characters can vary across platforms even in similarly-named - locales. (But the C locale never considers any - non-ASCII characters to belong to any of these classes.) - In addition to these standard character - classes, PostgreSQL defines - the word character class, which is the same as - alnum plus the underscore (_) - character, and - the ascii character class, which contains exactly - the 7-bit ASCII set. - - - - There are two special cases of bracket expressions: the bracket - expressions [[:<:]] and - [[:>:]] are constraints, - matching empty strings at the beginning - and end of a word respectively. A word is defined as a sequence - of word characters that is neither preceded nor followed by word - characters. A word character is any character belonging to the - word character class, that is, any letter, digit, - or underscore. This is an extension, compatible with but not - specified by POSIX 1003.2, and should be used with - caution in software intended to be portable to other systems. - The constraint escapes described below are usually preferable; they - are no more standard, but are easier to type. - - - - - Regular Expression Escapes - - - Escapes are special sequences beginning with \ - followed by an alphanumeric character. Escapes come in several varieties: - character entry, class shorthands, constraint escapes, and back references. - A \ followed by an alphanumeric character but not constituting - a valid escape is illegal in AREs. - In EREs, there are no escapes: outside a bracket expression, - a \ followed by an alphanumeric character merely stands for - that character as an ordinary character, and inside a bracket expression, - \ is an ordinary character. - (The latter is the one actual incompatibility between EREs and AREs.) - - - - Character-entry escapes exist to make it easier to specify - non-printing and other inconvenient characters in REs. They are - shown in . - - - - Class-shorthand escapes provide shorthands for certain - commonly-used character classes. They are - shown in . - - - - A constraint escape is a constraint, - matching the empty string if specific conditions are met, - written as an escape. They are - shown in . - - - - A back reference (\n) matches the - same string matched by the previous parenthesized subexpression specified - by the number n - (see ). For example, - ([bc])\1 matches bb or cc - but not bc or cb. - The subexpression must entirely precede the back reference in the RE. - Subexpressions are numbered in the order of their leading parentheses. - Non-capturing parentheses do not define subexpressions. - The back reference considers only the string characters matched by the - referenced subexpression, not any constraints contained in it. For - example, (^\d)\1 will match 22. 
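For example:

'abab' ~ '^(ab)\1$'                    true
'abcd' ~ '^(ab)\1$'                    false
regexp_match('go go', '([a-z]+) \1')   {go}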
- - - - Regular Expression Character-Entry Escapes - - - - - Escape - Description - - - - - - \a - alert (bell) character, as in C - - - - \b - backspace, as in C - - - - \B - synonym for backslash (\) to help reduce the need for backslash - doubling - - - - \cX - (where X is any character) the character whose - low-order 5 bits are the same as those of - X, and whose other bits are all zero - - - - \e - the character whose collating-sequence name - is ESC, - or failing that, the character with octal value 033 - - - - \f - form feed, as in C - - - - \n - newline, as in C - - - - \r - carriage return, as in C - - - - \t - horizontal tab, as in C - - - - \uwxyz - (where wxyz is exactly four hexadecimal digits) - the character whose hexadecimal value is - 0xwxyz - - - - - \Ustuvwxyz - (where stuvwxyz is exactly eight hexadecimal - digits) - the character whose hexadecimal value is - 0xstuvwxyz - - - - - \v - vertical tab, as in C - - - - \xhhh - (where hhh is any sequence of hexadecimal - digits) - the character whose hexadecimal value is - 0xhhh - (a single character no matter how many hexadecimal digits are used) - - - - - \0 - the character whose value is 0 (the null byte) - - - - \xy - (where xy is exactly two octal digits, - and is not a back reference) - the character whose octal value is - 0xy - - - - \xyz - (where xyz is exactly three octal digits, - and is not a back reference) - the character whose octal value is - 0xyz - - - -
- - - Hexadecimal digits are 0-9, - a-f, and A-F. - Octal digits are 0-7. - - - - Numeric character-entry escapes specifying values outside the ASCII range - (0–127) have meanings dependent on the database encoding. When the - encoding is UTF-8, escape values are equivalent to Unicode code points, - for example \u1234 means the character U+1234. - For other multibyte encodings, character-entry escapes usually just - specify the concatenation of the byte values for the character. If the - escape value does not correspond to any legal character in the database - encoding, no error will be raised, but it will never match any data. - - - - The character-entry escapes are always taken as ordinary characters. - For example, \135 is ] in ASCII, but - \135 does not terminate a bracket expression. - - - - Regular Expression Class-Shorthand Escapes - - - - - Escape - Description - - - - - - \d - matches any digit, like - [[:digit:]] - - - - \s - matches any whitespace character, like - [[:space:]] - - - - \w - matches any word character, like - [[:word:]] - - - - \D - matches any non-digit, like - [^[:digit:]] - - - - \S - matches any non-whitespace character, like - [^[:space:]] - - - - \W - matches any non-word character, like - [^[:word:]] - - - -
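For example:

regexp_replace('a1 b2 c3', '\d', '#', 'g')          a# b# c#
'foo_bar' ~ '^\w+$'                                 true
regexp_split_to_array('one  two   three', '\s+')    {one,two,three}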
- - - The class-shorthand escapes also work within bracket expressions, - although the definitions shown above are not quite syntactically - valid in that context. - For example, [a-c\d] is equivalent to - [a-c[:digit:]]. - - - - Regular Expression Constraint Escapes - - - - - Escape - Description - - - - - - \A - matches only at the beginning of the string - (see for how this differs from - ^) - - - - \m - matches only at the beginning of a word - - - - \M - matches only at the end of a word - - - - \y - matches only at the beginning or end of a word - - - - \Y - matches only at a point that is not the beginning or end of a - word - - - - \Z - matches only at the end of the string - (see for how this differs from - $) - - - -
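For example, using the word-boundary constraint escapes:

'the cat sat' ~ '\mcat\M'    true
'concatenate' ~ '\mcat\M'    false
'one two'     ~ '\yone\y'    true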
- - - A word is defined as in the specification of - [[:<:]] and [[:>:]] above. - Constraint escapes are illegal within bracket expressions. - - - - Regular Expression Back References - - - - - Escape - Description - - - - - - \m - (where m is a nonzero digit) - a back reference to the m'th subexpression - - - - \mnn - (where m is a nonzero digit, and - nn is some more digits, and the decimal value - mnn is not greater than the number of closing capturing - parentheses seen so far) - a back reference to the mnn'th subexpression - - - -
- - - - There is an inherent ambiguity between octal character-entry - escapes and back references, which is resolved by the following heuristics, - as hinted at above. - A leading zero always indicates an octal escape. - A single non-zero digit, not followed by another digit, - is always taken as a back reference. - A multi-digit sequence not starting with a zero is taken as a back - reference if it comes after a suitable subexpression - (i.e., the number is in the legal range for a back reference), - and otherwise is taken as octal. - - -
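These heuristics can be seen in action below: in the first expression there is no preceding capturing subexpression, so \135 is read as an octal escape (the ASCII character ]), while in the second, \1 is a back reference:

'a]' ~ 'a\135'     true
'aa' ~ '(a)\1'     true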
- - - Regular Expression Metasyntax - - - In addition to the main syntax described above, there are some special - forms and miscellaneous syntactic facilities available. - - - - An RE can begin with one of two special director prefixes. - If an RE begins with ***:, - the rest of the RE is taken as an ARE. (This normally has no effect in - PostgreSQL, since REs are assumed to be AREs; - but it does have an effect if ERE or BRE mode had been specified by - the flags parameter to a regex function.) - If an RE begins with ***=, - the rest of the RE is taken to be a literal string, - with all characters considered ordinary characters. - - - - An ARE can begin with embedded options: - a sequence (?xyz) - (where xyz is one or more alphabetic characters) - specifies options affecting the rest of the RE. - These options override any previously determined options — - in particular, they can override the case-sensitivity behavior implied by - a regex operator, or the flags parameter to a regex - function. - The available option letters are - shown in . - Note that these same option letters are used in the flags - parameters of regex functions. - - - - ARE Embedded-Option Letters - - - - - Option - Description - - - - - - b - rest of RE is a BRE - - - - c - case-sensitive matching (overrides operator type) - - - - e - rest of RE is an ERE - - - - i - case-insensitive matching (see - ) (overrides operator type) - - - - m - historical synonym for n - - - - n - newline-sensitive matching (see - ) - - - - p - partial newline-sensitive matching (see - ) - - - - q - rest of RE is a literal (quoted) string, all ordinary - characters - - - - s - non-newline-sensitive matching (default) - - - - t - tight syntax (default; see below) - - - - w - inverse partial newline-sensitive (weird) matching - (see ) - - - - x - expanded syntax (see below) - - - -
- - - Embedded options take effect at the ) terminating the sequence. - They can appear only at the start of an ARE (after the - ***: director if any). - - - - In addition to the usual (tight) RE syntax, in which all - characters are significant, there is an expanded syntax, - available by specifying the embedded x option. - In the expanded syntax, - white-space characters in the RE are ignored, as are - all characters between a # - and the following newline (or the end of the RE). This - permits paragraphing and commenting a complex RE. - There are three exceptions to that basic rule: - - - - - a white-space character or # preceded by \ is - retained - - - - - white space or # within a bracket expression is retained - - - - - white space and comments cannot appear within multi-character symbols, - such as (?: - - - - - For this purpose, white-space characters are blank, tab, newline, and - any character that belongs to the space character class. - - - - Finally, in an ARE, outside bracket expressions, the sequence - (?#ttt) - (where ttt is any text not containing a )) - is a comment, completely ignored. - Again, this is not allowed between the characters of - multi-character symbols, like (?:. - Such comments are more a historical artifact than a useful facility, - and their use is deprecated; use the expanded syntax instead. - - - - None of these metasyntax extensions is available if - an initial ***= director - has specified that the user's input be treated as a literal string - rather than as an RE. - -
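A few illustrative uses of these metasyntax forms, with expected results:

'HELLO' ~ '(?i)hello'                         true (embedded case-insensitivity option)
'abc'   ~ '(?x) a b c   # trailing comment'   true (expanded syntax ignores white space and # comments)
'a.c'   ~ '***=a.c'                           true (***= makes the rest a literal string)
'abc'   ~ '***=a.c'                           false (the . is not a metacharacter here)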
- - - Regular Expression Matching Rules - - - In the event that an RE could match more than one substring of a given - string, the RE matches the one starting earliest in the string. - If the RE could match more than one substring starting at that point, - either the longest possible match or the shortest possible match will - be taken, depending on whether the RE is greedy or - non-greedy. - - - - Whether an RE is greedy or not is determined by the following rules: - - - - Most atoms, and all constraints, have no greediness attribute (because - they cannot match variable amounts of text anyway). - - - - - Adding parentheses around an RE does not change its greediness. - - - - - A quantified atom with a fixed-repetition quantifier - ({m} - or - {m}?) - has the same greediness (possibly none) as the atom itself. - - - - - A quantified atom with other normal quantifiers (including - {m,n} - with m equal to n) - is greedy (prefers longest match). - - - - - A quantified atom with a non-greedy quantifier (including - {m,n}? - with m equal to n) - is non-greedy (prefers shortest match). - - - - - A branch — that is, an RE that has no top-level - | operator — has the same greediness as the first - quantified atom in it that has a greediness attribute. - - - - - An RE consisting of two or more branches connected by the - | operator is always greedy. - - - - - - - The above rules associate greediness attributes not only with individual - quantified atoms, but with branches and entire REs that contain quantified - atoms. What that means is that the matching is done in such a way that - the branch, or whole RE, matches the longest or shortest possible - substring as a whole. Once the length of the entire match - is determined, the part of it that matches any particular subexpression - is determined on the basis of the greediness attribute of that - subexpression, with subexpressions starting earlier in the RE taking - priority over ones starting later. - - - - An example of what this means: - -SELECT SUBSTRING('XY1234Z', 'Y*([0-9]{1,3})'); -Result: 123 -SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); -Result: 1 - - In the first case, the RE as a whole is greedy because Y* - is greedy. It can match beginning at the Y, and it matches - the longest possible string starting there, i.e., Y123. - The output is the parenthesized part of that, or 123. - In the second case, the RE as a whole is non-greedy because Y*? - is non-greedy. It can match beginning at the Y, and it matches - the shortest possible string starting there, i.e., Y1. - The subexpression [0-9]{1,3} is greedy but it cannot change - the decision as to the overall match length; so it is forced to match - just 1. - - - - In short, when an RE contains both greedy and non-greedy subexpressions, - the total match length is either as long as possible or as short as - possible, according to the attribute assigned to the whole RE. The - attributes assigned to the subexpressions only affect how much of that - match they are allowed to eat relative to each other. - - - - The quantifiers {1,1} and {1,1}? - can be used to force greediness or non-greediness, respectively, - on a subexpression or a whole RE. - This is useful when you need the whole RE to have a greediness attribute - different from what's deduced from its elements. As an example, - suppose that we are trying to separate a string containing some digits - into the digits and the parts before and after them. 
We might try to - do that like this: - -SELECT regexp_match('abc01234xyz', '(.*)(\d+)(.*)'); -Result: {abc0123,4,xyz} - - That didn't work: the first .* is greedy so - it eats as much as it can, leaving the \d+ to - match at the last possible place, the last digit. We might try to fix - that by making it non-greedy: - -SELECT regexp_match('abc01234xyz', '(.*?)(\d+)(.*)'); -Result: {abc,0,""} - - That didn't work either, because now the RE as a whole is non-greedy - and so it ends the overall match as soon as possible. We can get what - we want by forcing the RE as a whole to be greedy: - -SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); -Result: {abc,01234,xyz} - - Controlling the RE's overall greediness separately from its components' - greediness allows great flexibility in handling variable-length patterns. - - - - When deciding what is a longer or shorter match, - match lengths are measured in characters, not collating elements. - An empty string is considered longer than no match at all. - For example: - bb* - matches the three middle characters of abbbc; - (week|wee)(night|knights) - matches all ten characters of weeknights; - when (.*).* - is matched against abc the parenthesized subexpression - matches all three characters; and when - (a*)* is matched against bc - both the whole RE and the parenthesized - subexpression match an empty string. - - - - If case-independent matching is specified, - the effect is much as if all case distinctions had vanished from the - alphabet. - When an alphabetic that exists in multiple cases appears as an - ordinary character outside a bracket expression, it is effectively - transformed into a bracket expression containing both cases, - e.g., x becomes [xX]. - When it appears inside a bracket expression, all case counterparts - of it are added to the bracket expression, e.g., - [x] becomes [xX] - and [^x] becomes [^xX]. - - - - If newline-sensitive matching is specified, . - and bracket expressions using ^ - will never match the newline character - (so that matches will not cross lines unless the RE - explicitly includes a newline) - and ^ and $ - will match the empty string after and before a newline - respectively, in addition to matching at beginning and end of string - respectively. - But the ARE escapes \A and \Z - continue to match beginning or end of string only. - Also, the character class shorthands \D - and \W will match a newline regardless of this mode. - (Before PostgreSQL 14, they did not match - newlines when in newline-sensitive mode. - Write [^[:digit:]] - or [^[:word:]] to get the old behavior.) - - - - If partial newline-sensitive matching is specified, - this affects . and bracket expressions - as with newline-sensitive matching, but not ^ - and $. - - - - If inverse partial newline-sensitive matching is specified, - this affects ^ and $ - as with newline-sensitive matching, but not . - and bracket expressions. - This isn't very useful but is provided for symmetry. - - - - - Limits and Compatibility - - - No particular limit is imposed on the length of REs in this - implementation. However, - programs intended to be highly portable should not employ REs longer - than 256 bytes, - as a POSIX-compliant implementation can refuse to accept such REs. - - - - The only feature of AREs that is actually incompatible with - POSIX EREs is that \ does not lose its special - significance inside bracket expressions. 
All other ARE features use syntax which is illegal or has undefined or unspecified effects in POSIX EREs; the *** syntax of directors likewise is outside the POSIX syntax for both BREs and EREs.

Many of the ARE extensions are borrowed from Perl, but some have been changed to clean them up, and a few Perl extensions are not present. Incompatibilities of note include \b, \B, the lack of special treatment for a trailing newline, the addition of complemented bracket expressions to the things affected by newline-sensitive matching, the restrictions on parentheses and back references in lookahead/lookbehind constraints, and the longest/shortest-match (rather than first-match) matching semantics.

Basic Regular Expressions

BREs differ from EREs in several respects. In BREs, |, +, and ? are ordinary characters and there is no equivalent for their functionality. The delimiters for bounds are \{ and \}, with { and } by themselves ordinary characters. The parentheses for nested subexpressions are \( and \), with ( and ) by themselves ordinary characters. ^ is an ordinary character except at the beginning of the RE or the beginning of a parenthesized subexpression, $ is an ordinary character except at the end of the RE or the end of a parenthesized subexpression, and * is an ordinary character if it appears at the beginning of the RE or the beginning of a parenthesized subexpression (after a possible leading ^). Finally, single-digit back references are available, and \< and \> are synonyms for [[:<:]] and [[:>:]] respectively; no other escapes are available in BREs.

Differences from SQL Standard and XQuery

Since SQL:2008, the SQL standard includes regular expression operators and functions that perform pattern matching according to the XQuery regular expression standard:

    LIKE_REGEX
    OCCURRENCES_REGEX
    POSITION_REGEX
    SUBSTRING_REGEX
    TRANSLATE_REGEX

PostgreSQL does not currently implement these operators and functions. You can get approximately equivalent functionality in each case as shown in the table below. (Various optional clauses on both sides have been omitted in this table.)

Regular Expression Functions Equivalencies

SQL standard                                           PostgreSQL
string LIKE_REGEX pattern                              regexp_like(string, pattern) or string ~ pattern
OCCURRENCES_REGEX(pattern IN string)                   regexp_count(string, pattern)
POSITION_REGEX(pattern IN string)                      regexp_instr(string, pattern)
SUBSTRING_REGEX(pattern IN string)                     regexp_substr(string, pattern)
TRANSLATE_REGEX(pattern IN string WITH replacement)    regexp_replace(string, pattern, replacement)
- - - Regular expression functions similar to those provided by PostgreSQL are - also available in a number of other SQL implementations, whereas the - SQL-standard functions are not as widely implemented. Some of the - details of the regular expression syntax will likely differ in each - implementation. - - - - The SQL-standard operators and functions use XQuery regular expressions, - which are quite close to the ARE syntax described above. - Notable differences between the existing POSIX-based - regular-expression feature and XQuery regular expressions include: - - - - - XQuery character class subtraction is not supported. An example of - this feature is using the following to match only English - consonants: [a-z-[aeiou]]. - - - - - XQuery character class shorthands \c, - \C, \i, - and \I are not supported. - - - - - XQuery character class elements - using \p{UnicodeProperty} or the - inverse \P{UnicodeProperty} are not supported. - - - - - POSIX interprets character classes such as \w - (see ) - according to the prevailing locale (which you can control by - attaching a COLLATE clause to the operator or - function). XQuery specifies these classes by reference to Unicode - character properties, so equivalent behavior is obtained only with - a locale that follows the Unicode rules. - - - - - The SQL standard (not XQuery itself) attempts to cater for more - variants of newline than POSIX does. The - newline-sensitive matching options described above consider only - ASCII NL (\n) to be a newline, but SQL would have - us treat CR (\r), CRLF (\r\n) - (a Windows-style newline), and some Unicode-only characters like - LINE SEPARATOR (U+2028) as newlines as well. - Notably, . and \s should - count \r\n as one character not two according to - SQL. - - - - - Of the character-entry escapes described in - , - XQuery supports only \n, \r, - and \t. - - - - - XQuery does not support - the [:name:] syntax - for character classes within bracket expressions. - - - - - XQuery does not have lookahead or lookbehind constraints, - nor any of the constraint escapes described in - . - - - - - The metasyntax forms described in - do not exist in XQuery. - - - - - The regular expression flag letters defined by XQuery are - related to but not the same as the option letters for POSIX - (). While the - i and q options behave the - same, others do not: - - - - XQuery's s (allow dot to match newline) - and m (allow ^ - and $ to match at newlines) flags provide - access to the same behaviors as - POSIX's n, p - and w flags, but they - do not match the behavior of - POSIX's s and m flags. - Note in particular that dot-matches-newline is the default - behavior in POSIX but not XQuery. - - - - - XQuery's x (ignore whitespace in pattern) flag - is noticeably different from POSIX's expanded-mode flag. - POSIX's x flag also - allows # to begin a comment in the pattern, - and POSIX will not ignore a whitespace character after a - backslash. - - - - - - - - -
-
-
- - - - Data Type Formatting Functions - - - formatting - - - - The PostgreSQL formatting functions - provide a powerful set of tools for converting various data types - (date/time, integer, floating point, numeric) to formatted strings - and for converting from formatted strings to specific data types. - lists them. - These functions all follow a common calling convention: the first - argument is the value to be formatted and the second argument is a - template that defines the output or input format. - - - - Formatting Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - to_char - - to_char ( timestamp, text ) - text - - - to_char ( timestamp with time zone, text ) - text - - - Converts time stamp to string according to the given format. - - - to_char(timestamp '2002-04-20 17:31:12.66', 'HH12:MI:SS') - 05:31:12 - - - - - - to_char ( interval, text ) - text - - - Converts interval to string according to the given format. - - - to_char(interval '15h 2m 12s', 'HH24:MI:SS') - 15:02:12 - - - - - - to_char ( numeric_type, text ) - text - - - Converts number to string according to the given format; available - for integer, bigint, numeric, - real, double precision. - - - to_char(125, '999') - 125 - - - to_char(125.8::real, '999D9') - 125.8 - - - to_char(-125.8, '999D99S') - 125.80- - - - - - - - to_date - - to_date ( text, text ) - date - - - Converts string to date according to the given format. - - - to_date('05 Dec 2000', 'DD Mon YYYY') - 2000-12-05 - - - - - - - to_number - - to_number ( text, text ) - numeric - - - Converts string to numeric according to the given format. - - - to_number('12,454.8-', '99G999D9S') - -12454.8 - - - - - - - to_timestamp - - to_timestamp ( text, text ) - timestamp with time zone - - - Converts string to time stamp according to the given format. - (See also to_timestamp(double precision) in - .) - - - to_timestamp('05 Dec 2000', 'DD Mon YYYY') - 2000-12-05 00:00:00-05 - - - - -
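- For instance, a date string in a standard format needs only a cast, while an undelimited string that the datetime parser cannot interpret requires to_date with a template (results assume the default ISO DateStyle):
-SELECT CAST('2000-12-05' AS date);
-Result: 2000-12-05
-SELECT to_date('05122000', 'DDMMYYYY');
-Result: 2000-12-05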
- - - - to_timestamp and to_date - exist to handle input formats that cannot be converted by - simple casting. For most standard date/time formats, simply casting the - source string to the required data type works, and is much easier. - Similarly, to_number is unnecessary for standard numeric - representations. - - - - - In a to_char output template string, there are certain - patterns that are recognized and replaced with appropriately-formatted - data based on the given value. Any text that is not a template pattern is - simply copied verbatim. Similarly, in an input template string (for the - other functions), template patterns identify the values to be supplied by - the input data string. If there are characters in the template string - that are not template patterns, the corresponding characters in the input - data string are simply skipped over (whether or not they are equal to the - template string characters). - - - - shows the - template patterns available for formatting date and time values. - - - - Template Patterns for Date/Time Formatting - - - - Pattern - Description - - - - - HH - hour of day (01–12) - - - HH12 - hour of day (01–12) - - - HH24 - hour of day (00–23) - - - MI - minute (00–59) - - - SS - second (00–59) - - - MS - millisecond (000–999) - - - US - microsecond (000000–999999) - - - FF1 - tenth of second (0–9) - - - FF2 - hundredth of second (00–99) - - - FF3 - millisecond (000–999) - - - FF4 - tenth of a millisecond (0000–9999) - - - FF5 - hundredth of a millisecond (00000–99999) - - - FF6 - microsecond (000000–999999) - - - SSSS, SSSSS - seconds past midnight (0–86399) - - - AM, am, - PM or pm - meridiem indicator (without periods) - - - A.M., a.m., - P.M. or p.m. - meridiem indicator (with periods) - - - Y,YYY - year (4 or more digits) with comma - - - YYYY - year (4 or more digits) - - - YYY - last 3 digits of year - - - YY - last 2 digits of year - - - Y - last digit of year - - - IYYY - ISO 8601 week-numbering year (4 or more digits) - - - IYY - last 3 digits of ISO 8601 week-numbering year - - - IY - last 2 digits of ISO 8601 week-numbering year - - - I - last digit of ISO 8601 week-numbering year - - - BC, bc, - AD or ad - era indicator (without periods) - - - B.C., b.c., - A.D. or a.d. 
- era indicator (with periods) - - - MONTH - full upper case month name (blank-padded to 9 chars) - - - Month - full capitalized month name (blank-padded to 9 chars) - - - month - full lower case month name (blank-padded to 9 chars) - - - MON - abbreviated upper case month name (3 chars in English, localized lengths vary) - - - Mon - abbreviated capitalized month name (3 chars in English, localized lengths vary) - - - mon - abbreviated lower case month name (3 chars in English, localized lengths vary) - - - MM - month number (01–12) - - - DAY - full upper case day name (blank-padded to 9 chars) - - - Day - full capitalized day name (blank-padded to 9 chars) - - - day - full lower case day name (blank-padded to 9 chars) - - - DY - abbreviated upper case day name (3 chars in English, localized lengths vary) - - - Dy - abbreviated capitalized day name (3 chars in English, localized lengths vary) - - - dy - abbreviated lower case day name (3 chars in English, localized lengths vary) - - - DDD - day of year (001–366) - - - IDDD - day of ISO 8601 week-numbering year (001–371; day 1 of the year is Monday of the first ISO week) - - - DD - day of month (01–31) - - - D - day of the week, Sunday (1) to Saturday (7) - - - ID - ISO 8601 day of the week, Monday (1) to Sunday (7) - - - W - week of month (1–5) (the first week starts on the first day of the month) - - - WW - week number of year (1–53) (the first week starts on the first day of the year) - - - IW - week number of ISO 8601 week-numbering year (01–53; the first Thursday of the year is in week 1) - - - CC - century (2 digits) (the twenty-first century starts on 2001-01-01) - - - J - Julian Date (integer days since November 24, 4714 BC at local - midnight; see ) - - - Q - quarter - - - RM - month in upper case Roman numerals (I–XII; I=January) - - - rm - month in lower case Roman numerals (i–xii; i=January) - - - TZ - upper case time-zone abbreviation - - - tz - lower case time-zone abbreviation - - - TZH - time-zone hours - - - TZM - time-zone minutes - - - OF - time-zone offset from UTC (HH - or HH:MM) - - - -
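- Several of these patterns can be combined in a single template; a hedged illustration (2001-02-16 falls on a Friday, and any text that is not a pattern, such as the commas here, is copied verbatim):
-SELECT to_char(timestamp '2001-02-16 20:38:40', 'FMDay, DD Mon YYYY, HH24:MI:SS');
-Result: Friday, 16 Feb 2001, 20:38:40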
- - - Modifiers can be applied to any template pattern to alter its - behavior. For example, FMMonth - is the Month pattern with the - FM modifier. - shows the - modifier patterns for date/time formatting. - - - - Template Pattern Modifiers for Date/Time Formatting - - - - Modifier - Description - Example - - - - - FM prefix - fill mode (suppress leading zeroes and padding blanks) - FMMonth - - - TH suffix - upper case ordinal number suffix - DDTH, e.g., 12TH - - - th suffix - lower case ordinal number suffix - DDth, e.g., 12th - - - FX prefix - fixed format global option (see usage notes) - FX Month DD Day - - - TM prefix - translation mode (use localized day and month names based on - ) - TMMonth - - - SP suffix - spell mode (not implemented) - DDSP - - - -
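- For example, the TH suffix and the FM prefix from the table above behave as follows (FMMonth drops the blank padding that plain Month would add):
-SELECT to_char(date '2001-02-16', 'DDTH');
-Result: 16TH
-SELECT to_char(date '2001-02-16', 'FMMonth');
-Result: February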
- - - Usage notes for date/time formatting: - - - - - FM suppresses leading zeroes and trailing blanks - that would otherwise be added to make the output of a pattern be - fixed-width. In PostgreSQL, - FM modifies only the next specification, while in - Oracle FM affects all subsequent - specifications, and repeated FM modifiers - toggle fill mode on and off. - - - - - - TM suppresses trailing blanks whether or - not FM is specified. - - - - - - to_timestamp and to_date - ignore letter case in the input; so for - example MON, Mon, - and mon all accept the same strings. When using - the TM modifier, case-folding is done according to - the rules of the function's input collation (see - ). - - - - - - to_timestamp and to_date - skip multiple blank spaces at the beginning of the input string and - around date and time values unless the FX option is used. For example, - to_timestamp(' 2000    JUN', 'YYYY MON') and - to_timestamp('2000 - JUN', 'YYYY-MON') work, but - to_timestamp('2000    JUN', 'FXYYYY MON') returns an error - because to_timestamp expects only a single space. - FX must be specified as the first item in - the template. - - - - - - A separator (a space or non-letter/non-digit character) in the template string of - to_timestamp and to_date - matches any single separator in the input string or is skipped, - unless the FX option is used. - For example, to_timestamp('2000JUN', 'YYYY///MON') and - to_timestamp('2000/JUN', 'YYYY MON') work, but - to_timestamp('2000//JUN', 'YYYY/MON') - returns an error because the number of separators in the input string - exceeds the number of separators in the template. - - - If FX is specified, a separator in the template string - matches exactly one character in the input string. But note that the - input string character is not required to be the same as the separator from the template string. - For example, to_timestamp('2000/JUN', 'FXYYYY MON') - works, but to_timestamp('2000/JUN', 'FXYYYY  MON') - returns an error because the second space in the template string consumes - the letter J from the input string. - - - - - - A TZH template pattern can match a signed number. - Without the FX option, minus signs may be ambiguous, - and could be interpreted as a separator. - This ambiguity is resolved as follows: If the number of separators before - TZH in the template string is less than the number of - separators before the minus sign in the input string, the minus sign - is interpreted as part of TZH. - Otherwise, the minus sign is considered to be a separator between values. - For example, to_timestamp('2000 -10', 'YYYY TZH') matches - -10 to TZH, but - to_timestamp('2000 -10', 'YYYY  TZH') - matches 10 to TZH. - - - - - - Ordinary text is allowed in to_char - templates and will be output literally. You can put a substring - in double quotes to force it to be interpreted as literal text - even if it contains template patterns. For example, in - '"Hello Year "YYYY', the YYYY - will be replaced by the year data, but the single Y in Year - will not be. - In to_date, to_number, - and to_timestamp, literal text and double-quoted - strings result in skipping the number of characters contained in the - string; for example "XX" skips two input characters - (whether or not they are XX). - - - - Prior to PostgreSQL 12, it was possible to - skip arbitrary text in the input string using non-letter or non-digit - characters. For example, - to_timestamp('2000y6m1d', 'yyyy-MM-DD') used to - work. Now you can only use letter characters for this purpose. 
For example, - to_timestamp('2000y6m1d', 'yyyytMMtDDt') and - to_timestamp('2000y6m1d', 'yyyy"y"MM"m"DD"d"') - skip y, m, and - d. - - - - - - - If you want to have a double quote in the output you must - precede it with a backslash, for example '\"YYYY - Month\"'. - Backslashes are not otherwise special outside of double-quoted - strings. Within a double-quoted string, a backslash causes the - next character to be taken literally, whatever it is (but this - has no special effect unless the next character is a double quote - or another backslash). - - - - - - In to_timestamp and to_date, - if the year format specification is less than four digits, e.g., - YYY, and the supplied year is less than four digits, - the year will be adjusted to be nearest to the year 2020, e.g., - 95 becomes 1995. - - - - - - In to_timestamp and to_date, - negative years are treated as signifying BC. If you write both a - negative year and an explicit BC field, you get AD - again. An input of year zero is treated as 1 BC. - - - - - - In to_timestamp and to_date, - the YYYY conversion has a restriction when - processing years with more than 4 digits. You must - use some non-digit character or template after YYYY, - otherwise the year is always interpreted as 4 digits. For example - (with the year 20000): - to_date('200001130', 'YYYYMMDD') will be - interpreted as a 4-digit year; instead use a non-digit - separator after the year, like - to_date('20000-1130', 'YYYY-MMDD') or - to_date('20000Nov30', 'YYYYMonDD'). - - - - - - In to_timestamp and to_date, - the CC (century) field is accepted but ignored - if there is a YYY, YYYY or - Y,YYY field. If CC is used with - YY or Y then the result is - computed as that year in the specified century. If the century is - specified but the year is not, the first year of the century - is assumed. - - - - - - In to_timestamp and to_date, - weekday names or numbers (DAY, D, - and related field types) are accepted but are ignored for purposes of - computing the result. The same is true for quarter - (Q) fields. - - - - - - In to_timestamp and to_date, - an ISO 8601 week-numbering date (as distinct from a Gregorian date) - can be specified in one of two ways: - - - - Year, week number, and weekday: for - example to_date('2006-42-4', 'IYYY-IW-ID') - returns the date 2006-10-19. - If you omit the weekday it is assumed to be 1 (Monday). - - - - - Year and day of year: for example to_date('2006-291', - 'IYYY-IDDD') also returns 2006-10-19. - - - - - - Attempting to enter a date using a mixture of ISO 8601 week-numbering - fields and Gregorian date fields is nonsensical, and will cause an - error. In the context of an ISO 8601 week-numbering year, the - concept of a month or day of month has no - meaning. In the context of a Gregorian year, the ISO week has no - meaning. - - - - While to_date will reject a mixture of - Gregorian and ISO week-numbering date - fields, to_char will not, since output format - specifications like YYYY-MM-DD (IYYY-IDDD) can be - useful. But avoid writing something like IYYY-MM-DD; - that would yield surprising results near the start of the year. - (See for more - information.) - - - - - - - In to_timestamp, millisecond - (MS) or microsecond (US) - fields are used as the - seconds digits after the decimal point. For example - to_timestamp('12.3', 'SS.MS') is not 3 milliseconds, - but 300, because the conversion treats it as 12 + 0.3 seconds. - So, for the format SS.MS, the input values - 12.3, 12.30, - and 12.300 specify the - same number of milliseconds. 
To get three milliseconds, one must write - 12.003, which the conversion treats as - 12 + 0.003 = 12.003 seconds. - - - - Here is a more - complex example: - to_timestamp('15:12:02.020.001230', 'HH24:MI:SS.MS.US') - is 15 hours, 12 minutes, and 2 seconds + 20 milliseconds + - 1230 microseconds = 2.021230 seconds. - - - - - - to_char(..., 'ID')'s day of the week numbering - matches the extract(isodow from ...) function, but - to_char(..., 'D')'s does not match - extract(dow from ...)'s day numbering. - - - - - - to_char(interval) formats HH and - HH12 as shown on a 12-hour clock, for example zero hours - and 36 hours both output as 12, while HH24 - outputs the full hour value, which can exceed 23 in - an interval value. - - - - - - - - shows the - template patterns available for formatting numeric values. - - - - Template Patterns for Numeric Formatting - - - - Pattern - Description - - - - - 9 - digit position (can be dropped if insignificant) - - - 0 - digit position (will not be dropped, even if insignificant) - - - . (period) - decimal point - - - , (comma) - group (thousands) separator - - - PR - negative value in angle brackets - - - S - sign anchored to number (uses locale) - - - L - currency symbol (uses locale) - - - D - decimal point (uses locale) - - - G - group separator (uses locale) - - - MI - minus sign in specified position (if number < 0) - - - PL - plus sign in specified position (if number > 0) - - - SG - plus/minus sign in specified position - - - RN or rn - Roman numeral (values between 1 and 3999) - - - TH or th - ordinal number suffix - - - V - shift specified number of digits (see notes) - - - EEEE - exponent for scientific notation - - - -
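- For example, the difference between the 0 and 9 digit positions (discussed in the notes below) looks like this; expected output is shown in comments because the leading sign column is whitespace:
-SELECT to_char(12, '999');  -- '  12' (insignificant leading zero dropped)
-SELECT to_char(12, '099');  -- ' 012' (leading zero kept)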
- - - Usage notes for numeric formatting: - - - - - 0 specifies a digit position that will always be printed, - even if it contains a leading/trailing zero. 9 also - specifies a digit position, but if it is a leading zero then it will - be replaced by a space, while if it is a trailing zero and fill mode - is specified then it will be deleted. (For to_number(), - these two pattern characters are equivalent.) - - - - - - If the format provides fewer fractional digits than the number being - formatted, to_char() will round the number to - the specified number of fractional digits. - - - - - - The pattern characters S, L, D, - and G represent the sign, currency symbol, decimal point, - and thousands separator characters defined by the current locale - (see - and ). The pattern characters period - and comma represent those exact characters, with the meanings of - decimal point and thousands separator, regardless of locale. - - - - - - If no explicit provision is made for a sign - in to_char()'s pattern, one column will be reserved for - the sign, and it will be anchored to (appear just left of) the - number. If S appears just left of some 9's, - it will likewise be anchored to the number. - - - - - - A sign formatted using SG, PL, or - MI is not anchored to - the number; for example, - to_char(-12, 'MI9999') produces '-  12' - but to_char(-12, 'S9999') produces '  -12'. - (The Oracle implementation does not allow the use of - MI before 9, but rather - requires that 9 precede - MI.) - - - - - - TH does not convert values less than zero - and does not convert fractional numbers. - - - - - - PL, SG, and - TH are PostgreSQL - extensions. - - - - - - In to_number, if non-data template patterns such - as L or TH are used, the - corresponding number of input characters are skipped, whether or not - they match the template pattern, unless they are data characters - (that is, digits, sign, decimal point, or comma). For - example, TH would skip two non-data characters. - - - - - - V with to_char - multiplies the input values by - 10^n, where - n is the number of digits following - V. V with - to_number divides in a similar manner. - The V can be thought of as marking the position - of an implicit decimal point in the input or output string. - to_char and to_number - do not support the use of - V combined with a decimal point - (e.g., 99.9V99 is not allowed). - - - - - - EEEE (scientific notation) cannot be used in - combination with any of the other formatting patterns or - modifiers other than digit and decimal point patterns, and must be at the end of the format string - (e.g., 9.99EEEE is a valid pattern). - - - - - - In to_number(), the RN - pattern converts Roman numerals (in standard form) to numbers. - Input is case-insensitive, so RN - and rn are equivalent. RN - cannot be used in combination with any other formatting patterns or - modifiers except FM, which is applicable only - in to_char() and is ignored - in to_number(). - - - - - - - Certain modifiers can be applied to any template pattern to alter its - behavior. For example, FM99.99 - is the 99.99 pattern with the - FM modifier. - shows the - modifier patterns for numeric formatting. - - - - Template Pattern Modifiers for Numeric Formatting - - - - Modifier - Description - Example - - - - - FM prefix - fill mode (suppress trailing zeroes and padding blanks) - FM99.99 - - - TH suffix - upper case ordinal number suffix - 999TH - - - th suffix - lower case ordinal number suffix - 999th - - - -
- - - shows some - examples of the use of the to_char function. - - - - <function>to_char</function> Examples - - - - Expression - Result - - - - - to_char(current_timestamp, 'Day, DD  HH12:MI:SS') - 'Tuesday  , 06  05:39:18' - - - to_char(current_timestamp, 'FMDay, FMDD  HH12:MI:SS') - 'Tuesday, 6  05:39:18' - - - to_char(current_timestamp AT TIME ZONE - 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') - '2022-12-06T05:39:18Z', - ISO 8601 extended format - - - to_char(-0.1, '99.99') - '  -.10' - - - to_char(-0.1, 'FM9.99') - '-.1' - - - to_char(-0.1, 'FM90.99') - '-0.1' - - - to_char(0.1, '0.9') - ' 0.1' - - - to_char(12, '9990999.9') - '    0012.0' - - - to_char(12, 'FM9990999.9') - '0012.' - - - to_char(485, '999') - ' 485' - - - to_char(-485, '999') - '-485' - - - to_char(485, '9 9 9') - ' 4 8 5' - - - to_char(1485, '9,999') - ' 1,485' - - - to_char(1485, '9G999') - ' 1 485' - - - to_char(148.5, '999.999') - ' 148.500' - - - to_char(148.5, 'FM999.999') - '148.5' - - - to_char(148.5, 'FM999.990') - '148.500' - - - to_char(148.5, '999D999') - ' 148,500' - - - to_char(3148.5, '9G999D999') - ' 3 148,500' - - - to_char(-485, '999S') - '485-' - - - to_char(-485, '999MI') - '485-' - - - to_char(485, '999MI') - '485 ' - - - to_char(485, 'FM999MI') - '485' - - - to_char(485, 'PL999') - '+485' - - - to_char(485, 'SG999') - '+485' - - - to_char(-485, 'SG999') - '-485' - - - to_char(-485, '9SG99') - '4-85' - - - to_char(-485, '999PR') - '<485>' - - - to_char(485, 'L999') - 'DM 485' - - - to_char(485, 'RN') - '        CDLXXXV' - - - to_char(485, 'FMRN') - 'CDLXXXV' - - - to_char(5.2, 'FMRN') - 'V' - - - to_char(482, '999th') - ' 482nd' - - - to_char(485, '"Good number:"999') - 'Good number: 485' - - - to_char(485.8, '"Pre:"999" Post:" .999') - 'Pre: 485 Post: .800' - - - to_char(12, '99V999') - ' 12000' - - - to_char(12.4, '99V999') - ' 12400' - - - to_char(12.45, '99V9') - ' 125' - - - to_char(0.0004859, '9.99EEEE') - ' 4.86e-04' - - - -
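- to_number accepts the same patterns for parsing; a small sketch based on the notes above (the RN pattern in to_number is a newer addition, so this assumes a server version that supports it):
-SELECT to_number('1234.56', '9999.99');
-Result: 1234.56
-SELECT to_number('cdlxxxv', 'RN');
-Result: 485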
- -
- - - - Date/Time Functions and Operators - - - shows the available - functions for date/time value processing, with details appearing in - the following subsections. illustrates the behaviors of - the basic arithmetic operators (+, - *, etc.). For formatting functions, refer to - . You should be familiar with - the background information on date/time data types from . - - - - In addition, the usual comparison operators shown in - are available for the - date/time types. Dates and timestamps (with or without time zone) are - all comparable, while times (with or without time zone) and intervals - can only be compared to other values of the same data type. When - comparing a timestamp without time zone to a timestamp with time zone, - the former value is assumed to be given in the time zone specified by - the configuration parameter, and is - rotated to UTC for comparison to the latter value (which is already - in UTC internally). Similarly, a date value is assumed to represent - midnight in the TimeZone zone when comparing it - to a timestamp. - - - - All the functions and operators described below that take time or timestamp - inputs actually come in two variants: one that takes time with time zone or timestamp - with time zone, and one that takes time without time zone or timestamp without time zone. - For brevity, these variants are not shown separately. Also, the - + and * operators come in commutative pairs (for - example both date + integer - and integer + date); we show - only one of each such pair. - - - - Date/Time Operators - - - - - - Operator - - - Description - - - Example(s) - - - - - - - - date + integer - date - - - Add a number of days to a date - - - date '2001-09-28' + 7 - 2001-10-05 - - - - - - date + interval - timestamp - - - Add an interval to a date - - - date '2001-09-28' + interval '1 hour' - 2001-09-28 01:00:00 - - - - - - date + time - timestamp - - - Add a time-of-day to a date - - - date '2001-09-28' + time '03:00' - 2001-09-28 03:00:00 - - - - - - interval + interval - interval - - - Add intervals - - - interval '1 day' + interval '1 hour' - 1 day 01:00:00 - - - - - - timestamp + interval - timestamp - - - Add an interval to a timestamp - - - timestamp '2001-09-28 01:00' + interval '23 hours' - 2001-09-29 00:00:00 - - - - - - time + interval - time - - - Add an interval to a time - - - time '01:00' + interval '3 hours' - 04:00:00 - - - - - - - interval - interval - - - Negate an interval - - - - interval '23 hours' - -23:00:00 - - - - - - date - date - integer - - - Subtract dates, producing the number of days elapsed - - - date '2001-10-01' - date '2001-09-28' - 3 - - - - - - date - integer - date - - - Subtract a number of days from a date - - - date '2001-10-01' - 7 - 2001-09-24 - - - - - - date - interval - timestamp - - - Subtract an interval from a date - - - date '2001-09-28' - interval '1 hour' - 2001-09-27 23:00:00 - - - - - - time - time - interval - - - Subtract times - - - time '05:00' - time '03:00' - 02:00:00 - - - - - - time - interval - time - - - Subtract an interval from a time - - - time '05:00' - interval '2 hours' - 03:00:00 - - - - - - timestamp - interval - timestamp - - - Subtract an interval from a timestamp - - - timestamp '2001-09-28 23:00' - interval '23 hours' - 2001-09-28 00:00:00 - - - - - - interval - interval - interval - - - Subtract intervals - - - interval '1 day' - interval '1 hour' - 1 day -01:00:00 - - - - - - timestamp - timestamp - interval - - - Subtract timestamps (converting 24-hour intervals into days, - similarly 
to justify_hours()) - - - timestamp '2001-09-29 03:00' - timestamp '2001-07-27 12:00' - 63 days 15:00:00 - - - - - - interval * double precision - interval - - - Multiply an interval by a scalar - - - interval '1 second' * 900 - 00:15:00 - - - interval '1 day' * 21 - 21 days - - - interval '1 hour' * 3.5 - 03:30:00 - - - - - - interval / double precision - interval - - - Divide an interval by a scalar - - - interval '1 hour' / 1.5 - 00:40:00 - - - - -
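- As noted above, the + and * operators come in commutative pairs, so the spellings omitted from the table work as well; for example:
-SELECT 7 + date '2001-09-28';
-Result: 2001-10-05
-SELECT 2 * interval '1 hour';
-Result: 02:00:00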
- - - Date/Time Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - age - - age ( timestamp, timestamp ) - interval - - - Subtract arguments, producing a symbolic result that - uses years and months, rather than just days - - - age(timestamp '2001-04-10', timestamp '1957-06-13') - 43 years 9 mons 27 days - - - - - - age ( timestamp ) - interval - - - Subtract argument from current_date (at midnight) - - - age(timestamp '1957-06-13') - 62 years 6 mons 10 days - - - - - - - clock_timestamp - - clock_timestamp ( ) - timestamp with time zone - - - Current date and time (changes during statement execution); - see - - - clock_timestamp() - 2019-12-23 14:39:53.662522-05 - - - - - - - current_date - - current_date - date - - - Current date; see - - - current_date - 2019-12-23 - - - - - - - current_time - - current_time - time with time zone - - - Current time of day; see - - - current_time - 14:39:53.662522-05 - - - - - - current_time ( integer ) - time with time zone - - - Current time of day, with limited precision; - see - - - current_time(2) - 14:39:53.66-05 - - - - - - - current_timestamp - - current_timestamp - timestamp with time zone - - - Current date and time (start of current transaction); - see - - - current_timestamp - 2019-12-23 14:39:53.662522-05 - - - - - - current_timestamp ( integer ) - timestamp with time zone - - - Current date and time (start of current transaction), with limited precision; - see - - - current_timestamp(0) - 2019-12-23 14:39:53-05 - - - - - - - date_add - - date_add ( timestamp with time zone, interval , text ) - timestamp with time zone - - - Add an interval to a timestamp with time - zone, computing times of day and daylight-savings adjustments - according to the time zone named by the third argument, or the - current setting if that is omitted. - The form with two arguments is equivalent to the timestamp with - time zone + interval operator. - - - date_add('2021-10-31 00:00:00+02'::timestamptz, '1 day'::interval, 'Europe/Warsaw') - 2021-10-31 23:00:00+00 - - - - - - date_bin ( interval, timestamp, timestamp ) - timestamp - - - Bin input into specified interval aligned with specified origin; see - - - date_bin('15 minutes', timestamp '2001-02-16 20:38:40', timestamp '2001-02-16 20:05:00') - 2001-02-16 20:35:00 - - - - - - - date_part - - date_part ( text, timestamp ) - double precision - - - Get timestamp subfield (equivalent to extract); - see - - - date_part('hour', timestamp '2001-02-16 20:38:40') - 20 - - - - - - date_part ( text, interval ) - double precision - - - Get interval subfield (equivalent to extract); - see - - - date_part('month', interval '2 years 3 months') - 3 - - - - - - - date_subtract - - date_subtract ( timestamp with time zone, interval , text ) - timestamp with time zone - - - Subtract an interval from a timestamp with time - zone, computing times of day and daylight-savings adjustments - according to the time zone named by the third argument, or the - current setting if that is omitted. - The form with two arguments is equivalent to the timestamp with - time zone - interval operator. 
- - - date_subtract('2021-11-01 00:00:00+01'::timestamptz, '1 day'::interval, 'Europe/Warsaw') - 2021-10-30 22:00:00+00 - - - - - - - date_trunc - - date_trunc ( text, timestamp ) - timestamp - - - Truncate to specified precision; see - - - date_trunc('hour', timestamp '2001-02-16 20:38:40') - 2001-02-16 20:00:00 - - - - - - date_trunc ( text, timestamp with time zone, text ) - timestamp with time zone - - - Truncate to specified precision in the specified time zone; see - - - - date_trunc('day', timestamptz '2001-02-16 20:38:40+00', 'Australia/Sydney') - 2001-02-16 13:00:00+00 - - - - - - date_trunc ( text, interval ) - interval - - - Truncate to specified precision; see - - - - date_trunc('hour', interval '2 days 3 hours 40 minutes') - 2 days 03:00:00 - - - - - - - extract - - extract ( field from timestamp ) - numeric - - - Get timestamp subfield; see - - - extract(hour from timestamp '2001-02-16 20:38:40') - 20 - - - - - - extract ( field from interval ) - numeric - - - Get interval subfield; see - - - extract(month from interval '2 years 3 months') - 3 - - - - - - - isfinite - - isfinite ( date ) - boolean - - - Test for finite date (not +/-infinity) - - - isfinite(date '2001-02-16') - true - - - - - - isfinite ( timestamp ) - boolean - - - Test for finite timestamp (not +/-infinity) - - - isfinite(timestamp 'infinity') - false - - - - - - isfinite ( interval ) - boolean - - - Test for finite interval (not +/-infinity) - - - isfinite(interval '4 hours') - true - - - - - - - justify_days - - justify_days ( interval ) - interval - - - Adjust interval, converting 30-day time periods to months - - - justify_days(interval '1 year 65 days') - 1 year 2 mons 5 days - - - - - - - justify_hours - - justify_hours ( interval ) - interval - - - Adjust interval, converting 24-hour time periods to days - - - justify_hours(interval '50 hours 10 minutes') - 2 days 02:10:00 - - - - - - - justify_interval - - justify_interval ( interval ) - interval - - - Adjust interval using justify_days - and justify_hours, with additional sign - adjustments - - - justify_interval(interval '1 mon -1 hour') - 29 days 23:00:00 - - - - - - - localtime - - localtime - time - - - Current time of day; - see - - - localtime - 14:39:53.662522 - - - - - - localtime ( integer ) - time - - - Current time of day, with limited precision; - see - - - localtime(0) - 14:39:53 - - - - - - - localtimestamp - - localtimestamp - timestamp - - - Current date and time (start of current transaction); - see - - - localtimestamp - 2019-12-23 14:39:53.662522 - - - - - - localtimestamp ( integer ) - timestamp - - - Current date and time (start of current - transaction), with limited precision; - see - - - localtimestamp(2) - 2019-12-23 14:39:53.66 - - - - - - - make_date - - make_date ( year int, - month int, - day int ) - date - - - Create date from year, month and day fields - (negative years signify BC) - - - make_date(2013, 7, 15) - 2013-07-15 - - - - - - make_interval - - make_interval ( years int - , months int - , weeks int - , days int - , hours int - , mins int - , secs double precision - ) - interval - - - Create interval from years, months, weeks, days, hours, minutes and - seconds fields, each of which can default to zero - - - make_interval(days => 10) - 10 days - - - - - - - make_time - - make_time ( hour int, - min int, - sec double precision ) - time - - - Create time from hour, minute and seconds fields - - - make_time(8, 15, 23.5) - 08:15:23.5 - - - - - - - make_timestamp - - make_timestamp ( year int, - month int, - day int, 
- hour int, - min int, - sec double precision ) - timestamp - - - Create timestamp from year, month, day, hour, minute and seconds fields - (negative years signify BC) - - - make_timestamp(2013, 7, 15, 8, 15, 23.5) - 2013-07-15 08:15:23.5 - - - - - - - make_timestamptz - - make_timestamptz ( year int, - month int, - day int, - hour int, - min int, - sec double precision - , timezone text ) - timestamp with time zone - - - Create timestamp with time zone from year, month, day, hour, minute - and seconds fields (negative years signify BC). - If timezone is not - specified, the current time zone is used; the examples assume the - session time zone is Europe/London - - - make_timestamptz(2013, 7, 15, 8, 15, 23.5) - 2013-07-15 08:15:23.5+01 - - - make_timestamptz(2013, 7, 15, 8, 15, 23.5, 'America/New_York') - 2013-07-15 13:15:23.5+01 - - - - - - - now - - now ( ) - timestamp with time zone - - - Current date and time (start of current transaction); - see - - - now() - 2019-12-23 14:39:53.662522-05 - - - - - - - statement_timestamp - - statement_timestamp ( ) - timestamp with time zone - - - Current date and time (start of current statement); - see - - - statement_timestamp() - 2019-12-23 14:39:53.662522-05 - - - - - - - timeofday - - timeofday ( ) - text - - - Current date and time - (like clock_timestamp, but as a text string); - see - - - timeofday() - Mon Dec 23 14:39:53.662522 2019 EST - - - - - - - transaction_timestamp - - transaction_timestamp ( ) - timestamp with time zone - - - Current date and time (start of current transaction); - see - - - transaction_timestamp() - 2019-12-23 14:39:53.662522-05 - - - - - - - to_timestamp - - to_timestamp ( double precision ) - timestamp with time zone - - - Convert Unix epoch (seconds since 1970-01-01 00:00:00+00) to - timestamp with time zone - - - to_timestamp(1284352323) - 2010-09-13 04:32:03+00 - - - - -
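- Because every argument of make_interval defaults to zero, the named-argument notation shown in the table extends naturally to several fields at once; for example:
-SELECT make_interval(years => 1, months => 2, days => 3);
-Result: 1 year 2 mons 3 days
-SELECT make_interval(hours => 1, mins => 30);
-Result: 01:30:00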
- - - - OVERLAPS - - In addition to these functions, the SQL OVERLAPS operator is - supported: - -(start1, end1) OVERLAPS (start2, end2) -(start1, length1) OVERLAPS (start2, length2) - - This expression yields true when two time periods (defined by their - endpoints) overlap, false when they do not overlap. The endpoints - can be specified as pairs of dates, times, or time stamps; or as - a date, time, or time stamp followed by an interval. When a pair - of values is provided, either the start or the end can be written - first; OVERLAPS automatically takes the earlier value - of the pair as the start. Each time period is considered to - represent the half-open interval start <= - time < end, unless - start and end are equal in which case it - represents that single time instant. This means for instance that two - time periods with only an endpoint in common do not overlap. - - - -SELECT (DATE '2001-02-16', DATE '2001-12-21') OVERLAPS - (DATE '2001-10-30', DATE '2002-10-30'); -Result: true -SELECT (DATE '2001-02-16', INTERVAL '100 days') OVERLAPS - (DATE '2001-10-30', DATE '2002-10-30'); -Result: false -SELECT (DATE '2001-10-29', DATE '2001-10-30') OVERLAPS - (DATE '2001-10-30', DATE '2001-10-31'); -Result: false -SELECT (DATE '2001-10-30', DATE '2001-10-30') OVERLAPS - (DATE '2001-10-30', DATE '2001-10-31'); -Result: true - - - - When adding an interval value to (or subtracting an - interval value from) a timestamp - or timestamp with time zone value, the months, days, and - microseconds fields of the interval value are handled in turn. - First, a nonzero months field advances or decrements the date of the - timestamp by the indicated number of months, keeping the day of month the - same unless it would be past the end of the new month, in which case the - last day of that month is used. (For example, March 31 plus 1 month - becomes April 30, but March 31 plus 2 months becomes May 31.) - Then the days field advances or decrements the date of the timestamp by - the indicated number of days. In both these steps the local time of day - is kept the same. Finally, if there is a nonzero microseconds field, it - is added or subtracted literally. - When doing arithmetic on a timestamp with time zone value in - a time zone that recognizes DST, this means that adding or subtracting - (say) interval '1 day' does not necessarily have the - same result as adding or subtracting interval '24 - hours'. - For example, with the session time zone set - to America/Denver: - -SELECT timestamp with time zone '2005-04-02 12:00:00-07' + interval '1 day'; -Result: 2005-04-03 12:00:00-06 -SELECT timestamp with time zone '2005-04-02 12:00:00-07' + interval '24 hours'; -Result: 2005-04-03 13:00:00-06 - - This happens because an hour was skipped due to a change in daylight saving - time at 2005-04-03 02:00:00 in time zone - America/Denver. - - - - Note there can be ambiguity in the months field returned by - age because different months have different numbers of - days. PostgreSQL's approach uses the month from the - earlier of the two dates when calculating partial months. For example, - age('2004-06-01', '2004-04-30') uses April to yield - 1 mon 1 day, while using May would yield 1 mon 2 - days because May has 31 days, while April has only 30. - - - - Subtraction of dates and timestamps can also be complex. 
One conceptually - simple way to perform subtraction is to convert each value to a number - of seconds using EXTRACT(EPOCH FROM ...), then subtract the - results; this produces the - number of seconds between the two values. This will adjust - for the number of days in each month, timezone changes, and daylight - saving time adjustments. Subtraction of date or timestamp - values with the - operator - returns the number of days (24-hours) and hours/minutes/seconds - between the values, making the same adjustments. The age - function returns years, months, days, and hours/minutes/seconds, - performing field-by-field subtraction and then adjusting for negative - field values. The following queries illustrate the differences in these - approaches. The sample results were produced with timezone - = 'US/Eastern'; there is a daylight saving time change between the - two dates used: - - - -SELECT EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') - - EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00'); -Result: 10537200.000000 -SELECT (EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') - - EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00')) - / 60 / 60 / 24; -Result: 121.9583333333333333 -SELECT timestamptz '2013-07-01 12:00:00' - timestamptz '2013-03-01 12:00:00'; -Result: 121 days 23:00:00 -SELECT age(timestamptz '2013-07-01 12:00:00', timestamptz '2013-03-01 12:00:00'); -Result: 4 mons - - - - <function>EXTRACT</function>, <function>date_part</function> - - - date_part - - - extract - - - -EXTRACT(field FROM source) - - - - The extract function retrieves subfields - such as year or hour from date/time values. - source must be a value expression of - type timestamp, date, time, - or interval. (Timestamps and times can be with or - without time zone.) - field is an identifier or - string that selects what field to extract from the source value. - Not all fields are valid for every input data type; for example, fields - smaller than a day cannot be extracted from a date, while - fields of a day or more cannot be extracted from a time. - The extract function returns values of type - numeric. - - - - The following are valid field names: - - - - - century - - - The century; for interval values, the year field - divided by 100 - - - -SELECT EXTRACT(CENTURY FROM TIMESTAMP '2000-12-16 12:21:13'); -Result: 20 -SELECT EXTRACT(CENTURY FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 21 -SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD'); -Result: 1 -SELECT EXTRACT(CENTURY FROM DATE '0001-12-31 BC'); -Result: -1 -SELECT EXTRACT(CENTURY FROM INTERVAL '2001 years'); -Result: 20 - - - - - - day - - - The day of the month (1–31); for interval - values, the number of days - - - -SELECT EXTRACT(DAY FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 16 -SELECT EXTRACT(DAY FROM INTERVAL '40 days 1 minute'); -Result: 40 - - - - - - - decade - - - The year field divided by 10 - - - -SELECT EXTRACT(DECADE FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 200 - - - - - - dow - - - The day of the week as Sunday (0) to - Saturday (6) - - - -SELECT EXTRACT(DOW FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 5 - - - Note that extract's day of the week numbering - differs from that of the to_char(..., - 'D') function. 
- - - - - - - doy - - - The day of the year (1–365/366) - - - -SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 47 - - - - - - epoch - - - For timestamp with time zone values, the - number of seconds since 1970-01-01 00:00:00 UTC (negative for - timestamps before that); - for date and timestamp values, the - nominal number of seconds since 1970-01-01 00:00:00, - without regard to timezone or daylight-savings rules; - for interval values, the total number - of seconds in the interval - - - -SELECT EXTRACT(EPOCH FROM TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40.12-08'); -Result: 982384720.120000 -SELECT EXTRACT(EPOCH FROM TIMESTAMP '2001-02-16 20:38:40.12'); -Result: 982355920.120000 -SELECT EXTRACT(EPOCH FROM INTERVAL '5 days 3 hours'); -Result: 442800.000000 - - - - You can convert an epoch value back to a timestamp with time zone - with to_timestamp: - - -SELECT to_timestamp(982384720.12); -Result: 2001-02-17 04:38:40.12+00 - - - - Beware that applying to_timestamp to an epoch - extracted from a date or timestamp value - could produce a misleading result: the result will effectively - assume that the original value had been given in UTC, which might - not be the case. - - - - - - hour - - - The hour field (0–23 in timestamps, unrestricted in - intervals) - - - -SELECT EXTRACT(HOUR FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 20 - - - - - - isodow - - - The day of the week as Monday (1) to - Sunday (7) - - - -SELECT EXTRACT(ISODOW FROM TIMESTAMP '2001-02-18 20:38:40'); -Result: 7 - - - This is identical to dow except for Sunday. This - matches the ISO 8601 day of the week numbering. - - - - - - - isoyear - - - The ISO 8601 week-numbering year that the date - falls in - - - -SELECT EXTRACT(ISOYEAR FROM DATE '2006-01-01'); -Result: 2005 -SELECT EXTRACT(ISOYEAR FROM DATE '2006-01-02'); -Result: 2006 - - - - Each ISO 8601 week-numbering year begins with the - Monday of the week containing the 4th of January, so in early - January or late December the ISO year may be - different from the Gregorian year. See the week - field for more information. - - - - - - julian - - - The Julian Date corresponding to the - date or timestamp. Timestamps - that are not local midnight result in a fractional value. See - for more information. - - - -SELECT EXTRACT(JULIAN FROM DATE '2006-01-01'); -Result: 2453737 -SELECT EXTRACT(JULIAN FROM TIMESTAMP '2006-01-01 12:00'); -Result: 2453737.50000000000000000000 - - - - - - microseconds - - - The seconds field, including fractional parts, multiplied by 1 - 000 000; note that this includes full seconds - - - -SELECT EXTRACT(MICROSECONDS FROM TIME '17:12:28.5'); -Result: 28500000 - - - - - - millennium - - - The millennium; for interval values, the year field - divided by 1000 - - - -SELECT EXTRACT(MILLENNIUM FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 3 -SELECT EXTRACT(MILLENNIUM FROM INTERVAL '2001 years'); -Result: 2 - - - - Years in the 1900s are in the second millennium. - The third millennium started January 1, 2001. - - - - - - milliseconds - - - The seconds field, including fractional parts, multiplied by - 1000. Note that this includes full seconds. 
- - - -SELECT EXTRACT(MILLISECONDS FROM TIME '17:12:28.5'); -Result: 28500.000 - - - - - - minute - - - The minutes field (0–59) - - - -SELECT EXTRACT(MINUTE FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 38 - - - - - - month - - - The number of the month within the year (1–12); - for interval values, the number of months modulo 12 - (0–11) - - - -SELECT EXTRACT(MONTH FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 2 -SELECT EXTRACT(MONTH FROM INTERVAL '2 years 3 months'); -Result: 3 -SELECT EXTRACT(MONTH FROM INTERVAL '2 years 13 months'); -Result: 1 - - - - - - quarter - - - The quarter of the year (1–4) that the date is in; - for interval values, the month field divided by 3 - plus 1 - - - -SELECT EXTRACT(QUARTER FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 1 -SELECT EXTRACT(QUARTER FROM INTERVAL '1 year 6 months'); -Result: 3 - - - - - - second - - - The seconds field, including any fractional seconds - - - -SELECT EXTRACT(SECOND FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 40.000000 -SELECT EXTRACT(SECOND FROM TIME '17:12:28.5'); -Result: 28.500000 - - - - - timezone - - - The time zone offset from UTC, measured in seconds. Positive values - correspond to time zones east of UTC, negative values to - zones west of UTC. (Technically, - PostgreSQL does not use UTC because - leap seconds are not handled.) - - - - - - timezone_hour - - - The hour component of the time zone offset - - - - - - timezone_minute - - - The minute component of the time zone offset - - - - - - week - - - The number of the ISO 8601 week-numbering week of - the year. By definition, ISO weeks start on Mondays and the first - week of a year contains January 4 of that year. In other words, the - first Thursday of a year is in week 1 of that year. - - - In the ISO week-numbering system, it is possible for early-January - dates to be part of the 52nd or 53rd week of the previous year, and for - late-December dates to be part of the first week of the next year. - For example, 2005-01-01 is part of the 53rd week of year - 2004, and 2006-01-01 is part of the 52nd week of year - 2005, while 2012-12-31 is part of the first week of 2013. - It's recommended to use the isoyear field together with - week to get consistent results. - - - - For interval values, the week field is simply the number - of integral days divided by 7. - - - -SELECT EXTRACT(WEEK FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 7 -SELECT EXTRACT(WEEK FROM INTERVAL '13 days 24 hours'); -Result: 1 - - - - - - year - - - The year field. Keep in mind there is no 0 AD, so subtracting - BC years from AD years should be done with care. - - - -SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40'); -Result: 2001 - - - - - - - - - When processing an interval value, - the extract function produces field values that - match the interpretation used by the interval output function. This - can produce surprising results if one starts with a non-normalized - interval representation, for example: - -SELECT INTERVAL '80 minutes'; -Result: 01:20:00 -SELECT EXTRACT(MINUTES FROM INTERVAL '80 minutes'); -Result: 20 - - - - - - When the input value is +/-Infinity, extract returns - +/-Infinity for monotonically-increasing fields (epoch, - julian, year, isoyear, - decade, century, and millennium - for timestamp inputs; epoch, hour, - day, year, decade, - century, and millennium for - interval inputs). - For other fields, NULL is returned. PostgreSQL - versions before 9.6 returned zero for all cases of infinite input. 
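- The rule for infinite inputs can be checked directly; year is monotonically increasing while month is not (expected results shown as comments, since a null prints as an empty string):
-SELECT EXTRACT(YEAR FROM TIMESTAMP 'infinity');   -- Infinity
-SELECT EXTRACT(MONTH FROM TIMESTAMP 'infinity');  -- NULL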
- - - - - The extract function is primarily intended - for computational processing. For formatting date/time values for - display, see . - - - - The date_part function is modeled on the traditional - Ingres equivalent to the - SQL-standard function extract: - -date_part('field', source) - - Note that here the field parameter needs to - be a string value, not a name. The valid field names for - date_part are the same as for - extract. - For historical reasons, the date_part function - returns values of type double precision. This can result in - a loss of precision in certain uses. Using extract - is recommended instead. - - - -SELECT date_part('day', TIMESTAMP '2001-02-16 20:38:40'); -Result: 16 -SELECT date_part('hour', INTERVAL '4 hours 3 minutes'); -Result: 4 - - - - - - <function>date_trunc</function> - - - date_trunc - - - - The function date_trunc is conceptually - similar to the trunc function for numbers. - - - - -date_trunc(field, source , time_zone ) - - source is a value expression of type - timestamp, timestamp with time zone, - or interval. - (Values of type date and - time are cast automatically to timestamp or - interval, respectively.) - field selects to which precision to - truncate the input value. The return value is likewise of type - timestamp, timestamp with time zone, - or interval, - and it has all fields that are less significant than the - selected one set to zero (or one, for day and month). - - - - Valid values for field are: - - microseconds - milliseconds - second - minute - hour - day - week - month - quarter - year - decade - century - millennium - - - - - When the input value is of type timestamp with time zone, - the truncation is performed with respect to a particular time zone; - for example, truncation to day produces a value that - is midnight in that zone. By default, truncation is done with respect - to the current setting, but the - optional time_zone argument can be provided - to specify a different time zone. The time zone name can be specified - in any of the ways described in . - - - - A time zone cannot be specified when processing timestamp without - time zone or interval inputs. These are always - taken at face value. - - - - Examples (assuming the local time zone is America/New_York): - -SELECT date_trunc('hour', TIMESTAMP '2001-02-16 20:38:40'); -Result: 2001-02-16 20:00:00 -SELECT date_trunc('year', TIMESTAMP '2001-02-16 20:38:40'); -Result: 2001-01-01 00:00:00 -SELECT date_trunc('day', TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40+00'); -Result: 2001-02-16 00:00:00-05 -SELECT date_trunc('day', TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40+00', 'Australia/Sydney'); -Result: 2001-02-16 08:00:00-05 -SELECT date_trunc('hour', INTERVAL '3 days 02:47:33'); -Result: 3 days 02:00:00 - - - - - - <function>date_bin</function> - - - date_bin - - - - The function date_bin bins the input - timestamp into the specified interval (the stride) - aligned with a specified origin. - - - - -date_bin(stride, source, origin) - - source is a value expression of type - timestamp or timestamp with time zone. (Values - of type date are cast automatically to - timestamp.) stride is a value - expression of type interval. The return value is likewise - of type timestamp or timestamp with time zone, - and it marks the beginning of the bin into which the - source is placed. 
- - - - Examples: - -SELECT date_bin('15 minutes', TIMESTAMP '2020-02-11 15:44:17', TIMESTAMP '2001-01-01'); -Result: 2020-02-11 15:30:00 -SELECT date_bin('15 minutes', TIMESTAMP '2020-02-11 15:44:17', TIMESTAMP '2001-01-01 00:02:30'); -Result: 2020-02-11 15:32:30 - - - - - In the case of full units (1 minute, 1 hour, etc.), it gives the same result as - the analogous date_trunc call, but the difference is - that date_bin can truncate to an arbitrary interval. - - - - The stride interval must be greater than zero and - cannot contain units of month or larger. - - - - - <literal>AT TIME ZONE</literal> and <literal>AT LOCAL</literal> - - - time zone - conversion - - - - AT TIME ZONE - - - - AT LOCAL - - - - The AT TIME ZONE operator converts time - stamp without time zone to/from - time stamp with time zone, and - time with time zone values to different time - zones. shows its - variants. - - - - <literal>AT TIME ZONE</literal> and <literal>AT LOCAL</literal> Variants - - - - - Operator - - - Description - - - Example(s) - - - - - - - - timestamp without time zone AT TIME ZONE zone - timestamp with time zone - - - Converts given time stamp without time zone to - time stamp with time zone, assuming the given - value is in the named time zone. - - - timestamp '2001-02-16 20:38:40' at time zone 'America/Denver' - 2001-02-17 03:38:40+00 - - - - - - timestamp without time zone AT LOCAL - timestamp with time zone - - - Converts given time stamp without time zone to - time stamp with the session's - TimeZone value as time zone. - - - timestamp '2001-02-16 20:38:40' at local - 2001-02-17 03:38:40+00 - - - - - - timestamp with time zone AT TIME ZONE zone - timestamp without time zone - - - Converts given time stamp with time zone to - time stamp without time zone, as the time would - appear in that zone. - - - timestamp with time zone '2001-02-16 20:38:40-05' at time zone 'America/Denver' - 2001-02-16 18:38:40 - - - - - - timestamp with time zone AT LOCAL - timestamp without time zone - - - Converts given time stamp with time zone to - time stamp without time zone, as the time would - appear with the session's TimeZone value as time zone. - - - timestamp with time zone '2001-02-16 20:38:40-05' at local - 2001-02-16 18:38:40 - - - - - - time with time zone AT TIME ZONE zone - time with time zone - - - Converts given time with time zone to a new time - zone. Since no date is supplied, this uses the currently active UTC - offset for the named destination zone. - - - time with time zone '05:34:17-05' at time zone 'UTC' - 10:34:17+00 - - - - - - time with time zone AT LOCAL - time with time zone - - - Converts given time with time zone to a new time - zone. Since no date is supplied, this uses the currently active UTC - offset for the session's TimeZone value. - - - Assuming the session's TimeZone is set to UTC: - - - time with time zone '05:34:17-05' at local - 10:34:17+00 - - - - -
- - - In these expressions, the desired time zone zone can be - specified either as a text value (e.g., 'America/Los_Angeles') - or as an interval (e.g., INTERVAL '-08:00'). - In the text case, a time zone name can be specified in any of the ways - described in . - The interval case is only useful for zones that have fixed offsets from - UTC, so it is not very common in practice. - - - - The syntax AT LOCAL may be used as shorthand for - AT TIME ZONE local, where - local is the session's - TimeZone value. - - - - Examples (assuming the current setting - is America/Los_Angeles): - -SELECT TIMESTAMP '2001-02-16 20:38:40' AT TIME ZONE 'America/Denver'; -Result: 2001-02-16 19:38:40-08 -SELECT TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40-05' AT TIME ZONE 'America/Denver'; -Result: 2001-02-16 18:38:40 -SELECT TIMESTAMP '2001-02-16 20:38:40' AT TIME ZONE 'Asia/Tokyo' AT TIME ZONE 'America/Chicago'; -Result: 2001-02-16 05:38:40 -SELECT TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40-05' AT LOCAL; -Result: 2001-02-16 17:38:40 -SELECT TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40-05' AT TIME ZONE '+05'; -Result: 2001-02-16 20:38:40 -SELECT TIME WITH TIME ZONE '20:38:40-05' AT LOCAL; -Result: 17:38:40 - - The first example adds a time zone to a value that lacks it, and - displays the value using the current TimeZone - setting. The second example shifts the time stamp with time zone value - to the specified time zone, and returns the value without a time zone. - This allows storage and display of values different from the current - TimeZone setting. The third example converts - Tokyo time to Chicago time. The fourth example shifts the time stamp - with time zone value to the time zone currently specified by the - TimeZone setting and returns the value without a - time zone. The fifth example demonstrates that the sign in a POSIX-style - time zone specification has the opposite meaning of the sign in an - ISO-8601 datetime literal, as described in - and . - - - - The sixth example is a cautionary tale. Because there is no - date associated with the input value, the conversion is made using the - current date of the session. Therefore, this static example may show an incorrect - result depending on the time of year at which it is viewed, because - 'America/Los_Angeles' observes Daylight Saving Time. - - - - The function timezone(zone, - timestamp) is equivalent to the SQL-conforming construct - timestamp AT TIME ZONE - zone. - - - - The function timezone(zone, - time) is equivalent to the SQL-conforming construct - time AT TIME ZONE - zone. - - - - The function timezone(timestamp) - is equivalent to the SQL-conforming construct timestamp - AT LOCAL. - - - - The function timezone(time) - is equivalent to the SQL-conforming construct time - AT LOCAL. -
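- For example, the function form gives the same answer as the first AT TIME ZONE example above (again assuming the TimeZone setting is America/Los_Angeles):
-SELECT timezone('America/Denver', TIMESTAMP '2001-02-16 20:38:40');
-Result: 2001-02-16 19:38:40-08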
- - - Current Date/Time - - - date - current - - - - time - current - - - - PostgreSQL provides a number of functions - that return values related to the current date and time. These - SQL-standard functions all return values based on the start time of - the current transaction: - -CURRENT_DATE -CURRENT_TIME -CURRENT_TIMESTAMP -CURRENT_TIME(precision) -CURRENT_TIMESTAMP(precision) -LOCALTIME -LOCALTIMESTAMP -LOCALTIME(precision) -LOCALTIMESTAMP(precision) - - - - - CURRENT_TIME and - CURRENT_TIMESTAMP deliver values with time zone; - LOCALTIME and - LOCALTIMESTAMP deliver values without time zone. - - - - CURRENT_TIME, - CURRENT_TIMESTAMP, - LOCALTIME, and - LOCALTIMESTAMP - can optionally take - a precision parameter, which causes the result to be rounded - to that many fractional digits in the seconds field. Without a precision parameter, - the result is given to the full available precision. - - - - Some examples: - -SELECT CURRENT_TIME; -Result: 14:39:53.662522-05 -SELECT CURRENT_DATE; -Result: 2019-12-23 -SELECT CURRENT_TIMESTAMP; -Result: 2019-12-23 14:39:53.662522-05 -SELECT CURRENT_TIMESTAMP(2); -Result: 2019-12-23 14:39:53.66-05 -SELECT LOCALTIMESTAMP; -Result: 2019-12-23 14:39:53.662522 - - - - - Since these functions return - the start time of the current transaction, their values do not - change during the transaction. This is considered a feature: - the intent is to allow a single transaction to have a consistent - notion of the current time, so that multiple - modifications within the same transaction bear the same - time stamp. - - - - - Other database systems might advance these values more - frequently. - - - - - PostgreSQL also provides functions that - return the start time of the current statement, as well as the actual - current time at the instant the function is called. The complete list - of non-SQL-standard time functions is: - -transaction_timestamp() -statement_timestamp() -clock_timestamp() -timeofday() -now() - - - - - transaction_timestamp() is equivalent to - CURRENT_TIMESTAMP, but is named to clearly reflect - what it returns. - statement_timestamp() returns the start time of the current - statement (more specifically, the time of receipt of the latest command - message from the client). - statement_timestamp() and transaction_timestamp() - return the same value during the first statement of a transaction, but might - differ during subsequent statements. - clock_timestamp() returns the actual current time, and - therefore its value changes even within a single SQL statement. - timeofday() is a historical - PostgreSQL function. Like - clock_timestamp(), it returns the actual current time, - but as a formatted text string rather than a timestamp - with time zone value. - now() is a traditional PostgreSQL - equivalent to transaction_timestamp(). - - - - All the date/time data types also accept the special literal value - now to specify the current date and time (again, - interpreted as the transaction start time). Thus, - the following three all return the same result: - -SELECT CURRENT_TIMESTAMP; -SELECT now(); -SELECT TIMESTAMP 'now'; -- but see tip below - - - - - - Do not use the third form when specifying a value to be evaluated later, - for example in a DEFAULT clause for a table column. - The system will convert now - to a timestamp as soon as the constant is parsed, so that when - the default value is needed, - the time of the table creation would be used! 
The first two - forms will not be evaluated until the default value is used, - because they are function calls. Thus they will give the desired - behavior of defaulting to the time of row insertion. - (See also .) - - - - - - Delaying Execution - - - pg_sleep - - - pg_sleep_for - - - pg_sleep_until - - - sleep - - - delay - - - - The following functions are available to delay execution of the server - process: - -pg_sleep ( double precision ) -pg_sleep_for ( interval ) -pg_sleep_until ( timestamp with time zone ) - - - pg_sleep makes the current session's process - sleep until the given number of seconds have - elapsed. Fractional-second delays can be specified. - pg_sleep_for is a convenience function to - allow the sleep time to be specified as an interval. - pg_sleep_until is a convenience function for when - a specific wake-up time is desired. - For example: - - -SELECT pg_sleep(1.5); -SELECT pg_sleep_for('5 minutes'); -SELECT pg_sleep_until('tomorrow 03:00'); - - - - - - The effective resolution of the sleep interval is platform-specific; - 0.01 seconds is a common value. The sleep delay will be at least as long - as specified. It might be longer depending on factors such as server load. - In particular, pg_sleep_until is not guaranteed to - wake up exactly at the specified time, but it will not wake up any earlier. - - - - - - Make sure that your session does not hold more locks than necessary - when calling pg_sleep or its variants. Otherwise - other sessions might have to wait for your sleeping process, slowing down - the entire system. - - - - -
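A short sketch of the distinction between transaction, statement, and wall-clock time described above; the exact interval will vary with server timing:

BEGIN;
SELECT pg_sleep(1.5);
-- transaction_timestamp() is frozen at transaction start, while
-- statement_timestamp() advances with each new client command:
SELECT statement_timestamp() - transaction_timestamp() AS elapsed;
Result: 00:00:01.5 (approximately)
COMMIT;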
- - - - Enum Support Functions - - - For enum types (described in ), - there are several functions that allow cleaner programming without - hard-coding particular values of an enum type. - These are listed in . The examples - assume an enum type created as: - - -CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple'); - - - - - - Enum Support Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - enum_first - - enum_first ( anyenum ) - anyenum - - - Returns the first value of the input enum type. - - - enum_first(null::rainbow) - red - - - - - - enum_last - - enum_last ( anyenum ) - anyenum - - - Returns the last value of the input enum type. - - - enum_last(null::rainbow) - purple - - - - - - enum_range - - enum_range ( anyenum ) - anyarray - - - Returns all values of the input enum type in an ordered array. - - - enum_range(null::rainbow) - {red,orange,yellow,&zwsp;green,blue,purple} - - - - - enum_range ( anyenum, anyenum ) - anyarray - - - Returns the range between the two given enum values, as an ordered - array. The values must be from the same enum type. If the first - parameter is null, the result will start with the first value of - the enum type. - If the second parameter is null, the result will end with the last - value of the enum type. - - - enum_range('orange'::rainbow, 'green'::rainbow) - {orange,yellow,green} - - - enum_range(NULL, 'green'::rainbow) - {red,orange,&zwsp;yellow,green} - - - enum_range('orange'::rainbow, NULL) - {orange,yellow,green,&zwsp;blue,purple} - - - - -
- - - Notice that except for the two-argument form of enum_range, - these functions disregard the specific value passed to them; they care - only about its declared data type. Either null or a specific value of - the type can be passed, with the same result. It is more common to - apply these functions to a table column or function argument than to - a hardwired type name as used in the examples. - -
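As a sketch of applying these functions to a table column rather than a hardwired type name, assume the rainbow type above and a hypothetical table paints:

CREATE TABLE paints (name text, hue rainbow);
INSERT INTO paints VALUES ('crimson', 'red'), ('sky', 'blue');
-- the result depends only on hue's declared type, never on its value:
SELECT DISTINCT enum_first(hue), enum_last(hue) FROM paints;
Result: red | purple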
- - - Geometric Functions and Operators - - - The geometric types point, box, - lseg, line, path, - polygon, and circle have a large set of - native support functions and operators, shown in , , and . - - - - Geometric Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - geometric_type + point - geometric_type - - - Adds the coordinates of the second point to those of each - point of the first argument, thus performing translation. - Available for point, box, path, - circle. - - - box '(1,1),(0,0)' + point '(2,0)' - (3,1),(2,0) - - - - - - path + path - path - - - Concatenates two open paths (returns NULL if either path is closed). - - - path '[(0,0),(1,1)]' + path '[(2,2),(3,3),(4,4)]' - [(0,0),(1,1),(2,2),(3,3),(4,4)] - - - - - - geometric_type - point - geometric_type - - - Subtracts the coordinates of the second point from those - of each point of the first argument, thus performing translation. - Available for point, box, path, - circle. - - - box '(1,1),(0,0)' - point '(2,0)' - (-1,1),(-2,0) - - - - - - geometric_type * point - geometric_type - - - Multiplies each point of the first argument by the second - point (treating a point as being a complex number - represented by real and imaginary parts, and performing standard - complex multiplication). If one interprets - the second point as a vector, this is equivalent to - scaling the object's size and distance from the origin by the length - of the vector, and rotating it counterclockwise around the origin by - the vector's angle from the x axis. - Available for point, box,Rotating a - box with these operators only moves its corner points: the box is - still considered to have sides parallel to the axes. Hence the box's - size is not preserved, as a true rotation would do. - path, circle. - - - path '((0,0),(1,0),(1,1))' * point '(3.0,0)' - ((0,0),(3,0),(3,3)) - - - path '((0,0),(1,0),(1,1))' * point(cosd(45), sind(45)) - ((0,0),&zwsp;(0.7071067811865475,0.7071067811865475),&zwsp;(0,1.414213562373095)) - - - - - - geometric_type / point - geometric_type - - - Divides each point of the first argument by the second - point (treating a point as being a complex number - represented by real and imaginary parts, and performing standard - complex division). If one interprets - the second point as a vector, this is equivalent to - scaling the object's size and distance from the origin down by the - length of the vector, and rotating it clockwise around the origin by - the vector's angle from the x axis. - Available for point, box, path, - circle. - - - path '((0,0),(1,0),(1,1))' / point '(2.0,0)' - ((0,0),(0.5,0),(0.5,0.5)) - - - path '((0,0),(1,0),(1,1))' / point(cosd(45), sind(45)) - ((0,0),&zwsp;(0.7071067811865476,-0.7071067811865476),&zwsp;(1.4142135623730951,0)) - - - - - - @-@ geometric_type - double precision - - - Computes the total length. - Available for lseg, path. - - - @-@ path '[(0,0),(1,0),(1,1)]' - 2 - - - - - - @@ geometric_type - point - - - Computes the center point. - Available for box, lseg, - polygon, circle. - - - @@ box '(2,2),(0,0)' - (1,1) - - - - - - # geometric_type - integer - - - Returns the number of points. - Available for path, polygon. - - - # path '((1,0),(0,1),(-1,0))' - 3 - - - - - - geometric_type # geometric_type - point - - - Computes the point of intersection, or NULL if there is none. - Available for lseg, line. - - - lseg '[(0,0),(1,1)]' # lseg '[(1,0),(0,1)]' - (0.5,0.5) - - - - - - box # box - box - - - Computes the intersection of two boxes, or NULL if there is none. 
- - - box '(2,2),(-1,-1)' # box '(1,1),(-2,-2)' - (1,1),(-1,-1) - - - - - - geometric_type ## geometric_type - point - - - Computes the closest point to the first object on the second object. - Available for these pairs of types: - (point, box), - (point, lseg), - (point, line), - (lseg, box), - (lseg, lseg), - (line, lseg). - - - point '(0,0)' ## lseg '[(2,0),(0,2)]' - (1,1) - - - - - - geometric_type <-> geometric_type - double precision - - - Computes the distance between the objects. - Available for all seven geometric types, for all combinations - of point with another geometric type, and for - these additional pairs of types: - (box, lseg), - (lseg, line), - (polygon, circle) - (and the commutator cases). - - - circle '<(0,0),1>' <-> circle '<(5,0),1>' - 3 - - - - - - geometric_type @> geometric_type - boolean - - - Does first object contain second? - Available for these pairs of types: - (box, point), - (box, box), - (path, point), - (polygon, point), - (polygon, polygon), - (circle, point), - (circle, circle). - - - circle '<(0,0),2>' @> point '(1,1)' - t - - - - - - geometric_type <@ geometric_type - boolean - - - Is first object contained in or on second? - Available for these pairs of types: - (point, box), - (point, lseg), - (point, line), - (point, path), - (point, polygon), - (point, circle), - (box, box), - (lseg, box), - (lseg, line), - (polygon, polygon), - (circle, circle). - - - point '(1,1)' <@ circle '<(0,0),2>' - t - - - - - - geometric_type && geometric_type - boolean - - - Do these objects overlap? (One point in common makes this true.) - Available for box, polygon, - circle. - - - box '(1,1),(0,0)' && box '(2,2),(0,0)' - t - - - - - - geometric_type << geometric_type - boolean - - - Is first object strictly left of second? - Available for point, box, - polygon, circle. - - - circle '<(0,0),1>' << circle '<(5,0),1>' - t - - - - - - geometric_type >> geometric_type - boolean - - - Is first object strictly right of second? - Available for point, box, - polygon, circle. - - - circle '<(5,0),1>' >> circle '<(0,0),1>' - t - - - - - - geometric_type &< geometric_type - boolean - - - Does first object not extend to the right of second? - Available for box, polygon, - circle. - - - box '(1,1),(0,0)' &< box '(2,2),(0,0)' - t - - - - - - geometric_type &> geometric_type - boolean - - - Does first object not extend to the left of second? - Available for box, polygon, - circle. - - - box '(3,3),(0,0)' &> box '(2,2),(0,0)' - t - - - - - - geometric_type <<| geometric_type - boolean - - - Is first object strictly below second? - Available for point, box, polygon, - circle. - - - box '(3,3),(0,0)' <<| box '(5,5),(3,4)' - t - - - - - - geometric_type |>> geometric_type - boolean - - - Is first object strictly above second? - Available for point, box, polygon, - circle. - - - box '(5,5),(3,4)' |>> box '(3,3),(0,0)' - t - - - - - - geometric_type &<| geometric_type - boolean - - - Does first object not extend above second? - Available for box, polygon, - circle. - - - box '(1,1),(0,0)' &<| box '(2,2),(0,0)' - t - - - - - - geometric_type |&> geometric_type - boolean - - - Does first object not extend below second? - Available for box, polygon, - circle. - - - box '(3,3),(0,0)' |&> box '(2,2),(0,0)' - t - - - - - - box <^ box - boolean - - - Is first object below second (allows edges to touch)? - - - box '((1,1),(0,0))' <^ box '((2,2),(1,1))' - t - - - - - - box >^ box - boolean - - - Is first object above second (allows edges to touch)? 
- - - box '((2,2),(1,1))' >^ box '((1,1),(0,0))' - t - - - - - - geometric_type ?# geometric_type - boolean - - - Do these objects intersect? - Available for these pairs of types: - (box, box), - (lseg, box), - (lseg, lseg), - (lseg, line), - (line, box), - (line, line), - (path, path). - - - lseg '[(-1,0),(1,0)]' ?# box '(2,2),(-2,-2)' - t - - - - - - ?- line - boolean - - - ?- lseg - boolean - - - Is line horizontal? - - - ?- lseg '[(-1,0),(1,0)]' - t - - - - - - point ?- point - boolean - - - Are points horizontally aligned (that is, have same y coordinate)? - - - point '(1,0)' ?- point '(0,0)' - t - - - - - - ?| line - boolean - - - ?| lseg - boolean - - - Is line vertical? - - - ?| lseg '[(-1,0),(1,0)]' - f - - - - - - point ?| point - boolean - - - Are points vertically aligned (that is, have same x coordinate)? - - - point '(0,1)' ?| point '(0,0)' - t - - - - - - line ?-| line - boolean - - - lseg ?-| lseg - boolean - - - Are lines perpendicular? - - - lseg '[(0,0),(0,1)]' ?-| lseg '[(0,0),(1,0)]' - t - - - - - - line ?|| line - boolean - - - lseg ?|| lseg - boolean - - - Are lines parallel? - - - lseg '[(-1,0),(1,0)]' ?|| lseg '[(-1,2),(1,2)]' - t - - - - - - geometric_type ~= geometric_type - boolean - - - Are these objects the same? - Available for point, box, - polygon, circle. - - - polygon '((0,0),(1,1))' ~= polygon '((1,1),(0,0))' - t - - - - -
- - - - Note that the same as operator, ~=, - represents the usual notion of equality for the point, - box, polygon, and circle types. - Some of the geometric types also have an = operator, but - = compares for equal areas only. - The other scalar comparison operators (<= and so - on), where available for these types, likewise compare areas. - - - - - - Before PostgreSQL 14, the point - is strictly below/above comparison operators point - <<| point and point - |>> point were respectively - called <^ and >^. These - names are still available, but are deprecated and will eventually be - removed. - - - - - Geometric Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - area - - area ( geometric_type ) - double precision - - - Computes area. - Available for box, path, circle. - A path input must be closed, else NULL is returned. - Also, if the path is self-intersecting, the result may be - meaningless. - - - area(box '(2,2),(0,0)') - 4 - - - - - - - center - - center ( geometric_type ) - point - - - Computes center point. - Available for box, circle. - - - center(box '(1,2),(0,0)') - (0.5,1) - - - - - - - diagonal - - diagonal ( box ) - lseg - - - Extracts box's diagonal as a line segment - (same as lseg(box)). - - - diagonal(box '(1,2),(0,0)') - [(1,2),(0,0)] - - - - - - - diameter - - diameter ( circle ) - double precision - - - Computes diameter of circle. - - - diameter(circle '<(0,0),2>') - 4 - - - - - - - height - - height ( box ) - double precision - - - Computes vertical size of box. - - - height(box '(1,2),(0,0)') - 2 - - - - - - - isclosed - - isclosed ( path ) - boolean - - - Is path closed? - - - isclosed(path '((0,0),(1,1),(2,0))') - t - - - - - - - isopen - - isopen ( path ) - boolean - - - Is path open? - - - isopen(path '[(0,0),(1,1),(2,0)]') - t - - - - - - - length - - length ( geometric_type ) - double precision - - - Computes the total length. - Available for lseg, path. - - - length(path '((-1,0),(1,0))') - 4 - - - - - - - npoints - - npoints ( geometric_type ) - integer - - - Returns the number of points. - Available for path, polygon. - - - npoints(path '[(0,0),(1,1),(2,0)]') - 3 - - - - - - - pclose - - pclose ( path ) - path - - - Converts path to closed form. - - - pclose(path '[(0,0),(1,1),(2,0)]') - ((0,0),(1,1),(2,0)) - - - - - - - popen - - popen ( path ) - path - - - Converts path to open form. - - - popen(path '((0,0),(1,1),(2,0))') - [(0,0),(1,1),(2,0)] - - - - - - - radius - - radius ( circle ) - double precision - - - Computes radius of circle. - - - radius(circle '<(0,0),2>') - 2 - - - - - - - slope - - slope ( point, point ) - double precision - - - Computes slope of a line drawn through the two points. - - - slope(point '(0,0)', point '(2,1)') - 0.5 - - - - - - - width - - width ( box ) - double precision - - - Computes horizontal size of box. - - - width(box '(1,2),(0,0)') - 1 - - - - -
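A small sketch combining the containment operator above with an ordinary table scan; the sites table and its contents are hypothetical:

CREATE TABLE sites (name text, loc point);
INSERT INTO sites VALUES ('origin', point '(0,0)'), ('far', point '(10,10)');
-- which sites lie within distance 2 of (1,1)?
SELECT name FROM sites WHERE loc <@ circle '<(1,1),2>';
Result: origin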
- - - Geometric Type Conversion Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - box - - box ( circle ) - box - - - Computes box inscribed within the circle. - - - box(circle '<(0,0),2>') - (1.414213562373095,1.414213562373095),&zwsp;(-1.414213562373095,-1.414213562373095) - - - - - - box ( point ) - box - - - Converts point to empty box. - - - box(point '(1,0)') - (1,0),(1,0) - - - - - - box ( point, point ) - box - - - Converts any two corner points to box. - - - box(point '(0,1)', point '(1,0)') - (1,1),(0,0) - - - - - - box ( polygon ) - box - - - Computes bounding box of polygon. - - - box(polygon '((0,0),(1,1),(2,0))') - (2,1),(0,0) - - - - - - - bound_box - - bound_box ( box, box ) - box - - - Computes bounding box of two boxes. - - - bound_box(box '(1,1),(0,0)', box '(4,4),(3,3)') - (4,4),(0,0) - - - - - - - circle - - circle ( box ) - circle - - - Computes smallest circle enclosing box. - - - circle(box '(1,1),(0,0)') - <(0.5,0.5),0.7071067811865476> - - - - - - circle ( point, double precision ) - circle - - - Constructs circle from center and radius. - - - circle(point '(0,0)', 2.0) - <(0,0),2> - - - - - - circle ( polygon ) - circle - - - Converts polygon to circle. The circle's center is the mean of the - positions of the polygon's points, and the radius is the average - distance of the polygon's points from that center. - - - circle(polygon '((0,0),(1,3),(2,0))') - <(1,1),1.6094757082487299> - - - - - - - line - - line ( point, point ) - line - - - Converts two points to the line through them. - - - line(point '(-1,0)', point '(1,0)') - {0,-1,0} - - - - - - - lseg - - lseg ( box ) - lseg - - - Extracts box's diagonal as a line segment. - - - lseg(box '(1,0),(-1,0)') - [(1,0),(-1,0)] - - - - - - lseg ( point, point ) - lseg - - - Constructs line segment from two endpoints. - - - lseg(point '(-1,0)', point '(1,0)') - [(-1,0),(1,0)] - - - - - - - path - - path ( polygon ) - path - - - Converts polygon to a closed path with the same list of points. - - - path(polygon '((0,0),(1,1),(2,0))') - ((0,0),(1,1),(2,0)) - - - - - - - point - - point ( double precision, double precision ) - point - - - Constructs point from its coordinates. - - - point(23.4, -44.5) - (23.4,-44.5) - - - - - - point ( box ) - point - - - Computes center of box. - - - point(box '(1,0),(-1,0)') - (0,0) - - - - - - point ( circle ) - point - - - Computes center of circle. - - - point(circle '<(0,0),2>') - (0,0) - - - - - - point ( lseg ) - point - - - Computes center of line segment. - - - point(lseg '[(-1,0),(1,0)]') - (0,0) - - - - - - point ( polygon ) - point - - - Computes center of polygon (the mean of the - positions of the polygon's points). - - - point(polygon '((0,0),(1,1),(2,0))') - (1,0.3333333333333333) - - - - - - - polygon - - polygon ( box ) - polygon - - - Converts box to a 4-point polygon. - - - polygon(box '(1,1),(0,0)') - ((0,0),(0,1),(1,1),(1,0)) - - - - - - polygon ( circle ) - polygon - - - Converts circle to a 12-point polygon. 
- - - polygon(circle '<(0,0),2>') - ((-2,0),&zwsp;(-1.7320508075688774,0.9999999999999999),&zwsp;(-1.0000000000000002,1.7320508075688772),&zwsp;(-1.2246063538223773e-16,2),&zwsp;(0.9999999999999996,1.7320508075688774),&zwsp;(1.732050807568877,1.0000000000000007),&zwsp;(2,2.4492127076447545e-16),&zwsp;(1.7320508075688776,-0.9999999999999994),&zwsp;(1.0000000000000009,-1.7320508075688767),&zwsp;(3.673819061467132e-16,-2),&zwsp;(-0.9999999999999987,-1.732050807568878),&zwsp;(-1.7320508075688767,-1.0000000000000009)) - - - - - - polygon ( integer, circle ) - polygon - - - Converts circle to an n-point polygon. - - - polygon(4, circle '<(3,0),1>') - ((2,0),&zwsp;(3,1),&zwsp;(4,1.2246063538223773e-16),&zwsp;(3,-1)) - - - - - - polygon ( path ) - polygon - - - Converts closed path to a polygon with the same list of points. - - - polygon(path '((0,0),(1,1),(2,0))') - ((0,0),(1,1),(2,0)) - - - - - -
- - - It is possible to access the two component numbers of a point - as though the point were an array with indexes 0 and 1. For example, if - t.p is a point column then - SELECT p[0] FROM t retrieves the X coordinate and - UPDATE t SET p[1] = ... changes the Y coordinate. - In the same way, a value of type box or lseg can be treated - as an array of two point values. - - -
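A minimal sketch of the subscript access just described, using a hypothetical table t:

CREATE TABLE t (p point);
INSERT INTO t VALUES (point '(3,4)');
SELECT p[0] AS x, p[1] AS y FROM t;
Result: 3 | 4
UPDATE t SET p[1] = 7;  -- changes only the Y coordinate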
- - - - Network Address Functions and Operators - - - The IP network address types, cidr and inet, - support the usual comparison operators shown in - - as well as the specialized operators and functions shown in - and - . - - - - Any cidr value can be cast to inet implicitly; - therefore, the operators and functions shown below as operating on - inet also work on cidr values. (Where there are - separate functions for inet and cidr, it is - because the behavior should be different for the two cases.) - Also, it is permitted to cast an inet value - to cidr. When this is done, any bits to the right of the - netmask are silently zeroed to create a valid cidr value. - - - - IP Address Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - inet << inet - boolean - - - Is subnet strictly contained by subnet? - This operator, and the next four, test for subnet inclusion. They - consider only the network parts of the two addresses (ignoring any - bits to the right of the netmasks) and determine whether one network - is identical to or a subnet of the other. - - - inet '192.168.1.5' << inet '192.168.1/24' - t - - - inet '192.168.0.5' << inet '192.168.1/24' - f - - - inet '192.168.1/24' << inet '192.168.1/24' - f - - - - - - inet <<= inet - boolean - - - Is subnet contained by or equal to subnet? - - - inet '192.168.1/24' <<= inet '192.168.1/24' - t - - - - - - inet >> inet - boolean - - - Does subnet strictly contain subnet? - - - inet '192.168.1/24' >> inet '192.168.1.5' - t - - - - - - inet >>= inet - boolean - - - Does subnet contain or equal subnet? - - - inet '192.168.1/24' >>= inet '192.168.1/24' - t - - - - - - inet && inet - boolean - - - Does either subnet contain or equal the other? - - - inet '192.168.1/24' && inet '192.168.1.80/28' - t - - - inet '192.168.1/24' && inet '192.168.2.0/28' - f - - - - - - ~ inet - inet - - - Computes bitwise NOT. - - - ~ inet '192.168.1.6' - 63.87.254.249 - - - - - - inet & inet - inet - - - Computes bitwise AND. - - - inet '192.168.1.6' & inet '0.0.0.255' - 0.0.0.6 - - - - - - inet | inet - inet - - - Computes bitwise OR. - - - inet '192.168.1.6' | inet '0.0.0.255' - 192.168.1.255 - - - - - - inet + bigint - inet - - - Adds an offset to an address. - - - inet '192.168.1.6' + 25 - 192.168.1.31 - - - - - - bigint + inet - inet - - - Adds an offset to an address. - - - 200 + inet '::ffff:fff0:1' - ::ffff:255.240.0.201 - - - - - - inet - bigint - inet - - - Subtracts an offset from an address. - - - inet '192.168.1.43' - 36 - 192.168.1.7 - - - - - - inet - inet - bigint - - - Computes the difference of two addresses. - - - inet '192.168.1.43' - inet '192.168.1.19' - 24 - - - inet '::1' - inet '::ffff:1' - -4294901760 - - - - -
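A brief sketch of the inet-to-cidr cast described above, together with a typical containment test; the connlog table is hypothetical:

SELECT inet '192.168.1.5/24'::cidr;
Result: 192.168.1.0/24
-- count connections from the 10.0.0.0/8 private range:
SELECT count(*) FROM connlog WHERE client_addr <<= inet '10.0.0.0/8';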
- - - IP Address Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - abbrev - - abbrev ( inet ) - text - - - Creates an abbreviated display format as text. - (The result is the same as the inet output function - produces; it is abbreviated only in comparison to the - result of an explicit cast to text, which for historical - reasons will never suppress the netmask part.) - - - abbrev(inet '10.1.0.0/32') - 10.1.0.0 - - - - - - abbrev ( cidr ) - text - - - Creates an abbreviated display format as text. - (The abbreviation consists of dropping all-zero octets to the right - of the netmask; more examples are in - .) - - - abbrev(cidr '10.1.0.0/16') - 10.1/16 - - - - - - - broadcast - - broadcast ( inet ) - inet - - - Computes the broadcast address for the address's network. - - - broadcast(inet '192.168.1.5/24') - 192.168.1.255/24 - - - - - - - family - - family ( inet ) - integer - - - Returns the address's family: 4 for IPv4, - 6 for IPv6. - - - family(inet '::1') - 6 - - - - - - - host - - host ( inet ) - text - - - Returns the IP address as text, ignoring the netmask. - - - host(inet '192.168.1.0/24') - 192.168.1.0 - - - - - - - hostmask - - hostmask ( inet ) - inet - - - Computes the host mask for the address's network. - - - hostmask(inet '192.168.23.20/30') - 0.0.0.3 - - - - - - - inet_merge - - inet_merge ( inet, inet ) - cidr - - - Computes the smallest network that includes both of the given networks. - - - inet_merge(inet '192.168.1.5/24', inet '192.168.2.5/24') - 192.168.0.0/22 - - - - - - - inet_same_family - - inet_same_family ( inet, inet ) - boolean - - - Tests whether the addresses belong to the same IP family. - - - inet_same_family(inet '192.168.1.5/24', inet '::1') - f - - - - - - - masklen - - masklen ( inet ) - integer - - - Returns the netmask length in bits. - - - masklen(inet '192.168.1.5/24') - 24 - - - - - - - netmask - - netmask ( inet ) - inet - - - Computes the network mask for the address's network. - - - netmask(inet '192.168.1.5/24') - 255.255.255.0 - - - - - - - network - - network ( inet ) - cidr - - - Returns the network part of the address, zeroing out - whatever is to the right of the netmask. - (This is equivalent to casting the value to cidr.) - - - network(inet '192.168.1.5/24') - 192.168.1.0/24 - - - - - - - set_masklen - - set_masklen ( inet, integer ) - inet - - - Sets the netmask length for an inet value. - The address part does not change. - - - set_masklen(inet '192.168.1.5/24', 16) - 192.168.1.5/16 - - - - - - set_masklen ( cidr, integer ) - cidr - - - Sets the netmask length for a cidr value. - Address bits to the right of the new netmask are set to zero. - - - set_masklen(cidr '192.168.1.0/24', 16) - 192.168.0.0/16 - - - - - - - text - - text ( inet ) - text - - - Returns the unabbreviated IP address and netmask length as text. - (This has the same result as an explicit cast to text.) - - - text(inet '192.168.1.5') - 192.168.1.5/32 - - - - -
- - - - The abbrev, host, - and text functions are primarily intended to offer - alternative display formats for IP addresses. - - - - - The MAC address types, macaddr and macaddr8, - support the usual comparison operators shown in - - as well as the specialized functions shown in - . - In addition, they support the bitwise logical operators - ~, & and | - (NOT, AND and OR), just as shown above for IP addresses. - - - - MAC Address Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - trunc - - trunc ( macaddr ) - macaddr - - - Sets the last 3 bytes of the address to zero. The remaining prefix - can be associated with a particular manufacturer (using data not - included in PostgreSQL). - - - trunc(macaddr '12:34:56:78:90:ab') - 12:34:56:00:00:00 - - - - - - trunc ( macaddr8 ) - macaddr8 - - - Sets the last 5 bytes of the address to zero. The remaining prefix - can be associated with a particular manufacturer (using data not - included in PostgreSQL). - - - trunc(macaddr8 '12:34:56:78:90:ab:cd:ef') - 12:34:56:00:00:00:00:00 - - - - - - - macaddr8_set7bit - - macaddr8_set7bit ( macaddr8 ) - macaddr8 - - - Sets the 7th bit of the address to one, creating what is known as - modified EUI-64, for inclusion in an IPv6 address. - - - macaddr8_set7bit(macaddr8 '00:34:56:ab:cd:ef') - 02:34:56:ff:fe:ab:cd:ef - - - - -
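One common use of trunc is grouping hardware addresses by manufacturer prefix. A sketch, assuming a hypothetical devices table with a macaddr column mac:

SELECT trunc(mac) AS oui, count(*) AS devices
FROM devices
GROUP BY trunc(mac)
ORDER BY devices DESC;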
- -
- - - - Text Search Functions and Operators - - - full text search - functions and operators - - - - text search - functions and operators - - - - , - and - - summarize the functions and operators that are provided - for full text searching. See for a detailed - explanation of PostgreSQL's text search - facility. - - - - Text Search Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - tsvector @@ tsquery - boolean - - - tsquery @@ tsvector - boolean - - - Does tsvector match tsquery? - (The arguments can be given in either order.) - - - to_tsvector('fat cats ate rats') @@ to_tsquery('cat & rat') - t - - - - - - text @@ tsquery - boolean - - - Does text string, after implicit invocation - of to_tsvector(), match tsquery? - - - 'fat cats ate rats' @@ to_tsquery('cat & rat') - t - - - - - - tsvector || tsvector - tsvector - - - Concatenates two tsvectors. If both inputs contain - lexeme positions, the second input's positions are adjusted - accordingly. - - - 'a:1 b:2'::tsvector || 'c:1 d:2 b:3'::tsvector - 'a':1 'b':2,5 'c':3 'd':4 - - - - - - tsquery && tsquery - tsquery - - - ANDs two tsquerys together, producing a query that - matches documents that match both input queries. - - - 'fat | rat'::tsquery && 'cat'::tsquery - ( 'fat' | 'rat' ) & 'cat' - - - - - - tsquery || tsquery - tsquery - - - ORs two tsquerys together, producing a query that - matches documents that match either input query. - - - 'fat | rat'::tsquery || 'cat'::tsquery - 'fat' | 'rat' | 'cat' - - - - - - !! tsquery - tsquery - - - Negates a tsquery, producing a query that matches - documents that do not match the input query. - - - !! 'cat'::tsquery - !'cat' - - - - - - tsquery <-> tsquery - tsquery - - - Constructs a phrase query, which matches if the two input queries - match at successive lexemes. - - - to_tsquery('fat') <-> to_tsquery('rat') - 'fat' <-> 'rat' - - - - - - tsquery @> tsquery - boolean - - - Does first tsquery contain the second? (This considers - only whether all the lexemes appearing in one query appear in the - other, ignoring the combining operators.) - - - 'cat'::tsquery @> 'cat & rat'::tsquery - f - - - - - - tsquery <@ tsquery - boolean - - - Is first tsquery contained in the second? (This - considers only whether all the lexemes appearing in one query appear - in the other, ignoring the combining operators.) - - - 'cat'::tsquery <@ 'cat & rat'::tsquery - t - - - 'cat'::tsquery <@ '!cat & rat'::tsquery - t - - - - -
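An end-to-end sketch of the @@ operator against a hypothetical docs table:

CREATE TABLE docs (body text);
INSERT INTO docs VALUES ('a fat cat sat on a mat'), ('the quick brown fox');
SELECT body FROM docs
WHERE to_tsvector('english', body) @@ to_tsquery('english', 'cat & mat');
Result: a fat cat sat on a mat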
- - - In addition to these specialized operators, the usual comparison - operators shown in are - available for types tsvector and tsquery. - These are not very - useful for text searching but allow, for example, unique indexes to be - built on columns of these types. - - - - Text Search Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - array_to_tsvector - - array_to_tsvector ( text[] ) - tsvector - - - Converts an array of text strings to a tsvector. - The given strings are used as lexemes as-is, without further - processing. Array elements must not be empty strings - or NULL. - - - array_to_tsvector('{fat,cat,rat}'::text[]) - 'cat' 'fat' 'rat' - - - - - - - get_current_ts_config - - get_current_ts_config ( ) - regconfig - - - Returns the OID of the current default text search configuration - (as set by ). - - - get_current_ts_config() - english - - - - - - - length - - length ( tsvector ) - integer - - - Returns the number of lexemes in the tsvector. - - - length('fat:2,4 cat:3 rat:5A'::tsvector) - 3 - - - - - - - numnode - - numnode ( tsquery ) - integer - - - Returns the number of lexemes plus operators in - the tsquery. - - - numnode('(fat & rat) | cat'::tsquery) - 5 - - - - - - - plainto_tsquery - - plainto_tsquery ( - config regconfig, - query text ) - tsquery - - - Converts text to a tsquery, normalizing words according to - the specified or default configuration. Any punctuation in the string - is ignored (it does not determine query operators). The resulting - query matches documents containing all non-stopwords in the text. - - - plainto_tsquery('english', 'The Fat Rats') - 'fat' & 'rat' - - - - - - - phraseto_tsquery - - phraseto_tsquery ( - config regconfig, - query text ) - tsquery - - - Converts text to a tsquery, normalizing words according to - the specified or default configuration. Any punctuation in the string - is ignored (it does not determine query operators). The resulting - query matches phrases containing all non-stopwords in the text. - - - phraseto_tsquery('english', 'The Fat Rats') - 'fat' <-> 'rat' - - - phraseto_tsquery('english', 'The Cat and Rats') - 'cat' <2> 'rat' - - - - - - - websearch_to_tsquery - - websearch_to_tsquery ( - config regconfig, - query text ) - tsquery - - - Converts text to a tsquery, normalizing words according - to the specified or default configuration. Quoted word sequences are - converted to phrase tests. The word or is understood - as producing an OR operator, and a dash produces a NOT operator; - other punctuation is ignored. - This approximates the behavior of some common web search tools. - - - websearch_to_tsquery('english', '"fat rat" or cat dog') - 'fat' <-> 'rat' | 'cat' & 'dog' - - - - - - - querytree - - querytree ( tsquery ) - text - - - Produces a representation of the indexable portion of - a tsquery. A result that is empty or - just T indicates a non-indexable query. - - - querytree('foo & ! bar'::tsquery) - 'foo' - - - - - - - setweight - - setweight ( vector tsvector, weight "char" ) - tsvector - - - Assigns the specified weight to each element - of the vector. - - - setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A') - 'cat':3A 'fat':2A,4A 'rat':5A - - - - - - - setweight - setweight for specific lexeme(s) - - setweight ( vector tsvector, weight "char", lexemes text[] ) - tsvector - - - Assigns the specified weight to elements - of the vector that are listed - in lexemes. - The strings in lexemes are taken as lexemes - as-is, without further processing. 
Strings that do not match any - lexeme in vector are ignored. - - - setweight('fat:2,4 cat:3 rat:5,6B'::tsvector, 'A', '{cat,rat}') - 'cat':3A 'fat':2,4 'rat':5A,6A - - - - - - - strip - - strip ( tsvector ) - tsvector - - - Removes positions and weights from the tsvector. - - - strip('fat:2,4 cat:3 rat:5A'::tsvector) - 'cat' 'fat' 'rat' - - - - - - - to_tsquery - - to_tsquery ( - config regconfig, - query text ) - tsquery - - - Converts text to a tsquery, normalizing words according to - the specified or default configuration. The words must be combined - by valid tsquery operators. - - - to_tsquery('english', 'The & Fat & Rats') - 'fat' & 'rat' - - - - - - - to_tsvector - - to_tsvector ( - config regconfig, - document text ) - tsvector - - - Converts text to a tsvector, normalizing words according - to the specified or default configuration. Position information is - included in the result. - - - to_tsvector('english', 'The Fat Rats') - 'fat':2 'rat':3 - - - - - - to_tsvector ( - config regconfig, - document json ) - tsvector - - - to_tsvector ( - config regconfig, - document jsonb ) - tsvector - - - Converts each string value in the JSON document to - a tsvector, normalizing words according to the specified - or default configuration. The results are then concatenated in - document order to produce the output. Position information is - generated as though one stopword exists between each pair of string - values. (Beware that document order of the fields of a - JSON object is implementation-dependent when the input - is jsonb; observe the difference in the examples.) - - - to_tsvector('english', '{"aa": "The Fat Rats", "b": "dog"}'::json) - 'dog':5 'fat':2 'rat':3 - - - to_tsvector('english', '{"aa": "The Fat Rats", "b": "dog"}'::jsonb) - 'dog':1 'fat':4 'rat':5 - - - - - - - json_to_tsvector - - json_to_tsvector ( - config regconfig, - document json, - filter jsonb ) - tsvector - - - - jsonb_to_tsvector - - jsonb_to_tsvector ( - config regconfig, - document jsonb, - filter jsonb ) - tsvector - - - Selects each item in the JSON document that is requested by - the filter and converts each one to - a tsvector, normalizing words according to the specified - or default configuration. The results are then concatenated in - document order to produce the output. Position information is - generated as though one stopword exists between each pair of selected - items. (Beware that document order of the fields of a - JSON object is implementation-dependent when the input - is jsonb.) - The filter must be a jsonb - array containing zero or more of these keywords: - "string" (to include all string values), - "numeric" (to include all numeric values), - "boolean" (to include all boolean values), - "key" (to include all keys), or - "all" (to include all the above). - As a special case, the filter can also be a - simple JSON value that is one of these keywords. - - - json_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json, '["string", "numeric"]') - '123':5 'fat':2 'rat':3 - - - json_to_tsvector('english', '{"cat": "The Fat Rats", "dog": 123}'::json, '"all"') - '123':9 'cat':1 'dog':7 'fat':4 'rat':5 - - - - - - - ts_delete - - ts_delete ( vector tsvector, lexeme text ) - tsvector - - - Removes any occurrence of the given lexeme - from the vector. - The lexeme string is treated as a lexeme as-is, - without further processing. 
- - - ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat') - 'cat':3 'rat':5A - - - - - - ts_delete ( vector tsvector, lexemes text[] ) - tsvector - - - Removes any occurrences of the lexemes - in lexemes - from the vector. - The strings in lexemes are taken as lexemes - as-is, without further processing. Strings that do not match any - lexeme in vector are ignored. - - - ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat']) - 'cat':3 - - - - - - - ts_filter - - ts_filter ( vector tsvector, weights "char"[] ) - tsvector - - - Selects only elements with the given weights - from the vector. - - - ts_filter('fat:2,4 cat:3b,7c rat:5A'::tsvector, '{a,b}') - 'cat':3B 'rat':5A - - - - - - - ts_headline - - ts_headline ( - config regconfig, - document text, - query tsquery - , options text ) - text - - - Displays, in an abbreviated form, the match(es) for - the query in - the document, which must be raw text not - a tsvector. Words in the document are normalized - according to the specified or default configuration before matching to - the query. Use of this function is discussed in - , which also describes the - available options. - - - ts_headline('The fat cat ate the rat.', 'cat') - The fat <b>cat</b> ate the rat. - - - - - - ts_headline ( - config regconfig, - document json, - query tsquery - , options text ) - text - - - ts_headline ( - config regconfig, - document jsonb, - query tsquery - , options text ) - text - - - Displays, in an abbreviated form, match(es) for - the query that occur in string values - within the JSON document. - See for more details. - - - ts_headline('{"cat":"raining cats and dogs"}'::jsonb, 'cat') - {"cat": "raining <b>cats</b> and dogs"} - - - - - - - ts_rank - - ts_rank ( - weights real[], - vector tsvector, - query tsquery - , normalization integer ) - real - - - Computes a score showing how well - the vector matches - the query. See - for details. - - - ts_rank(to_tsvector('raining cats and dogs'), 'cat') - 0.06079271 - - - - - - - ts_rank_cd - - ts_rank_cd ( - weights real[], - vector tsvector, - query tsquery - , normalization integer ) - real - - - Computes a score showing how well - the vector matches - the query, using a cover density - algorithm. See for details. - - - ts_rank_cd(to_tsvector('raining cats and dogs'), 'cat') - 0.1 - - - - - - - ts_rewrite - - ts_rewrite ( query tsquery, - target tsquery, - substitute tsquery ) - tsquery - - - Replaces occurrences of target - with substitute - within the query. - See for details. - - - ts_rewrite('a & b'::tsquery, 'a'::tsquery, 'foo|bar'::tsquery) - 'b' & ( 'foo' | 'bar' ) - - - - - - ts_rewrite ( query tsquery, - select text ) - tsquery - - - Replaces portions of the query according to - target(s) and substitute(s) obtained by executing - a SELECT command. - See for details. - - - SELECT ts_rewrite('a & b'::tsquery, 'SELECT t,s FROM aliases') - 'b' & ( 'foo' | 'bar' ) - - - - - - - tsquery_phrase - - tsquery_phrase ( query1 tsquery, query2 tsquery ) - tsquery - - - Constructs a phrase query that searches - for matches of query1 - and query2 at successive lexemes (same - as <-> operator). - - - tsquery_phrase(to_tsquery('fat'), to_tsquery('cat')) - 'fat' <-> 'cat' - - - - - - tsquery_phrase ( query1 tsquery, query2 tsquery, distance integer ) - tsquery - - - Constructs a phrase query that searches - for matches of query1 and - query2 that occur exactly - distance lexemes apart. 
- - - tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10) - 'fat' <10> 'cat' - - - - - - - tsvector_to_array - - tsvector_to_array ( tsvector ) - text[] - - - Converts a tsvector to an array of lexemes. - - - tsvector_to_array('fat:2,4 cat:3 rat:5A'::tsvector) - {cat,fat,rat} - - - - - - - unnest - for tsvector - - unnest ( tsvector ) - setof record - ( lexeme text, - positions smallint[], - weights text ) - - - Expands a tsvector into a set of rows, one per lexeme. - - - select * from unnest('cat:3 fat:2,4 rat:5A'::tsvector) - - - lexeme | positions | weights ---------+-----------+--------- - cat | {3} | {D} - fat | {2,4} | {D,D} - rat | {5} | {A} - - - - - -
- - - - All the text search functions that accept an optional regconfig - argument will use the configuration specified by - - when that argument is omitted. - - - - - The functions in - - are listed separately because they are not usually used in everyday text - searching operations. They are primarily helpful for development and - debugging of new text search configurations. - - - - Text Search Debugging Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - ts_debug - - ts_debug ( - config regconfig, - document text ) - setof record - ( alias text, - description text, - token text, - dictionaries regdictionary[], - dictionary regdictionary, - lexemes text[] ) - - - Extracts and normalizes tokens from - the document according to the specified or - default text search configuration, and returns information about how - each token was processed. - See for details. - - - ts_debug('english', 'The Brightest supernovaes') - (asciiword,"Word, all ASCII",The,{english_stem},english_stem,{}) ... - - - - - - - ts_lexize - - ts_lexize ( dict regdictionary, token text ) - text[] - - - Returns an array of replacement lexemes if the input token is known to - the dictionary, or an empty array if the token is known to the - dictionary but it is a stop word, or NULL if it is not a known word. - See for details. - - - ts_lexize('english_stem', 'stars') - {star} - - - - - - - ts_parse - - ts_parse ( parser_name text, - document text ) - setof record - ( tokid integer, - token text ) - - - Extracts tokens from the document using the - named parser. - See for details. - - - ts_parse('default', 'foo - bar') - (1,foo) ... - - - - - - ts_parse ( parser_oid oid, - document text ) - setof record - ( tokid integer, - token text ) - - - Extracts tokens from the document using a - parser specified by OID. - See for details. - - - ts_parse(3722, 'foo - bar') - (1,foo) ... - - - - - - - ts_token_type - - ts_token_type ( parser_name text ) - setof record - ( tokid integer, - alias text, - description text ) - - - Returns a table that describes each type of token the named parser can - recognize. - See for details. - - - ts_token_type('default') - (1,asciiword,"Word, all ASCII") ... - - - - - - ts_token_type ( parser_oid oid ) - setof record - ( tokid integer, - alias text, - description text ) - - - Returns a table that describes each type of token a parser specified - by OID can recognize. - See for details. - - - ts_token_type(3722) - (1,asciiword,"Word, all ASCII") ... - - - - - - - ts_stat - - ts_stat ( sqlquery text - , weights text ) - setof record - ( word text, - ndoc integer, - nentry integer ) - - - Executes the sqlquery, which must return a - single tsvector column, and returns statistics about each - distinct lexeme contained in the data. - See for details. - - - ts_stat('SELECT vector FROM apod') - (foo,10,15) ... - - - - -
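A sketch tying several of the functions above together, weighting a title more heavily than the body before ranking; the document text is made up and the exact score will vary:

SELECT ts_rank(
         setweight(to_tsvector('english', 'The Cat in the Hat'), 'A') ||
         setweight(to_tsvector('english', 'a story about a cat and a hat'), 'D'),
         to_tsquery('english', 'cat')) AS rank;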
- -
- - - UUID Functions - - - UUID - generating - - - - gen_random_uuid - - - - uuidv4 - - - - uuidv7 - - - - uuid_extract_timestamp - - - - uuid_extract_version - - - - shows the PostgreSQL - functions that can be used to generate UUIDs. - - - - <acronym>UUID</acronym> Generation Functions - - - - - - Function - - - Description - - - Example(s) - - - - - - - - - - gen_random_uuid - uuid - - - uuidv4 - uuid - - - Generate a version 4 (random) UUID. - - - gen_random_uuid() - 5b30857f-0bfa-48b5-ac0b-5c64e28078d1 - - - uuidv4() - b42410ee-132f-42ee-9e4f-09a6485c95b8 - - - - - - - uuidv7 - ( shift interval ) - uuid - - - Generate a version 7 (time-ordered) UUID. The timestamp is computed using UNIX timestamp - with millisecond precision + sub-millisecond timestamp + random. The optional parameter - shift will shift the computed timestamp by the given interval. - - - uuidv7() - 019535d9-3df7-79fb-b466-fa907fa17f9e - - - - - -
- - - - The module provides additional functions that - implement other standard algorithms for generating UUIDs. - - - - - shows the PostgreSQL - functions that can be used to extract information from UUIDs. - - - - <acronym>UUID</acronym> Extraction Functions - - - - - - Function - - - Description - - - Example(s) - - - - - - - - - - uuid_extract_timestamp - ( uuid ) - timestamp with time zone - - - Extracts a timestamp with time zone from UUID - version 1 and 7. For other versions, this function returns null. Note that - the extracted timestamp is not necessarily exactly equal to the time the - UUID was generated; this depends on the implementation that generated the - UUID. - - - uuid_extract_timestamp('019535d9-3df7-79fb-b466-&zwsp;fa907fa17f9e'::uuid) - 2025-02-23 21:46:24.503-05 - - - - - - - uuid_extract_version - ( uuid ) - smallint - - - Extracts the version from a UUID of the variant described by - RFC 9562. For - other variants, this function returns null. For example, for a UUID - generated by gen_random_uuid, this function will - return 4. - - - uuid_extract_version('41db1265-8bc1-4ab3-992f-&zwsp;885799a4af1d'::uuid) - 4 - - - uuid_extract_version('019535d9-3df7-79fb-b466-&zwsp;fa907fa17f9e'::uuid) - 7 - - - - - -
- - - PostgreSQL also provides the usual comparison - operators shown in for - UUIDs. - - - See for details on the data type - uuid in PostgreSQL. - -
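A brief sketch using the generation and extraction functions together; generated values will differ, and the events table is hypothetical:

CREATE TABLE events (id uuid PRIMARY KEY DEFAULT uuidv7(), payload text);
INSERT INTO events (payload) VALUES ('hello');
-- a version-7 id embeds its creation time:
SELECT uuid_extract_version(id), uuid_extract_timestamp(id) FROM events;
Result: 7 | (the insertion time, to millisecond precision)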
- - - - XML Functions - - - XML Functions - - - - The functions and function-like expressions described in this - section operate on values of type xml. See for information about the xml - type. The function-like expressions xmlparse - and xmlserialize for converting to and from - type xml are documented there, not in this section. - - - - Use of most of these functions - requires PostgreSQL to have been built - with configure --with-libxml. - - - - Producing XML Content - - - A set of functions and function-like expressions is available for - producing XML content from SQL data. As such, they are - particularly suitable for formatting query results into XML - documents for processing in client applications. - - - - <literal>xmltext</literal> - - - xmltext - - - -xmltext ( text ) xml - - - - The function xmltext returns an XML value with a single - text node containing the input argument as its content. Predefined entities - like ampersand (), left and right angle brackets - (]]>), and quotation marks () - are escaped. - - - - Example: -'); - xmltext -------------------------- - < foo & bar > -]]> - - - - - <literal>xmlcomment</literal> - - - xmlcomment - - - -xmlcomment ( text ) xml - - - - The function xmlcomment creates an XML value - containing an XML comment with the specified text as content. - The text cannot contain -- or end with a - -, otherwise the resulting construct - would not be a valid XML comment. - If the argument is null, the result is null. - - - - Example: - -]]> - - - - - <literal>xmlconcat</literal> - - - xmlconcat - - - -xmlconcat ( xml , ... ) xml - - - - The function xmlconcat concatenates a list - of individual XML values to create a single value containing an - XML content fragment. Null values are omitted; the result is - only null if there are no nonnull arguments. - - - - Example: -', 'foo'); - - xmlconcat ----------------------- - foo -]]> - - - - XML declarations, if present, are combined as follows. If all - argument values have the same XML version declaration, that - version is used in the result, else no version is used. If all - argument values have the standalone declaration value - yes, then that value is used in the result. If - all argument values have a standalone declaration value and at - least one is no, then that is used in the result. - Else the result will have no standalone declaration. If the - result is determined to require a standalone declaration but no - version declaration, a version declaration with version 1.0 will - be used because XML requires an XML declaration to contain a - version declaration. Encoding declarations are ignored and - removed in all cases. - - - - Example: -', ''); - - xmlconcat ------------------------------------ - -]]> - - - - - <literal>xmlelement</literal> - - - xmlelement - - - -xmlelement ( NAME name , XMLATTRIBUTES ( attvalue AS attname , ... ) , content , ... ) xml - - - - The xmlelement expression produces an XML - element with the given name, attributes, and content. - The name - and attname items shown in the syntax are - simple identifiers, not values. The attvalue - and content items are expressions, which can - yield any PostgreSQL data type. The - argument(s) within XMLATTRIBUTES generate attributes - of the XML element; the content value(s) are - concatenated to form its content. 
- - - - Examples: - - -SELECT xmlelement(name foo, xmlattributes('xyz' as bar)); - - xmlelement ------------------- - - -SELECT xmlelement(name foo, xmlattributes(current_date as bar), 'cont', 'ent'); - - xmlelement -------------------------------------- - content -]]> - - - - Element and attribute names that are not valid XML names are - escaped by replacing the offending characters by the sequence - _xHHHH_, where - HHHH is the character's Unicode - codepoint in hexadecimal notation. For example: - -]]> - - - - An explicit attribute name need not be specified if the attribute - value is a column reference, in which case the column's name will - be used as the attribute name by default. In other cases, the - attribute must be given an explicit name. So this example is - valid: - -CREATE TABLE test (a xml, b xml); -SELECT xmlelement(name test, xmlattributes(a, b)) FROM test; - - But these are not: - -SELECT xmlelement(name test, xmlattributes('constant'), a, b) FROM test; -SELECT xmlelement(name test, xmlattributes(func(a, b))) FROM test; - - - - - Element content, if specified, will be formatted according to - its data type. If the content is itself of type xml, - complex XML documents can be constructed. For example: - -]]> - - Content of other types will be formatted into valid XML character - data. This means in particular that the characters <, >, - and & will be converted to entities. Binary data (data type - bytea) will be represented in base64 or hex - encoding, depending on the setting of the configuration parameter - . The particular behavior for - individual data types is expected to evolve in order to align the - PostgreSQL mappings with those specified in SQL:2006 and later, - as discussed in . - - - - - <literal>xmlforest</literal> - - - xmlforest - - - -xmlforest ( content AS name , ... ) xml - - - - The xmlforest expression produces an XML - forest (sequence) of elements using the given names and content. - As for xmlelement, - each name must be a simple identifier, while - the content expressions can have any data - type. - - - - Examples: - -SELECT xmlforest('abc' AS foo, 123 AS bar); - - xmlforest ------------------------------- - <foo>abc</foo><bar>123</bar> - - -SELECT xmlforest(table_name, column_name) -FROM information_schema.columns -WHERE table_schema = 'pg_catalog'; - - xmlforest -------------------------------------&zwsp;----------------------------------- - <table_name>pg_authid</table_name>&zwsp;<column_name>rolname</column_name> - <table_name>pg_authid</table_name>&zwsp;<column_name>rolsuper</column_name> - ... - - - As seen in the second example, the element name can be omitted if - the content value is a column reference, in which case the column - name is used by default. Otherwise, a name must be specified. - - - - Element names that are not valid XML names are escaped as shown - for xmlelement above. Similarly, content - data is escaped to make valid XML content, unless it is already - of type xml. - - - - Note that XML forests are not valid XML documents if they consist - of more than one element, so it might be useful to wrap - xmlforest expressions in - xmlelement. - - - - - <literal>xmlpi</literal> - - - xmlpi - - - -xmlpi ( NAME name , content ) xml - - - - The xmlpi expression creates an XML - processing instruction. - As for xmlelement, - the name must be a simple identifier, while - the content expression can have any data type. - The content, if present, must not contain the - character sequence ?>. 
- - - - Example: - -]]> - - - - - <literal>xmlroot</literal> - - - xmlroot - - - -xmlroot ( xml, VERSION {text|NO VALUE} , STANDALONE {YES|NO|NO VALUE} ) xml - - - - The xmlroot expression alters the properties - of the root node of an XML value. If a version is specified, - it replaces the value in the root node's version declaration; if a - standalone setting is specified, it replaces the value in the - root node's standalone declaration. - - - -abc'), - version '1.0', standalone yes); - - xmlroot ----------------------------------------- - - abc -]]> - - - - - <literal>xmlagg</literal> - - - xmlagg - - - -xmlagg ( xml ) xml - - - - The function xmlagg is, unlike the other - functions described here, an aggregate function. It concatenates the - input values to the aggregate function call, - much like xmlconcat does, except that concatenation - occurs across rows rather than across expressions in a single row. - See for additional information - about aggregate functions. - - - - Example: -abc'); -INSERT INTO test VALUES (2, ''); -SELECT xmlagg(x) FROM test; - xmlagg ----------------------- - abc -]]> - - - - To determine the order of the concatenation, an ORDER BY - clause may be added to the aggregate call as described in - . For example: - -abc -]]> - - - - The following non-standard approach used to be recommended - in previous versions, and may still be useful in specific - cases: - -abc -]]> - - - - - - XML Predicates - - - The expressions described in this section check properties - of xml values. - - - - <literal>IS DOCUMENT</literal> - - - IS DOCUMENT - - - -xml IS DOCUMENT boolean - - - - The expression IS DOCUMENT returns true if the - argument XML value is a proper XML document, false if it is not - (that is, it is a content fragment), or null if the argument is - null. See about the difference - between documents and content fragments. - - - - - <literal>IS NOT DOCUMENT</literal> - - - IS NOT DOCUMENT - - - -xml IS NOT DOCUMENT boolean - - - - The expression IS NOT DOCUMENT returns false if the - argument XML value is a proper XML document, true if it is not (that is, - it is a content fragment), or null if the argument is null. - - - - - <literal>XMLEXISTS</literal> - - - XMLEXISTS - - - -XMLEXISTS ( text PASSING BY {REF|VALUE} xml BY {REF|VALUE} ) boolean - - - - The function xmlexists evaluates an XPath 1.0 - expression (the first argument), with the passed XML value as its context - item. The function returns false if the result of that evaluation - yields an empty node-set, true if it yields any other value. The - function returns null if any argument is null. A nonnull value - passed as the context item must be an XML document, not a content - fragment or any non-XML value. - - - - Example: - TorontoOttawa'); - - xmlexists ------------- - t -(1 row) -]]> - - - - The BY REF and BY VALUE clauses - are accepted in PostgreSQL, but are ignored, - as discussed in . - - - - In the SQL standard, the xmlexists function - evaluates an expression in the XML Query language, - but PostgreSQL allows only an XPath 1.0 - expression, as discussed in - . - - - - - <literal>xml_is_well_formed</literal> - - - xml_is_well_formed - - - - xml_is_well_formed_document - - - - xml_is_well_formed_content - - - -xml_is_well_formed ( text ) boolean -xml_is_well_formed_document ( text ) boolean -xml_is_well_formed_content ( text ) boolean - - - - These functions check whether a text string represents - well-formed XML, returning a Boolean result. 
- xml_is_well_formed_document checks for a well-formed - document, while xml_is_well_formed_content checks - for well-formed content. xml_is_well_formed does - the former if the configuration - parameter is set to DOCUMENT, or the latter if it is set to - CONTENT. This means that - xml_is_well_formed is useful for seeing whether - a simple cast to type xml will succeed, whereas the other two - functions are useful for seeing whether the corresponding variants of - XMLPARSE will succeed. - - - - Examples: - -'); - xml_is_well_formed --------------------- - f -(1 row) - -SELECT xml_is_well_formed(''); - xml_is_well_formed --------------------- - t -(1 row) - -SET xmloption TO CONTENT; -SELECT xml_is_well_formed('abc'); - xml_is_well_formed --------------------- - t -(1 row) - -SELECT xml_is_well_formed_document('bar'); - xml_is_well_formed_document ------------------------------ - t -(1 row) - -SELECT xml_is_well_formed_document('bar'); - xml_is_well_formed_document ------------------------------ - f -(1 row) -]]> - - The last example shows that the checks include whether - namespaces are correctly matched. - - - - - - Processing XML - - - To process values of data type xml, PostgreSQL offers - the functions xpath and - xpath_exists, which evaluate XPath 1.0 - expressions, and the XMLTABLE - table function. - - - - <literal>xpath</literal> - - - XPath - - - -xpath ( xpath text, xml xml , nsarray text[] ) xml[] - - - - The function xpath evaluates the XPath 1.0 - expression xpath (given as text) - against the XML value - xml. It returns an array of XML values - corresponding to the node-set produced by the XPath expression. - If the XPath expression returns a scalar value rather than a node-set, - a single-element array is returned. - - - - The second argument must be a well formed XML document. In particular, - it must have a single root node element. - - - - The optional third argument of the function is an array of namespace - mappings. This array should be a two-dimensional text array with - the length of the second axis being equal to 2 (i.e., it should be an - array of arrays, each of which consists of exactly 2 elements). - The first element of each array entry is the namespace name (alias), the - second the namespace URI. It is not required that aliases provided in - this array be the same as those being used in the XML document itself (in - other words, both in the XML document and in the xpath - function context, aliases are local). - - - - Example: -test', - ARRAY[ARRAY['my', 'http://example.com']]); - - xpath --------- - {test} -(1 row) -]]> - - - - To deal with default (anonymous) namespaces, do something like this: -test', - ARRAY[ARRAY['mydefns', 'http://example.com']]); - - xpath --------- - {test} -(1 row) -]]> - - - - - <literal>xpath_exists</literal> - - - xpath_exists - - - -xpath_exists ( xpath text, xml xml , nsarray text[] ) boolean - - - - The function xpath_exists is a specialized form - of the xpath function. Instead of returning the - individual XML values that satisfy the XPath 1.0 expression, this function - returns a Boolean indicating whether the query was satisfied or not - (specifically, whether it produced any value other than an empty node-set). - This function is equivalent to the XMLEXISTS predicate, - except that it also offers support for a namespace mapping argument. 
- - - - Example: -test', - ARRAY[ARRAY['my', 'http://example.com']]); - - xpath_exists --------------- - t -(1 row) -]]> - - - - - <literal>xmltable</literal> - - - xmltable - - - - table function - XMLTABLE - - - -XMLTABLE ( - XMLNAMESPACES ( namespace_uri AS namespace_name , ... ), - row_expression PASSING BY {REF|VALUE} document_expression BY {REF|VALUE} - COLUMNS name { type PATH column_expression DEFAULT default_expression NOT NULL | NULL - | FOR ORDINALITY } - , ... -) setof record - - - - The xmltable expression produces a table based - on an XML value, an XPath filter to extract rows, and a - set of column definitions. - Although it syntactically resembles a function, it can only appear - as a table in a query's FROM clause. - - - - The optional XMLNAMESPACES clause gives a - comma-separated list of namespace definitions, where - each namespace_uri is a text - expression and each namespace_name is a simple - identifier. It specifies the XML namespaces used in the document and - their aliases. A default namespace specification is not currently - supported. - - - - The required row_expression argument is an - XPath 1.0 expression (given as text) that is evaluated, - passing the XML value document_expression as - its context item, to obtain a set of XML nodes. These nodes are what - xmltable transforms into output rows. No rows - will be produced if the document_expression - is null, nor if the row_expression produces - an empty node-set or any value other than a node-set. - - - - document_expression provides the context - item for the row_expression. It must be a - well-formed XML document; fragments/forests are not accepted. - The BY REF and BY VALUE clauses - are accepted but ignored, as discussed in - . - - - - In the SQL standard, the xmltable function - evaluates expressions in the XML Query language, - but PostgreSQL allows only XPath 1.0 - expressions, as discussed in - . - - - - The required COLUMNS clause specifies the - column(s) that will be produced in the output table. - See the syntax summary above for the format. - A name is required for each column, as is a data type - (unless FOR ORDINALITY is specified, in which case - type integer is implicit). The path, default and - nullability clauses are optional. - - - - A column marked FOR ORDINALITY will be populated - with row numbers, starting with 1, in the order of nodes retrieved from - the row_expression's result node-set. - At most one column may be marked FOR ORDINALITY. - - - - - XPath 1.0 does not specify an order for nodes in a node-set, so code - that relies on a particular order of the results will be - implementation-dependent. Details can be found in - . - - - - - The column_expression for a column is an - XPath 1.0 expression that is evaluated for each row, with the current - node from the row_expression result as its - context item, to find the value of the column. If - no column_expression is given, then the - column name is used as an implicit path. - - - - If a column's XPath expression returns a non-XML value (which is limited - to string, boolean, or double in XPath 1.0) and the column has a - PostgreSQL type other than xml, the column will be set - as if by assigning the value's string representation to the PostgreSQL - type. (If the value is a boolean, its string representation is taken - to be 1 or 0 if the output - column's type category is numeric, otherwise true or - false.) 
- - - - If a column's XPath expression returns a non-empty set of XML nodes - and the column's PostgreSQL type is xml, the column will - be assigned the expression result exactly, if it is of document or - content form. - - - A result containing more than one element node at the top level, or - non-whitespace text outside of an element, is an example of content form. - An XPath result can be of neither form, for example if it returns an - attribute node selected from the element that contains it. Such a result - will be put into content form with each such disallowed node replaced by - its string value, as defined for the XPath 1.0 - string function. - - - - - - A non-XML result assigned to an xml output column produces - content, a single text node with the string value of the result. - An XML result assigned to a column of any other type may not have more than - one node, or an error is raised. If there is exactly one node, the column - will be set as if by assigning the node's string - value (as defined for the XPath 1.0 string function) - to the PostgreSQL type. - - - - The string value of an XML element is the concatenation, in document order, - of all text nodes contained in that element and its descendants. The string - value of an element with no descendant text nodes is an - empty string (not NULL). - Any xsi:nil attributes are ignored. - Note that the whitespace-only text() node between two non-text - elements is preserved, and that leading whitespace on a text() - node is not flattened. - The XPath 1.0 string function may be consulted for the - rules defining the string value of other XML node types and non-XML values. - - - - The conversion rules presented here are not exactly those of the SQL - standard, as discussed in . - - - - If the path expression returns an empty node-set - (typically, when it does not match) - for a given row, the column will be set to NULL, unless - a default_expression is specified; then the - value resulting from evaluating that expression is used. - - - - A default_expression, rather than being - evaluated immediately when xmltable is called, - is evaluated each time a default is needed for the column. - If the expression qualifies as stable or immutable, the repeat - evaluation may be skipped. - This means that you can usefully use volatile functions like - nextval in - default_expression. - - - - Columns may be marked NOT NULL. If the - column_expression for a NOT - NULL column does not match anything and there is - no DEFAULT or - the default_expression also evaluates to null, - an error is reported. 
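To make the per-row default evaluation concrete, here is a minimal sketch (the sequence name xt_seq is invented for this illustration; fuller examples follow below). The volatile DEFAULT is re-evaluated for each row whose PATH finds nothing:

CREATE SEQUENCE xt_seq;

SELECT *
  FROM XMLTABLE('/r/e'
                PASSING '<r><e>one</e><e/><e/></r>'::xml
                COLUMNS v text PATH 'text()'
                          DEFAULT 'missing #' || nextval('xt_seq'));

      v
------------
 one
 missing #1
 missing #2
(3 rows)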
- - - - Examples: - - - AU - Australia - - - JP - Japan - Shinzo Abe - 145935 - - - SG - Singapore - 697 - - -$$ AS data; - -SELECT xmltable.* - FROM xmldata, - XMLTABLE('//ROWS/ROW' - PASSING data - COLUMNS id int PATH '@id', - ordinality FOR ORDINALITY, - "COUNTRY_NAME" text, - country_id text PATH 'COUNTRY_ID', - size_sq_km float PATH 'SIZE[@unit = "sq_km"]', - size_other text PATH - 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)', - premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'); - - id | ordinality | COUNTRY_NAME | country_id | size_sq_km | size_other | premier_name -----+------------+--------------+------------+------------+--------------+--------------- - 1 | 1 | Australia | AU | | | not specified - 5 | 2 | Japan | JP | | 145935 sq_mi | Shinzo Abe - 6 | 3 | Singapore | SG | 697 | | not specified -]]> - - The following example shows concatenation of multiple text() nodes, - usage of the column name as XPath filter, and the treatment of whitespace, - XML comments and processing instructions: - - - Hello2a2 bbbxxxCC - -$$ AS data; - -SELECT xmltable.* - FROM xmlelements, XMLTABLE('/root' PASSING data COLUMNS element text); - element -------------------------- - Hello2a2 bbbxxxCC -]]> - - - - The following example illustrates how - the XMLNAMESPACES clause can be used to specify - a list of namespaces - used in the XML document as well as in the XPath expressions: - - - - - -'::xml) -) -SELECT xmltable.* - FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, - 'http://example.com/b' AS "B"), - '/x:example/x:item' - PASSING (SELECT data FROM xmldata) - COLUMNS foo int PATH '@foo', - bar int PATH '@B:bar'); - foo | bar ------+----- - 1 | 2 - 3 | 4 - 4 | 5 -(3 rows) -]]> - - - - - - Mapping Tables to XML - - - XML export - - - - The following functions map the contents of relational tables to - XML values. They can be thought of as XML export functionality: - -table_to_xml ( table regclass, nulls boolean, - tableforest boolean, targetns text ) xml -query_to_xml ( query text, nulls boolean, - tableforest boolean, targetns text ) xml -cursor_to_xml ( cursor refcursor, count integer, nulls boolean, - tableforest boolean, targetns text ) xml - - - - - table_to_xml maps the content of the named - table, passed as parameter table. The - regclass type accepts strings identifying tables using the - usual notation, including optional schema qualification and - double quotes (see for details). - query_to_xml executes the - query whose text is passed as parameter - query and maps the result set. - cursor_to_xml fetches the indicated number of - rows from the cursor specified by the parameter - cursor. This variant is recommended if - large tables have to be mapped, because the result value is built - up in memory by each function. - - - - If tableforest is false, then the resulting - XML document looks like this: - - - data - data - - - - ... - - - ... - -]]> - - If tableforest is true, the result is an - XML content fragment that looks like this: - - data - data - - - - ... - - -... -]]> - - If no table name is available, that is, when mapping a query or a - cursor, the string table is used in the first - format, row in the second format. - - - - The choice between these formats is up to the user. The first - format is a proper XML document, which will be important in many - applications. The second format tends to be more useful in the - cursor_to_xml function if the result values are to be - reassembled into one document later on. 
The functions for - producing XML content discussed above, in particular - xmlelement, can be used to alter the results - to taste. - - - - The data values are mapped in the same way as described for the - function xmlelement above. - - - - The parameter nulls determines whether null - values should be included in the output. If true, null values in - columns are represented as: - -]]> - where xsi is the XML namespace prefix for XML - Schema Instance. An appropriate namespace declaration will be - added to the result value. If false, columns containing null - values are simply omitted from the output. - - - - The parameter targetns specifies the - desired XML namespace of the result. If no particular namespace - is wanted, an empty string should be passed. - - - - The following functions return XML Schema documents describing the - mappings performed by the corresponding functions above: - -table_to_xmlschema ( table regclass, nulls boolean, - tableforest boolean, targetns text ) xml -query_to_xmlschema ( query text, nulls boolean, - tableforest boolean, targetns text ) xml -cursor_to_xmlschema ( cursor refcursor, nulls boolean, - tableforest boolean, targetns text ) xml - - It is essential that the same parameters are passed in order to - obtain matching XML data mappings and XML Schema documents. - - - - The following functions produce XML data mappings and the - corresponding XML Schema in one document (or forest), linked - together. They can be useful where self-contained and - self-describing results are wanted: - -table_to_xml_and_xmlschema ( table regclass, nulls boolean, - tableforest boolean, targetns text ) xml -query_to_xml_and_xmlschema ( query text, nulls boolean, - tableforest boolean, targetns text ) xml - - - - - In addition, the following functions are available to produce - analogous mappings of entire schemas or the entire current - database: - -schema_to_xml ( schema name, nulls boolean, - tableforest boolean, targetns text ) xml -schema_to_xmlschema ( schema name, nulls boolean, - tableforest boolean, targetns text ) xml -schema_to_xml_and_xmlschema ( schema name, nulls boolean, - tableforest boolean, targetns text ) xml - -database_to_xml ( nulls boolean, - tableforest boolean, targetns text ) xml -database_to_xmlschema ( nulls boolean, - tableforest boolean, targetns text ) xml -database_to_xml_and_xmlschema ( nulls boolean, - tableforest boolean, targetns text ) xml - - - These functions ignore tables that are not readable by the current user. - The database-wide functions additionally ignore schemas that the current - user does not have USAGE (lookup) privilege for. - - - - Note that these potentially produce a lot of data, which needs to - be built up in memory. When requesting content mappings of large - schemas or databases, it might be worthwhile to consider mapping the - tables separately instead, possibly even through a cursor. - - - - The result of a schema content mapping looks like this: - - - -table1-mapping - -table2-mapping - -... - -]]> - - where the format of a table mapping depends on the - tableforest parameter as explained above. - - - - The result of a database content mapping looks like this: - - - - - ... - - - - ... - - -... - -]]> - - where the schema mapping is as above. - - - - As an example of using the output produced by these functions, - shows an XSLT stylesheet that - converts the output of - table_to_xml_and_xmlschema to an HTML - document containing a tabular rendition of the table data. 
In a - similar manner, the results from these functions can be - converted into other XML-based formats. - - - - XSLT Stylesheet for Converting SQL/XML Output to HTML - - - - - - - - - - - - - <xsl:value-of select="name(current())"/> - - - - - - - - - - - - - - - - -
- - -
- -
-]]>
-
-
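As a quick way to experiment with the mapping functions in this section, the following sketch (the emp table is hypothetical) produces output in the first, document-style format shown above:

CREATE TABLE emp (id int, name text);
INSERT INTO emp VALUES (1, 'Ada'), (2, NULL);

SELECT table_to_xml('emp', true, false, '');

Since nulls is passed as true, the row with id 2 is emitted with <name xsi:nil="true"/> rather than omitting the column.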
-
- - - JSON Functions and Operators - - - JSON - functions and operators - - - SQL/JSON - functions and expressions - - - - This section describes: - - - - - functions and operators for processing and creating JSON data - - - - - the SQL/JSON path language - - - - - the SQL/JSON query functions - - - - - - - To provide native support for JSON data types within the SQL environment, - PostgreSQL implements the - SQL/JSON data model. - This model comprises sequences of items. Each item can hold SQL scalar - values, with an additional SQL/JSON null value, and composite data structures - that use JSON arrays and objects. The model is a formalization of the implied - data model in the JSON specification - RFC 7159. - - - - SQL/JSON allows you to handle JSON data alongside regular SQL data, - with transaction support, including: - - - - - Uploading JSON data into the database and storing it in - regular SQL columns as character or binary strings. - - - - - Generating JSON objects and arrays from relational data. - - - - - Querying JSON data using SQL/JSON query functions and - SQL/JSON path language expressions. - - - - - - - To learn more about the SQL/JSON standard, see - . For details on JSON types - supported in PostgreSQL, - see . - - - - Processing and Creating JSON Data - - - shows the operators that - are available for use with JSON data types (see ). - In addition, the usual comparison operators shown in are available for - jsonb, though not for json. The comparison - operators follow the ordering rules for B-tree operations outlined in - . - See also for the aggregate - function json_agg which aggregates record - values as JSON, the aggregate function - json_object_agg which aggregates pairs of values - into a JSON object, and their jsonb equivalents, - jsonb_agg and jsonb_object_agg. - - - - <type>json</type> and <type>jsonb</type> Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - json -> integer - json - - - jsonb -> integer - jsonb - - - Extracts n'th element of JSON array - (array elements are indexed from zero, but negative integers count - from the end). - - - '[{"a":"foo"},{"b":"bar"},{"c":"baz"}]'::json -> 2 - {"c":"baz"} - - - '[{"a":"foo"},{"b":"bar"},{"c":"baz"}]'::json -> -3 - {"a":"foo"} - - - - - - json -> text - json - - - jsonb -> text - jsonb - - - Extracts JSON object field with the given key. - - - '{"a": {"b":"foo"}}'::json -> 'a' - {"b":"foo"} - - - - - - json ->> integer - text - - - jsonb ->> integer - text - - - Extracts n'th element of JSON array, - as text. - - - '[1,2,3]'::json ->> 2 - 3 - - - - - - json ->> text - text - - - jsonb ->> text - text - - - Extracts JSON object field with the given key, as text. - - - '{"a":1,"b":2}'::json ->> 'b' - 2 - - - - - - json #> text[] - json - - - jsonb #> text[] - jsonb - - - Extracts JSON sub-object at the specified path, where path elements - can be either field keys or array indexes. - - - '{"a": {"b": ["foo","bar"]}}'::json #> '{a,b,1}' - "bar" - - - - - - json #>> text[] - text - - - jsonb #>> text[] - text - - - Extracts JSON sub-object at the specified path as text. - - - '{"a": {"b": ["foo","bar"]}}'::json #>> '{a,b,1}' - bar - - - - -
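The extraction operators can be chained, since each one yields a value that the next can consume; a small sketch, including the missing-key case discussed just below:

SELECT '{"a": {"b": ["x", "y"]}}'::jsonb -> 'a' -> 'b' ->> 1;
 ?column?
----------
 y
(1 row)

SELECT '{"a": 1}'::jsonb -> 'missing' IS NULL;
 ?column?
----------
 t
(1 row)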
- - - - The field/element/path extraction operators return NULL, rather than - failing, if the JSON input does not have the right structure to match - the request; for example if no such key or array element exists. - - - - - Some further operators exist only for jsonb, as shown - in . - - describes how these operators can be used to effectively search indexed - jsonb data. - - - - Additional <type>jsonb</type> Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - jsonb @> jsonb - boolean - - - Does the first JSON value contain the second? - (See for details about containment.) - - - '{"a":1, "b":2}'::jsonb @> '{"b":2}'::jsonb - t - - - - - - jsonb <@ jsonb - boolean - - - Is the first JSON value contained in the second? - - - '{"b":2}'::jsonb <@ '{"a":1, "b":2}'::jsonb - t - - - - - - jsonb ? text - boolean - - - Does the text string exist as a top-level key or array element within - the JSON value? - - - '{"a":1, "b":2}'::jsonb ? 'b' - t - - - '["a", "b", "c"]'::jsonb ? 'b' - t - - - - - - jsonb ?| text[] - boolean - - - Do any of the strings in the text array exist as top-level keys or - array elements? - - - '{"a":1, "b":2, "c":3}'::jsonb ?| array['b', 'd'] - t - - - - - - jsonb ?& text[] - boolean - - - Do all of the strings in the text array exist as top-level keys or - array elements? - - - '["a", "b", "c"]'::jsonb ?& array['a', 'b'] - t - - - - - - jsonb || jsonb - jsonb - - - Concatenates two jsonb values. - Concatenating two arrays generates an array containing all the - elements of each input. Concatenating two objects generates an - object containing the union of their - keys, taking the second object's value when there are duplicate keys. - All other cases are treated by converting a non-array input into a - single-element array, and then proceeding as for two arrays. - Does not operate recursively: only the top-level array or object - structure is merged. - - - '["a", "b"]'::jsonb || '["a", "d"]'::jsonb - ["a", "b", "a", "d"] - - - '{"a": "b"}'::jsonb || '{"c": "d"}'::jsonb - {"a": "b", "c": "d"} - - - '[1, 2]'::jsonb || '3'::jsonb - [1, 2, 3] - - - '{"a": "b"}'::jsonb || '42'::jsonb - [{"a": "b"}, 42] - - - To append an array to another array as a single entry, wrap it - in an additional layer of array, for example: - - - '[1, 2]'::jsonb || jsonb_build_array('[3, 4]'::jsonb) - [1, 2, [3, 4]] - - - - - - jsonb - text - jsonb - - - Deletes a key (and its value) from a JSON object, or matching string - value(s) from a JSON array. - - - '{"a": "b", "c": "d"}'::jsonb - 'a' - {"c": "d"} - - - '["a", "b", "c", "b"]'::jsonb - 'b' - ["a", "c"] - - - - - - jsonb - text[] - jsonb - - - Deletes all matching keys or array elements from the left operand. - - - '{"a": "b", "c": "d"}'::jsonb - '{a,c}'::text[] - {} - - - - - - jsonb - integer - jsonb - - - Deletes the array element with specified index (negative - integers count from the end). Throws an error if JSON value - is not an array. - - - '["a", "b"]'::jsonb - 1 - ["a"] - - - - - - jsonb #- text[] - jsonb - - - Deletes the field or array element at the specified path, where path - elements can be either field keys or array indexes. - - - '["a", {"b":1}]'::jsonb #- '{1,b}' - ["a", {}] - - - - - - jsonb @? jsonpath - boolean - - - Does JSON path return any item for the specified JSON value? - (This is useful only with SQL-standard JSON path expressions, not - predicate check - expressions, since those always return a value.) - - - '{"a":[1,2,3,4,5]}'::jsonb @? '$.a[*] ? 
(@ > 2)' - t - - - - - - jsonb @@ jsonpath - boolean - - - Returns the result of a JSON path predicate check for the - specified JSON value. - (This is useful only - with predicate - check expressions, not SQL-standard JSON path expressions, - since it will return NULL if the path result is - not a single boolean value.) - - - '{"a":[1,2,3,4,5]}'::jsonb @@ '$.a[*] > 2' - t - - - - -
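For instance, combining the containment and key-existence operators from the table above (the document here is made up for illustration):

SELECT doc @> '{"draft": false}' AND doc ? 'tags'
  FROM (VALUES ('{"tags": ["sql", "json"], "draft": false}'::jsonb)) AS t(doc);
 ?column?
----------
 t
(1 row)

Predicates of this shape are the ones that a GIN index on a jsonb column can accelerate, as noted above.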
- - - - The jsonpath operators @? - and @@ suppress the following errors: missing object - field or array element, unexpected JSON item type, datetime and numeric - errors. The jsonpath-related functions described below can - also be told to suppress these types of errors. This behavior might be - helpful when searching JSON document collections of varying structure. - - - - - shows the functions that are - available for constructing json and jsonb values. - Some functions in this table have a RETURNING clause, - which specifies the data type returned. It must be one of json, - jsonb, bytea, a character string type (text, - char, or varchar), or a type - that can be cast to json. - By default, the json type is returned. - - - - JSON Creation Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - to_json - - to_json ( anyelement ) - json - - - - to_jsonb - - to_jsonb ( anyelement ) - jsonb - - - Converts any SQL value to json or jsonb. - Arrays and composites are converted recursively to arrays and - objects (multidimensional arrays become arrays of arrays in JSON). - Otherwise, if there is a cast from the SQL data type - to json, the cast function will be used to perform the - conversion; - - For example, the extension has a cast - from hstore to json, so that - hstore values converted via the JSON creation functions - will be represented as JSON objects, not as primitive string values. - - - otherwise, a scalar JSON value is produced. For any scalar other than - a number, a Boolean, or a null value, the text representation will be - used, with escaping as necessary to make it a valid JSON string value. - - - to_json('Fred said "Hi."'::text) - "Fred said \"Hi.\"" - - - to_jsonb(row(42, 'Fred said "Hi."'::text)) - {"f1": 42, "f2": "Fred said \"Hi.\""} - - - - - - - array_to_json - - array_to_json ( anyarray , boolean ) - json - - - Converts an SQL array to a JSON array. The behavior is the same - as to_json except that line feeds will be added - between top-level array elements if the optional boolean parameter is - true. - - - array_to_json('{{1,5},{99,100}}'::int[]) - [[1,5],[99,100]] - - - - - - - json_array - json_array ( - { value_expression FORMAT JSON } , ... - { NULL | ABSENT } ON NULL - RETURNING data_type FORMAT JSON ENCODING UTF8 ) - - - json_array ( - query_expression - RETURNING data_type FORMAT JSON ENCODING UTF8 ) - - - Constructs a JSON array from either a series of - value_expression parameters or from the results - of query_expression, - which must be a SELECT query returning a single column. If - ABSENT ON NULL is specified, NULL values are ignored. - This is always the case if a - query_expression is used. - - - json_array(1,true,json '{"a":null}') - [1, true, {"a":null}] - - - json_array(SELECT * FROM (VALUES(1),(2)) t) - [1, 2] - - - - - - - row_to_json - - row_to_json ( record , boolean ) - json - - - Converts an SQL composite value to a JSON object. The behavior is the - same as to_json except that line feeds will be - added between top-level elements if the optional boolean parameter is - true. - - - row_to_json(row(1,'foo')) - {"f1":1,"f2":"foo"} - - - - - - - json_build_array - - json_build_array ( VARIADIC "any" ) - json - - - - jsonb_build_array - - jsonb_build_array ( VARIADIC "any" ) - jsonb - - - Builds a possibly-heterogeneously-typed JSON array out of a variadic - argument list. Each argument is converted as - per to_json or to_jsonb. 
- - - json_build_array(1, 2, 'foo', 4, 5) - [1, 2, "foo", 4, 5] - - - - - - - json_build_object - - json_build_object ( VARIADIC "any" ) - json - - - - jsonb_build_object - - jsonb_build_object ( VARIADIC "any" ) - jsonb - - - Builds a JSON object out of a variadic argument list. By convention, - the argument list consists of alternating keys and values. Key - arguments are coerced to text; value arguments are converted as - per to_json or to_jsonb. - - - json_build_object('foo', 1, 2, row(3,'bar')) - {"foo" : 1, "2" : {"f1":3,"f2":"bar"}} - - - - - - json_object - json_object ( - { key_expression { VALUE | ':' } - value_expression FORMAT JSON ENCODING UTF8 }, ... - { NULL | ABSENT } ON NULL - { WITH | WITHOUT } UNIQUE KEYS - RETURNING data_type FORMAT JSON ENCODING UTF8 ) - - - Constructs a JSON object of all the key/value pairs given, - or an empty object if none are given. - key_expression is a scalar expression - defining the JSON key, which is - converted to the text type. - It cannot be NULL nor can it - belong to a type that has a cast to the json type. - If WITH UNIQUE KEYS is specified, there must not - be any duplicate key_expression. - Any pair for which the value_expression - evaluates to NULL is omitted from the output - if ABSENT ON NULL is specified; - if NULL ON NULL is specified or the clause - omitted, the key is included with value NULL. - - - json_object('code' VALUE 'P123', 'title': 'Jaws') - {"code" : "P123", "title" : "Jaws"} - - - - - - - json_object - - json_object ( text[] ) - json - - - - jsonb_object - - jsonb_object ( text[] ) - jsonb - - - Builds a JSON object out of a text array. The array must have either - exactly one dimension with an even number of members, in which case - they are taken as alternating key/value pairs, or two dimensions - such that each inner array has exactly two elements, which - are taken as a key/value pair. All values are converted to JSON - strings. - - - json_object('{a, 1, b, "def", c, 3.5}') - {"a" : "1", "b" : "def", "c" : "3.5"} - - json_object('{{a, 1}, {b, "def"}, {c, 3.5}}') - {"a" : "1", "b" : "def", "c" : "3.5"} - - - - - - json_object ( keys text[], values text[] ) - json - - - jsonb_object ( keys text[], values text[] ) - jsonb - - - This form of json_object takes keys and values - pairwise from separate text arrays. Otherwise it is identical to - the one-argument form. - - - json_object('{a,b}', '{1,2}') - {"a": "1", "b": "2"} - - - - - - json constructor - json ( - expression - FORMAT JSON ENCODING UTF8 - { WITH | WITHOUT } UNIQUE KEYS ) - json - - - Converts a given expression specified as text or - bytea string (in UTF8 encoding) into a JSON - value. If expression is NULL, an - SQL null value is returned. - If WITH UNIQUE is specified, the - expression must not contain any duplicate - object keys. - - - json('{"a":123, "b":[true,"foo"], "a":"bar"}') - {"a":123, "b":[true,"foo"], "a":"bar"} - - - - - - - json_scalar - json_scalar ( expression ) - - - Converts a given SQL scalar value into a JSON scalar value. - If the input is NULL, an SQL null is returned. If - the input is number or a boolean value, a corresponding JSON number - or boolean value is returned. For any other value, a JSON string is - returned. - - - json_scalar(123.45) - 123.45 - - - json_scalar(CURRENT_TIMESTAMP) - "2022-05-10T10:51:04.62128-04:00" - - - - - - json_serialize ( - expression FORMAT JSON ENCODING UTF8 - RETURNING data_type FORMAT JSON ENCODING UTF8 ) - - - Converts an SQL/JSON expression into a character or binary string. 
The - expression can be of any JSON type, any - character string type, or bytea in UTF8 encoding. - The returned type used in RETURNING can be any - character string type or bytea. The default is - text. - - - json_serialize('{ "a" : 1 } ' RETURNING bytea) - \x7b20226122203a2031207d20 - - - - -
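A brief sketch combining several of the constructors above (all values invented):

SELECT json_build_object(
         'id',   42,
         'name', 'Ada',
         'tags', json_build_array('sql', 'json'));
                   json_build_object
-------------------------------------------------------
 {"id" : 42, "name" : "Ada", "tags" : ["sql", "json"]}
(1 row)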
- - - details SQL/JSON - facilities for testing JSON. - - - - SQL/JSON Testing Functions - - - - - Function signature - - - Description - - - Example(s) - - - - - - - IS JSON - expression IS NOT JSON - { VALUE | SCALAR | ARRAY | OBJECT } - { WITH | WITHOUT } UNIQUE KEYS - - - This predicate tests whether expression can be - parsed as JSON, possibly of a specified type. - If SCALAR or ARRAY or - OBJECT is specified, the - test is whether or not the JSON is of that particular type. If - WITH UNIQUE KEYS is specified, then any object in the - expression is also tested to see if it - has duplicate keys. - - - -SELECT js, - js IS JSON "json?", - js IS JSON SCALAR "scalar?", - js IS JSON OBJECT "object?", - js IS JSON ARRAY "array?" -FROM (VALUES - ('123'), ('"abc"'), ('{"a": "b"}'), ('[1,2]'),('abc')) foo(js); - js | json? | scalar? | object? | array? -------------+-------+---------+---------+-------- - 123 | t | t | f | f - "abc" | t | t | f | f - {"a": "b"} | t | f | t | f - [1,2] | t | f | f | t - abc | f | f | f | f - - - - -SELECT js, - js IS JSON OBJECT "object?", - js IS JSON ARRAY "array?", - js IS JSON ARRAY WITH UNIQUE KEYS "array w. UK?", - js IS JSON ARRAY WITHOUT UNIQUE KEYS "array w/o UK?" -FROM (VALUES ('[{"a":"1"}, - {"b":"2","b":"3"}]')) foo(js); --[ RECORD 1 ]-+-------------------- -js | [{"a":"1"}, + - | {"b":"2","b":"3"}] -object? | f -array? | t -array w. UK? | f -array w/o UK? | t - - - - - -
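Beyond ad-hoc queries, these predicates can be used to validate data on the way in; a sketch with a hypothetical table:

CREATE TABLE api_events (
    id      integer PRIMARY KEY,
    payload text CHECK (payload IS JSON OBJECT)
);

INSERT INTO api_events VALUES (1, '{"kind": "signup"}');   -- accepted
INSERT INTO api_events VALUES (2, '[1, 2, 3]');            -- rejected: a JSON array, not an object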
- - - shows the functions that - are available for processing json and jsonb values. - - - - JSON Processing Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - json_array_elements - - json_array_elements ( json ) - setof json - - - - jsonb_array_elements - - jsonb_array_elements ( jsonb ) - setof jsonb - - - Expands the top-level JSON array into a set of JSON values. - - - select * from json_array_elements('[1,true, [2,false]]') - - - value ------------ - 1 - true - [2,false] - - - - - - - - json_array_elements_text - - json_array_elements_text ( json ) - setof text - - - - jsonb_array_elements_text - - jsonb_array_elements_text ( jsonb ) - setof text - - - Expands the top-level JSON array into a set of text values. - - - select * from json_array_elements_text('["foo", "bar"]') - - - value ------------ - foo - bar - - - - - - - - json_array_length - - json_array_length ( json ) - integer - - - - jsonb_array_length - - jsonb_array_length ( jsonb ) - integer - - - Returns the number of elements in the top-level JSON array. - - - json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') - 5 - - - jsonb_array_length('[]') - 0 - - - - - - - json_each - - json_each ( json ) - setof record - ( key text, - value json ) - - - - jsonb_each - - jsonb_each ( jsonb ) - setof record - ( key text, - value jsonb ) - - - Expands the top-level JSON object into a set of key/value pairs. - - - select * from json_each('{"a":"foo", "b":"bar"}') - - - key | value ------+------- - a | "foo" - b | "bar" - - - - - - - - json_each_text - - json_each_text ( json ) - setof record - ( key text, - value text ) - - - - jsonb_each_text - - jsonb_each_text ( jsonb ) - setof record - ( key text, - value text ) - - - Expands the top-level JSON object into a set of key/value pairs. - The returned values will be of - type text. - - - select * from json_each_text('{"a":"foo", "b":"bar"}') - - - key | value ------+------- - a | foo - b | bar - - - - - - - - json_extract_path - - json_extract_path ( from_json json, VARIADIC path_elems text[] ) - json - - - - jsonb_extract_path - - jsonb_extract_path ( from_json jsonb, VARIADIC path_elems text[] ) - jsonb - - - Extracts JSON sub-object at the specified path. - (This is functionally equivalent to the #> - operator, but writing the path out as a variadic list can be more - convenient in some cases.) - - - json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 'f4', 'f6') - "foo" - - - - - - - json_extract_path_text - - json_extract_path_text ( from_json json, VARIADIC path_elems text[] ) - text - - - - jsonb_extract_path_text - - jsonb_extract_path_text ( from_json jsonb, VARIADIC path_elems text[] ) - text - - - Extracts JSON sub-object at the specified path as text. - (This is functionally equivalent to the #>> - operator.) - - - json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 'f4', 'f6') - foo - - - - - - - json_object_keys - - json_object_keys ( json ) - setof text - - - - jsonb_object_keys - - jsonb_object_keys ( jsonb ) - setof text - - - Returns the set of keys in the top-level JSON object. 
- - - select * from json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}') - - - json_object_keys ------------------- - f1 - f2 - - - - - - - - json_populate_record - - json_populate_record ( base anyelement, from_json json ) - anyelement - - - - jsonb_populate_record - - jsonb_populate_record ( base anyelement, from_json jsonb ) - anyelement - - - Expands the top-level JSON object to a row having the composite type - of the base argument. The JSON object - is scanned for fields whose names match column names of the output row - type, and their values are inserted into those columns of the output. - (Fields that do not correspond to any output column name are ignored.) - In typical use, the value of base is just - NULL, which means that any output columns that do - not match any object field will be filled with nulls. However, - if base isn't NULL then - the values it contains will be used for unmatched columns. - - - To convert a JSON value to the SQL type of an output column, the - following rules are applied in sequence: - - - - A JSON null value is converted to an SQL null in all cases. - - - - - If the output column is of type json - or jsonb, the JSON value is just reproduced exactly. - - - - - If the output column is a composite (row) type, and the JSON value - is a JSON object, the fields of the object are converted to columns - of the output row type by recursive application of these rules. - - - - - Likewise, if the output column is an array type and the JSON value - is a JSON array, the elements of the JSON array are converted to - elements of the output array by recursive application of these - rules. - - - - - Otherwise, if the JSON value is a string, the contents of the - string are fed to the input conversion function for the column's - data type. - - - - - Otherwise, the ordinary text representation of the JSON value is - fed to the input conversion function for the column's data type. - - - - - - While the example below uses a constant JSON value, typical use would - be to reference a json or jsonb column - laterally from another table in the query's FROM - clause. Writing json_populate_record in - the FROM clause is good practice, since all of the - extracted columns are available for use without duplicate function - calls. - - - create type subrowtype as (d int, e text); - create type myrowtype as (a int, b text[], c subrowtype); - - - select * from json_populate_record(null::myrowtype, - '{"a": 1, "b": ["2", "a b"], "c": {"d": 4, "e": "a b c"}, "x": "foo"}') - - - a | b | c ----+-----------+------------- - 1 | {2,"a b"} | (4,"a b c") - - - - - - - - jsonb_populate_record_valid - - jsonb_populate_record_valid ( base anyelement, from_json json ) - boolean - - - Function for testing jsonb_populate_record. Returns - true if the input jsonb_populate_record - would finish without an error for the given input JSON object; that is, it's - valid input, false otherwise. 
- - - create type jsb_char2 as (a char(2)); - - - select jsonb_populate_record_valid(NULL::jsb_char2, '{"a": "aaa"}'); - - - jsonb_populate_record_valid ------------------------------ - f -(1 row) - - - select * from jsonb_populate_record(NULL::jsb_char2, '{"a": "aaa"}') q; - - -ERROR: value too long for type character(2) - - select jsonb_populate_record_valid(NULL::jsb_char2, '{"a": "aa"}'); - - - jsonb_populate_record_valid ------------------------------ - t -(1 row) - - - select * from jsonb_populate_record(NULL::jsb_char2, '{"a": "aa"}') q; - - - a ----- - aa -(1 row) - - - - - - - - json_populate_recordset - - json_populate_recordset ( base anyelement, from_json json ) - setof anyelement - - - - jsonb_populate_recordset - - jsonb_populate_recordset ( base anyelement, from_json jsonb ) - setof anyelement - - - Expands the top-level JSON array of objects to a set of rows having - the composite type of the base argument. - Each element of the JSON array is processed as described above - for json[b]_populate_record. - - - create type twoints as (a int, b int); - - - select * from json_populate_recordset(null::twoints, '[{"a":1,"b":2}, {"a":3,"b":4}]') - - - a | b ----+--- - 1 | 2 - 3 | 4 - - - - - - - - json_to_record - - json_to_record ( json ) - record - - - - jsonb_to_record - - jsonb_to_record ( jsonb ) - record - - - Expands the top-level JSON object to a row having the composite type - defined by an AS clause. (As with all functions - returning record, the calling query must explicitly - define the structure of the record with an AS - clause.) The output record is filled from fields of the JSON object, - in the same way as described above - for json[b]_populate_record. Since there is no - input record value, unmatched columns are always filled with nulls. - - - create type myrowtype as (a int, b text); - - - select * from json_to_record('{"a":1,"b":[1,2,3],"c":[1,2,3],"e":"bar","r": {"a": 123, "b": "a b c"}}') as x(a int, b text, c int[], d text, r myrowtype) - - - a | b | c | d | r ----+---------+---------+---+--------------- - 1 | [1,2,3] | {1,2,3} | | (123,"a b c") - - - - - - - - json_to_recordset - - json_to_recordset ( json ) - setof record - - - - jsonb_to_recordset - - jsonb_to_recordset ( jsonb ) - setof record - - - Expands the top-level JSON array of objects to a set of rows having - the composite type defined by an AS clause. (As - with all functions returning record, the calling query - must explicitly define the structure of the record with - an AS clause.) Each element of the JSON array is - processed as described above - for json[b]_populate_record. - - - select * from json_to_recordset('[{"a":1,"b":"foo"}, {"a":"2","c":"bar"}]') as x(a int, b text) - - - a | b ----+----- - 1 | foo - 2 | - - - - - - - - jsonb_set - - jsonb_set ( target jsonb, path text[], new_value jsonb , create_if_missing boolean ) - jsonb - - - Returns target - with the item designated by path - replaced by new_value, or with - new_value added if - create_if_missing is true (which is the - default) and the item designated by path - does not exist. - All earlier steps in the path must exist, or - the target is returned unchanged. - As with the path oriented operators, negative integers that - appear in the path count from the end - of JSON arrays. - If the last path step is an array index that is out of range, - and create_if_missing is true, the new - value is added at the beginning of the array if the index is negative, - or at the end of the array if it is positive. 
- - - jsonb_set('[{"f1":1,"f2":null},2,null,3]', '{0,f1}', '[2,3,4]', false) - [{"f1": [2, 3, 4], "f2": null}, 2, null, 3] - - - jsonb_set('[{"f1":1,"f2":null},2]', '{0,f3}', '[2,3,4]') - [{"f1": 1, "f2": null, "f3": [2, 3, 4]}, 2] - - - - - - - jsonb_set_lax - - jsonb_set_lax ( target jsonb, path text[], new_value jsonb , create_if_missing boolean , null_value_treatment text ) - jsonb - - - If new_value is not NULL, - behaves identically to jsonb_set. Otherwise behaves - according to the value - of null_value_treatment which must be one - of 'raise_exception', - 'use_json_null', 'delete_key', or - 'return_target'. The default is - 'use_json_null'. - - - jsonb_set_lax('[{"f1":1,"f2":null},2,null,3]', '{0,f1}', null) - [{"f1": null, "f2": null}, 2, null, 3] - - - jsonb_set_lax('[{"f1":99,"f2":null},2]', '{0,f3}', null, true, 'return_target') - [{"f1": 99, "f2": null}, 2] - - - - - - - jsonb_insert - - jsonb_insert ( target jsonb, path text[], new_value jsonb , insert_after boolean ) - jsonb - - - Returns target - with new_value inserted. If the item - designated by the path is an array - element, new_value will be inserted before - that item if insert_after is false (which - is the default), or after it - if insert_after is true. If the item - designated by the path is an object - field, new_value will be inserted only if - the object does not already contain that key. - All earlier steps in the path must exist, or - the target is returned unchanged. - As with the path oriented operators, negative integers that - appear in the path count from the end - of JSON arrays. - If the last path step is an array index that is out of range, the new - value is added at the beginning of the array if the index is negative, - or at the end of the array if it is positive. - - - jsonb_insert('{"a": [0,1,2]}', '{a, 1}', '"new_value"') - {"a": [0, "new_value", 1, 2]} - - - jsonb_insert('{"a": [0,1,2]}', '{a, 1}', '"new_value"', true) - {"a": [0, 1, "new_value", 2]} - - - - - - - json_strip_nulls - - json_strip_nulls ( target json ,strip_in_arrays boolean ) - json - - - - jsonb_strip_nulls - - jsonb_strip_nulls ( target jsonb ,strip_in_arrays boolean ) - jsonb - - - Deletes all object fields that have null values from the given JSON - value, recursively. - If strip_in_arrays is true (the default is false), - null array elements are also stripped. - Otherwise they are not stripped. Bare null values are never stripped. - - - json_strip_nulls('[{"f1":1, "f2":null}, 2, null, 3]') - [{"f1":1},2,null,3] - - - jsonb_strip_nulls('[1,2,null,3,4]', true); - [1,2,3,4] - - - - - - - - jsonb_path_exists - - jsonb_path_exists ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - boolean - - - Checks whether the JSON path returns any item for the specified JSON - value. - (This is useful only with SQL-standard JSON path expressions, not - predicate check - expressions, since those always return a value.) - If the vars argument is specified, it must - be a JSON object, and its fields provide named values to be - substituted into the jsonpath expression. - If the silent argument is specified and - is true, the function suppresses the same errors - as the @? and @@ operators do. - - - jsonb_path_exists('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') - t - - - - - - - jsonb_path_match - - jsonb_path_match ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - boolean - - - Returns the SQL boolean result of a JSON path predicate check - for the specified JSON value. 
- (This is useful only - with predicate - check expressions, not SQL-standard JSON path expressions, - since it will either fail or return NULL if the - path result is not a single boolean value.) - The optional vars - and silent arguments act the same as - for jsonb_path_exists. - - - jsonb_path_match('{"a":[1,2,3,4,5]}', 'exists($.a[*] ? (@ >= $min && @ <= $max))', '{"min":2, "max":4}') - t - - - - - - - jsonb_path_query - - jsonb_path_query ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - setof jsonb - - - Returns all JSON items returned by the JSON path for the specified - JSON value. - For SQL-standard JSON path expressions it returns the JSON - values selected from target. - For predicate - check expressions it returns the result of the predicate - check: true, false, - or null. - The optional vars - and silent arguments act the same as - for jsonb_path_exists. - - - select * from jsonb_path_query('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') - - - jsonb_path_query ------------------- - 2 - 3 - 4 - - - - - - - - jsonb_path_query_array - - jsonb_path_query_array ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - jsonb - - - Returns all JSON items returned by the JSON path for the specified - JSON value, as a JSON array. - The parameters are the same as - for jsonb_path_query. - - - jsonb_path_query_array('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') - [2, 3, 4] - - - - - - - jsonb_path_query_first - - jsonb_path_query_first ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - jsonb - - - Returns the first JSON item returned by the JSON path for the - specified JSON value, or NULL if there are no - results. - The parameters are the same as - for jsonb_path_query. - - - jsonb_path_query_first('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') - 2 - - - - - - - jsonb_path_exists_tz - - jsonb_path_exists_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - boolean - - - - jsonb_path_match_tz - - jsonb_path_match_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - boolean - - - - jsonb_path_query_tz - - jsonb_path_query_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - setof jsonb - - - - jsonb_path_query_array_tz - - jsonb_path_query_array_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - jsonb - - - - jsonb_path_query_first_tz - - jsonb_path_query_first_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) - jsonb - - - These functions act like their counterparts described above without - the _tz suffix, except that these functions support - comparisons of date/time values that require timezone-aware - conversions. The example below requires interpretation of the - date-only value 2015-08-02 as a timestamp with time - zone, so the result depends on the current - setting. Due to this dependency, these - functions are marked as stable, which means these functions cannot be - used in indexes. Their counterparts are immutable, and so can be used - in indexes; but they will throw errors if asked to make such - comparisons. - - - jsonb_path_exists_tz('["2015-08-01 12:00:00-05"]', '$[*] ? (@.datetime() < "2015-08-02".datetime())') - t - - - - - - - jsonb_pretty - - jsonb_pretty ( jsonb ) - text - - - Converts the given JSON value to pretty-printed, indented text. 
- - - jsonb_pretty('[{"f1":1,"f2":null}, 2]') - - -[ - { - "f1": 1, - "f2": null - }, - 2 -] - - - - - - - - json_typeof - - json_typeof ( json ) - text - - - - jsonb_typeof - - jsonb_typeof ( jsonb ) - text - - - Returns the type of the top-level JSON value as a text string. - Possible types are - object, array, - string, number, - boolean, and null. - (The null result should not be confused - with an SQL NULL; see the examples.) - - - json_typeof('-123.4') - number - - - json_typeof('null'::json) - null - - - json_typeof(NULL::json) IS NULL - t - - - - -
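Many of these functions are commonly used to edit a stored document in place; for example, with a hypothetical profiles table:

CREATE TABLE profiles (id int PRIMARY KEY, prefs jsonb);
INSERT INTO profiles VALUES (1, '{"theme": "light", "lang": "en"}');

UPDATE profiles
   SET prefs = jsonb_set(prefs, '{theme}', '"dark"')
 WHERE id = 1;

SELECT prefs FROM profiles WHERE id = 1;
              prefs
---------------------------------
 {"lang": "en", "theme": "dark"}
(1 row)

Note that the replacement value is itself a jsonb literal, hence the doubled quoting of "dark".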
-
- - - The SQL/JSON Path Language - - - SQL/JSON path language - - - - SQL/JSON path expressions specify item(s) to be retrieved - from a JSON value, similarly to XPath expressions used - for access to XML content. In PostgreSQL, - path expressions are implemented as the jsonpath - data type and can use any elements described in - . - - - - JSON query functions and operators - pass the provided path expression to the path engine - for evaluation. If the expression matches the queried JSON data, - the corresponding JSON item, or set of items, is returned. - If there is no match, the result will be NULL, - false, or an error, depending on the function. - Path expressions are written in the SQL/JSON path language - and can include arithmetic expressions and functions. - - - - A path expression consists of a sequence of elements allowed - by the jsonpath data type. - The path expression is normally evaluated from left to right, but - you can use parentheses to change the order of operations. - If the evaluation is successful, a sequence of JSON items is produced, - and the evaluation result is returned to the JSON query function - that completes the specified computation. - - - - To refer to the JSON value being queried (the - context item), use the $ variable - in the path expression. The first element of a path must always - be $. It can be followed by one or more - accessor operators, - which go down the JSON structure level by level to retrieve sub-items - of the context item. Each accessor operator acts on the - result(s) of the previous evaluation step, producing zero, one, or more - output items from each input item. - - - - For example, suppose you have some JSON data from a GPS tracker that you - would like to parse, such as: - -SELECT '{ - "track": { - "segments": [ - { - "location": [ 47.763, 13.4034 ], - "start time": "2018-10-14 10:05:14", - "HR": 73 - }, - { - "location": [ 47.706, 13.2635 ], - "start time": "2018-10-14 10:39:21", - "HR": 135 - } - ] - } -}' AS json \gset - - (The above example can be copied-and-pasted - into psql to set things up for the following - examples. Then psql will - expand :'json' into a suitably-quoted string - constant containing the JSON value.) - - - - To retrieve the available track segments, you need to use the - .key accessor - operator to descend through surrounding JSON objects, for example: - -=> select jsonb_path_query(:'json', '$.track.segments'); - jsonb_path_query ------------------------------------------------------------&zwsp;-----------------------------------------------------------&zwsp;--------------------------------------------- - [{"HR": 73, "location": [47.763, 13.4034], "start time": "2018-10-14 10:05:14"}, {"HR": 135, "location": [47.706, 13.2635], "start time": "2018-10-14 10:39:21"}] - - - - - To retrieve the contents of an array, you typically use the - [*] operator. 
- The following example will return the location coordinates for all - the available track segments: - -=> select jsonb_path_query(:'json', '$.track.segments[*].location'); - jsonb_path_query -------------------- - [47.763, 13.4034] - [47.706, 13.2635] - - Here we started with the whole JSON input value ($), - then the .track accessor selected the JSON object - associated with the "track" object key, then - the .segments accessor selected the JSON array - associated with the "segments" key within that - object, then the [*] accessor selected each element - of that array (producing a series of items), then - the .location accessor selected the JSON array - associated with the "location" key within each of - those objects. In this example, each of those objects had - a "location" key; but if any of them did not, - the .location accessor would have simply produced no - output for that input item. - - - - To return the coordinates of the first segment only, you can - specify the corresponding subscript in the [] - accessor operator. Recall that JSON array indexes are 0-relative: - -=> select jsonb_path_query(:'json', '$.track.segments[0].location'); - jsonb_path_query -------------------- - [47.763, 13.4034] - - - - - The result of each path evaluation step can be processed - by one or more of the jsonpath operators and methods - listed in . - Each method name must be preceded by a dot. For example, - you can get the size of an array: - -=> select jsonb_path_query(:'json', '$.track.segments.size()'); - jsonb_path_query ------------------- - 2 - - More examples of using jsonpath operators - and methods within path expressions appear below in - . - - - - A path can also contain - filter expressions that work similarly to the - WHERE clause in SQL. A filter expression begins with - a question mark and provides a condition in parentheses: - - -? (condition) - - - - - Filter expressions must be written just after the path evaluation step - to which they should apply. The result of that step is filtered to include - only those items that satisfy the provided condition. SQL/JSON defines - three-valued logic, so the condition can - produce true, false, - or unknown. The unknown value - plays the same role as SQL NULL and can be tested - for with the is unknown predicate. Further path - evaluation steps use only those items for which the filter expression - returned true. - - - - The functions and operators that can be used in filter expressions are - listed in . Within a - filter expression, the @ variable denotes the value - being considered (i.e., one result of the preceding path step). You can - write accessor operators after @ to retrieve component - items. - - - - For example, suppose you would like to retrieve all heart rate values higher - than 130. You can achieve this as follows: - -=> select jsonb_path_query(:'json', '$.track.segments[*].HR ? (@ > 130)'); - jsonb_path_query ------------------- - 135 - - - - - To get the start times of segments with such values, you have to - filter out irrelevant segments before selecting the start times, so the - filter expression is applied to the previous step, and the path used - in the condition is different: - -=> select jsonb_path_query(:'json', '$.track.segments[*] ? (@.HR > 130)."start time"'); - jsonb_path_query ------------------------ - "2018-10-14 10:39:21" - - - - - You can use several filter expressions in sequence, if required. 
- The following example selects start times of all segments that - contain locations with relevant coordinates and high heart rate values: - -=> select jsonb_path_query(:'json', '$.track.segments[*] ? (@.location[1] < 13.4) ? (@.HR > 130)."start time"'); - jsonb_path_query ------------------------ - "2018-10-14 10:39:21" - - - - - Using filter expressions at different nesting levels is also allowed. - The following example first filters all segments by location, and then - returns high heart rate values for these segments, if available: - -=> select jsonb_path_query(:'json', '$.track.segments[*] ? (@.location[1] < 13.4).HR ? (@ > 130)'); - jsonb_path_query ------------------- - 135 - - - - - You can also nest filter expressions within each other. - This example returns the size of the track if it contains any - segments with high heart rate values, or an empty sequence otherwise: - -=> select jsonb_path_query(:'json', '$.track ? (exists(@.segments[*] ? (@.HR > 130))).segments.size()'); - jsonb_path_query ------------------- - 2 - - - - - Deviations from the SQL Standard - - PostgreSQL's implementation of the SQL/JSON path - language has the following deviations from the SQL/JSON standard. - - - - Boolean Predicate Check Expressions - - As an extension to the SQL standard, - a PostgreSQL path expression can be a - Boolean predicate, whereas the SQL standard allows predicates only within - filters. While SQL-standard path expressions return the relevant - element(s) of the queried JSON value, predicate check expressions - return the single three-valued jsonb result of the - predicate: true, - false, or null. - For example, we could write this SQL-standard filter expression: - -=> select jsonb_path_query(:'json', '$.track.segments ?(@[*].HR > 130)'); - jsonb_path_query ------------------------------------------------------------&zwsp;---------------------- - {"HR": 135, "location": [47.706, 13.2635], "start time": "2018-10-14 10:39:21"} - - The similar predicate check expression simply - returns true, indicating that a match exists: - -=> select jsonb_path_query(:'json', '$.track.segments[*].HR > 130'); - jsonb_path_query ------------------- - true - - - - - - Predicate check expressions are required in the - @@ operator (and the - jsonb_path_match function), and should not be used - with the @? operator (or the - jsonb_path_exists function). - - - - - - Regular Expression Interpretation - - There are minor differences in the interpretation of regular - expression patterns used in like_regex filters, as - described in . - - - - - - Strict and Lax Modes - - When you query JSON data, the path expression may not match the - actual JSON data structure. An attempt to access a non-existent - member of an object or element of an array is defined as a - structural error. SQL/JSON path expressions have two modes - of handling structural errors: - - - - - - lax (default) — the path engine implicitly adapts - the queried data to the specified path. - Any structural errors that cannot be fixed as described below - are suppressed, producing no match. - - - - - strict — if a structural error occurs, an error is raised. - - - - - - Lax mode facilitates matching of a JSON document and path - expression when the JSON data does not conform to the expected schema. - If an operand does not match the requirements of a particular operation, - it can be automatically wrapped as an SQL/JSON array, or unwrapped by - converting its elements into an SQL/JSON sequence before performing - the operation. 
Also, comparison operators automatically unwrap their - operands in lax mode, so you can compare SQL/JSON arrays - out-of-the-box. An array of size 1 is considered equal to its sole element. - Automatic unwrapping is not performed when: - - - - The path expression contains type() or - size() methods that return the type - and the number of elements in the array, respectively. - - - - - The queried JSON data contain nested arrays. In this case, only - the outermost array is unwrapped, while all the inner arrays - remain unchanged. Thus, implicit unwrapping can only go one - level down within each path evaluation step. - - - - - - - For example, when querying the GPS data listed above, you can - abstract from the fact that it stores an array of segments - when using lax mode: - -=> select jsonb_path_query(:'json', 'lax $.track.segments.location'); - jsonb_path_query -------------------- - [47.763, 13.4034] - [47.706, 13.2635] - - - - - In strict mode, the specified path must exactly match the structure of - the queried JSON document, so using this path - expression will cause an error: - -=> select jsonb_path_query(:'json', 'strict $.track.segments.location'); -ERROR: jsonpath member accessor can only be applied to an object - - To get the same result as in lax mode, you have to explicitly unwrap the - segments array: - -=> select jsonb_path_query(:'json', 'strict $.track.segments[*].location'); - jsonb_path_query -------------------- - [47.763, 13.4034] - [47.706, 13.2635] - - - - - The unwrapping behavior of lax mode can lead to surprising results. For - instance, the following query using the .** accessor - selects every HR value twice: - -=> select jsonb_path_query(:'json', 'lax $.**.HR'); - jsonb_path_query ------------------- - 73 - 135 - 73 - 135 - - This happens because the .** accessor selects both - the segments array and each of its elements, while - the .HR accessor automatically unwraps arrays when - using lax mode. To avoid surprising results, we recommend using - the .** accessor only in strict mode. The - following query selects each HR value just once: - -=> select jsonb_path_query(:'json', 'strict $.**.HR'); - jsonb_path_query ------------------- - 73 - 135 - - - - - The unwrapping of arrays can also lead to unexpected results. Consider this - example, which selects all the location arrays: - -=> select jsonb_path_query(:'json', 'lax $.track.segments[*].location'); - jsonb_path_query -------------------- - [47.763, 13.4034] - [47.706, 13.2635] -(2 rows) - - As expected it returns the full arrays. But applying a filter expression - causes the arrays to be unwrapped to evaluate each item, returning only the - items that match the expression: - -=> select jsonb_path_query(:'json', 'lax $.track.segments[*].location ?(@[*] > 15)'); - jsonb_path_query ------------------- - 47.763 - 47.706 -(2 rows) - - This despite the fact that the full arrays are selected by the path - expression. Use strict mode to restore selecting the arrays: - -=> select jsonb_path_query(:'json', 'strict $.track.segments[*].location ?(@[*] > 15)'); - jsonb_path_query -------------------- - [47.763, 13.4034] - [47.706, 13.2635] -(2 rows) - - - - - - SQL/JSON Path Operators and Methods - - - shows the operators and - methods available in jsonpath. Note that while the unary - operators and methods can be applied to multiple values resulting from a - preceding path step, the binary operators (addition etc.) can only be - applied to single values. 
In lax mode, methods applied to an array will be - executed for each value in the array. The exceptions are - .type() and .size(), which apply to - the array itself. - - - - <type>jsonpath</type> Operators and Methods - - - - - Operator/Method - - - Description - - - Example(s) - - - - - - - - number + number - number - - - Addition - - - jsonb_path_query('[2]', '$[0] + 3') - 5 - - - - - - + number - number - - - Unary plus (no operation); unlike addition, this can iterate over - multiple values - - - jsonb_path_query_array('{"x": [2,3,4]}', '+ $.x') - [2, 3, 4] - - - - - - number - number - number - - - Subtraction - - - jsonb_path_query('[2]', '7 - $[0]') - 5 - - - - - - - number - number - - - Negation; unlike subtraction, this can iterate over - multiple values - - - jsonb_path_query_array('{"x": [2,3,4]}', '- $.x') - [-2, -3, -4] - - - - - - number * number - number - - - Multiplication - - - jsonb_path_query('[4]', '2 * $[0]') - 8 - - - - - - number / number - number - - - Division - - - jsonb_path_query('[8.5]', '$[0] / 2') - 4.2500000000000000 - - - - - - number % number - number - - - Modulo (remainder) - - - jsonb_path_query('[32]', '$[0] % 10') - 2 - - - - - - value . type() - string - - - Type of the JSON item (see json_typeof) - - - jsonb_path_query_array('[1, "2", {}]', '$[*].type()') - ["number", "string", "object"] - - - - - - value . size() - number - - - Size of the JSON item (number of array elements, or 1 if not an - array) - - - jsonb_path_query('{"m": [11, 15]}', '$.m.size()') - 2 - - - - - - value . boolean() - boolean - - - Boolean value converted from a JSON boolean, number, or string - - - jsonb_path_query_array('[1, "yes", false]', '$[*].boolean()') - [true, true, false] - - - - - - value . string() - string - - - String value converted from a JSON boolean, number, string, or - datetime - - - jsonb_path_query_array('[1.23, "xyz", false]', '$[*].string()') - ["1.23", "xyz", "false"] - - - jsonb_path_query('"2023-08-15 12:34:56"', '$.timestamp().string()') - "2023-08-15T12:34:56" - - - - - - value . double() - number - - - Approximate floating-point number converted from a JSON number or - string - - - jsonb_path_query('{"len": "1.9"}', '$.len.double() * 2') - 3.8 - - - - - - number . ceiling() - number - - - Nearest integer greater than or equal to the given number - - - jsonb_path_query('{"h": 1.3}', '$.h.ceiling()') - 2 - - - - - - number . floor() - number - - - Nearest integer less than or equal to the given number - - - jsonb_path_query('{"h": 1.7}', '$.h.floor()') - 1 - - - - - - number . abs() - number - - - Absolute value of the given number - - - jsonb_path_query('{"z": -0.3}', '$.z.abs()') - 0.3 - - - - - - value . bigint() - bigint - - - Big integer value converted from a JSON number or string - - - jsonb_path_query('{"len": "9876543219"}', '$.len.bigint()') - 9876543219 - - - - - - value . decimal( [ precision [ , scale ] ] ) - decimal - - - Rounded decimal value converted from a JSON number or string - (precision and scale must be - integer values) - - - jsonb_path_query('1234.5678', '$.decimal(6, 2)') - 1234.57 - - - - - - value . integer() - integer - - - Integer value converted from a JSON number or string - - - jsonb_path_query('{"len": "12345"}', '$.len.integer()') - 12345 - - - - - - value . number() - numeric - - - Numeric value converted from a JSON number or string - - - jsonb_path_query('{"len": "123.45"}', '$.len.number()') - 123.45 - - - - - - string . 
datetime() - datetime_type - (see note) - - - Date/time value converted from a string - - - jsonb_path_query('["2015-8-1", "2015-08-12"]', '$[*] ? (@.datetime() < "2015-08-2".datetime())') - "2015-8-1" - - - - - - string . datetime(template) - datetime_type - (see note) - - - Date/time value converted from a string using the - specified to_timestamp template - - - jsonb_path_query_array('["12:30", "18:40"]', '$[*].datetime("HH24:MI")') - ["12:30:00", "18:40:00"] - - - - - - string . date() - date - - - Date value converted from a string - - - jsonb_path_query('"2023-08-15"', '$.date()') - "2023-08-15" - - - - - - string . time() - time without time zone - - - Time without time zone value converted from a string - - - jsonb_path_query('"12:34:56"', '$.time()') - "12:34:56" - - - - - - string . time(precision) - time without time zone - - - Time without time zone value converted from a string, with fractional - seconds adjusted to the given precision - - - jsonb_path_query('"12:34:56.789"', '$.time(2)') - "12:34:56.79" - - - - - - string . time_tz() - time with time zone - - - Time with time zone value converted from a string - - - jsonb_path_query('"12:34:56 +05:30"', '$.time_tz()') - "12:34:56+05:30" - - - - - - string . time_tz(precision) - time with time zone - - - Time with time zone value converted from a string, with fractional - seconds adjusted to the given precision - - - jsonb_path_query('"12:34:56.789 +05:30"', '$.time_tz(2)') - "12:34:56.79+05:30" - - - - - - string . timestamp() - timestamp without time zone - - - Timestamp without time zone value converted from a string - - - jsonb_path_query('"2023-08-15 12:34:56"', '$.timestamp()') - "2023-08-15T12:34:56" - - - - - - string . timestamp(precision) - timestamp without time zone - - - Timestamp without time zone value converted from a string, with - fractional seconds adjusted to the given precision - - - jsonb_path_query('"2023-08-15 12:34:56.789"', '$.timestamp(2)') - "2023-08-15T12:34:56.79" - - - - - - string . timestamp_tz() - timestamp with time zone - - - Timestamp with time zone value converted from a string - - - jsonb_path_query('"2023-08-15 12:34:56 +05:30"', '$.timestamp_tz()') - "2023-08-15T12:34:56+05:30" - - - - - - string . timestamp_tz(precision) - timestamp with time zone - - - Timestamp with time zone value converted from a string, with fractional - seconds adjusted to the given precision - - - jsonb_path_query('"2023-08-15 12:34:56.789 +05:30"', '$.timestamp_tz(2)') - "2023-08-15T12:34:56.79+05:30" - - - - - - object . keyvalue() - array - - - The object's key-value pairs, represented as an array of objects - containing three fields: "key", - "value", and "id"; - "id" is a unique identifier of the object the - key-value pair belongs to - - - jsonb_path_query_array('{"x": "20", "y": 32}', '$.keyvalue()') - [{"id": 0, "key": "x", "value": "20"}, {"id": 0, "key": "y", "value": 32}] - - - - -
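-
-    The unary methods listed above can also be chained, each step being
-    applied to the result of the previous one. A small illustration
-    (not drawn from the table) combining abs() and
-    ceiling():
-
-=> select jsonb_path_query('[-1.5]', '$[0].abs().ceiling()');
- jsonb_path_query
-------------------
- 2
-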
- - - - The result type of the datetime() and - datetime(template) - methods can be date, timetz, time, - timestamptz, or timestamp. - Both methods determine their result type dynamically. - - - The datetime() method sequentially tries to - match its input string to the ISO formats - for date, timetz, time, - timestamptz, and timestamp. It stops on - the first matching format and emits the corresponding data type. - - - The datetime(template) - method determines the result type according to the fields used in the - provided template string. - - - The datetime() and - datetime(template) methods - use the same parsing rules as the to_timestamp SQL - function does (see ), with three - exceptions. First, these methods don't allow unmatched template - patterns. Second, only the following separators are allowed in the - template string: minus sign, period, solidus (slash), comma, apostrophe, - semicolon, colon and space. Third, separators in the template string - must exactly match the input string. - - - If different date/time types need to be compared, an implicit cast is - applied. A date value can be cast to timestamp - or timestamptz, timestamp can be cast to - timestamptz, and time to timetz. - However, all but the first of these conversions depend on the current - setting, and thus can only be performed - within timezone-aware jsonpath functions. Similarly, other - date/time-related methods that convert strings to date/time types - also do this casting, which may involve the current - setting. Therefore, these conversions can - also only be performed within timezone-aware jsonpath - functions. - - - - - shows the available - filter expression elements. - - - - <type>jsonpath</type> Filter Expression Elements - - - - - Predicate/Value - - - Description - - - Example(s) - - - - - - - - value == value - boolean - - - Equality comparison (this, and the other comparison operators, work on - all JSON scalar values) - - - jsonb_path_query_array('[1, "a", 1, 3]', '$[*] ? (@ == 1)') - [1, 1] - - - jsonb_path_query_array('[1, "a", 1, 3]', '$[*] ? (@ == "a")') - ["a"] - - - - - - value != value - boolean - - - value <> value - boolean - - - Non-equality comparison - - - jsonb_path_query_array('[1, 2, 1, 3]', '$[*] ? (@ != 1)') - [2, 3] - - - jsonb_path_query_array('["a", "b", "c"]', '$[*] ? (@ <> "b")') - ["a", "c"] - - - - - - value < value - boolean - - - Less-than comparison - - - jsonb_path_query_array('[1, 2, 3]', '$[*] ? (@ < 2)') - [1] - - - - - - value <= value - boolean - - - Less-than-or-equal-to comparison - - - jsonb_path_query_array('["a", "b", "c"]', '$[*] ? (@ <= "b")') - ["a", "b"] - - - - - - value > value - boolean - - - Greater-than comparison - - - jsonb_path_query_array('[1, 2, 3]', '$[*] ? (@ > 2)') - [3] - - - - - - value >= value - boolean - - - Greater-than-or-equal-to comparison - - - jsonb_path_query_array('[1, 2, 3]', '$[*] ? (@ >= 2)') - [2, 3] - - - - - - true - boolean - - - JSON constant true - - - jsonb_path_query('[{"name": "John", "parent": false}, {"name": "Chris", "parent": true}]', '$[*] ? (@.parent == true)') - {"name": "Chris", "parent": true} - - - - - - false - boolean - - - JSON constant false - - - jsonb_path_query('[{"name": "John", "parent": false}, {"name": "Chris", "parent": true}]', '$[*] ? 
(@.parent == false)') - {"name": "John", "parent": false} - - - - - - null - value - - - JSON constant null (note that, unlike in SQL, - comparison to null works normally) - - - jsonb_path_query('[{"name": "Mary", "job": null}, {"name": "Michael", "job": "driver"}]', '$[*] ? (@.job == null) .name') - "Mary" - - - - - - boolean && boolean - boolean - - - Boolean AND - - - jsonb_path_query('[1, 3, 7]', '$[*] ? (@ > 1 && @ < 5)') - 3 - - - - - - boolean || boolean - boolean - - - Boolean OR - - - jsonb_path_query('[1, 3, 7]', '$[*] ? (@ < 1 || @ > 5)') - 7 - - - - - - ! boolean - boolean - - - Boolean NOT - - - jsonb_path_query('[1, 3, 7]', '$[*] ? (!(@ < 5))') - 7 - - - - - - boolean is unknown - boolean - - - Tests whether a Boolean condition is unknown. - - - jsonb_path_query('[-1, 2, 7, "foo"]', '$[*] ? ((@ > 0) is unknown)') - "foo" - - - - - - string like_regex string flag string - boolean - - - Tests whether the first operand matches the regular expression - given by the second operand, optionally with modifications - described by a string of flag characters (see - ). - - - jsonb_path_query_array('["abc", "abd", "aBdC", "abdacb", "babc"]', '$[*] ? (@ like_regex "^ab.*c")') - ["abc", "abdacb"] - - - jsonb_path_query_array('["abc", "abd", "aBdC", "abdacb", "babc"]', '$[*] ? (@ like_regex "^ab.*c" flag "i")') - ["abc", "aBdC", "abdacb"] - - - - - - string starts with string - boolean - - - Tests whether the second operand is an initial substring of the first - operand. - - - jsonb_path_query('["John Smith", "Mary Stone", "Bob Johnson"]', '$[*] ? (@ starts with "John")') - "John Smith" - - - - - - exists ( path_expression ) - boolean - - - Tests whether a path expression matches at least one SQL/JSON item. - Returns unknown if the path expression would result - in an error; the second example uses this to avoid a no-such-key error - in strict mode. - - - jsonb_path_query('{"x": [1, 2], "y": [2, 4]}', 'strict $.* ? (exists (@ ? (@[*] > 2)))') - [2, 4] - - - jsonb_path_query_array('{"value": 41}', 'strict $ ? (exists (@.name)) .name') - [] - - - - -
- -
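-
-    The filter expression elements listed above can be combined freely.
-    For example, the following query keeps only the values of an array
-    that fall within an interval, using the && predicate:
-
-=> select jsonb_path_query_array('[1, 2, 3, 4, 5]', '$[*] ? (@ > 1 && @ < 5)');
- jsonb_path_query_array
-------------------------
- [2, 3, 4]
-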
- - - SQL/JSON Regular Expressions - - - LIKE_REGEX - in SQL/JSON - - - - SQL/JSON path expressions allow matching text to a regular expression - with the like_regex filter. For example, the - following SQL/JSON path query would case-insensitively match all - strings in an array that start with an English vowel: - -$[*] ? (@ like_regex "^[aeiou]" flag "i") - - - - - The optional flag string may include one or more of - the characters - i for case-insensitive match, - m to allow ^ - and $ to match at newlines, - s to allow . to match a newline, - and q to quote the whole pattern (reducing the - behavior to a simple substring match). - - - - The SQL/JSON standard borrows its definition for regular expressions - from the LIKE_REGEX operator, which in turn uses the - XQuery standard. PostgreSQL does not currently support the - LIKE_REGEX operator. Therefore, - the like_regex filter is implemented using the - POSIX regular expression engine described in - . This leads to various minor - discrepancies from standard SQL/JSON behavior, which are cataloged in - . - Note, however, that the flag-letter incompatibilities described there - do not apply to SQL/JSON, as it translates the XQuery flag letters to - match what the POSIX engine expects. - - - - Keep in mind that the pattern argument of like_regex - is a JSON path string literal, written according to the rules given in - . This means in particular that any - backslashes you want to use in the regular expression must be doubled. - For example, to match string values of the root document that contain - only digits: - -$.* ? (@ like_regex "^\\d+$") - - - -
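-
-    For instance, the digits-only pattern above can be applied with
-    jsonb_path_query_array like this (assuming
-    standard_conforming_strings is on, so that the
-    doubled backslashes reach the path parser intact):
-
-=> select jsonb_path_query_array('["1234", "12a", "987"]', '$[*] ? (@ like_regex "^\\d+$")');
- jsonb_path_query_array
-------------------------
- ["1234", "987"]
-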
- - - SQL/JSON Query Functions - - SQL/JSON functions JSON_EXISTS(), - JSON_QUERY(), and JSON_VALUE() - described in can be used - to query JSON documents. Each of these functions apply a - path_expression (an SQL/JSON path query) to a - context_item (the document). See - for more details on what - the path_expression can contain. The - path_expression can also reference variables, - whose values are specified with their respective names in the - PASSING clause that is supported by each function. - context_item can be a jsonb value - or a character string that can be successfully cast to jsonb. - - - - SQL/JSON Query Functions - - - - - Function signature - - - Description - - - Example(s) - - - - - - - json_exists - -JSON_EXISTS ( -context_item, path_expression - PASSING { value AS varname } , ... -{ TRUE | FALSE | UNKNOWN | ERROR } ON ERROR ) boolean - - - - - - Returns true if the SQL/JSON path_expression - applied to the context_item yields any - items, false otherwise. - - - - - The ON ERROR clause specifies the behavior if - an error occurs during path_expression - evaluation. Specifying ERROR will cause an error to - be thrown with the appropriate message. Other options include - returning boolean values FALSE or - TRUE or the value UNKNOWN which - is actually an SQL NULL. The default when no ON ERROR - clause is specified is to return the boolean value - FALSE. - - - - - Examples: - - - JSON_EXISTS(jsonb '{"key1": [1,2,3]}', 'strict $.key1[*] ? (@ > $x)' PASSING 2 AS x) - t - - - JSON_EXISTS(jsonb '{"a": [1,2,3]}', 'lax $.a[5]' ERROR ON ERROR) - f - - - JSON_EXISTS(jsonb '{"a": [1,2,3]}', 'strict $.a[5]' ERROR ON ERROR) - - -ERROR: jsonpath array subscript is out of bounds - - - - - - json_query - -JSON_QUERY ( -context_item, path_expression - PASSING { value AS varname } , ... - RETURNING data_type FORMAT JSON ENCODING UTF8 - { WITHOUT | WITH { CONDITIONAL | UNCONDITIONAL } } ARRAY WRAPPER - { KEEP | OMIT } QUOTES ON SCALAR STRING - { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON EMPTY - { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON ERROR ) jsonb - - - - - - Returns the result of applying the SQL/JSON - path_expression to the - context_item. - - - - - By default, the result is returned as a value of type jsonb, - though the RETURNING clause can be used to return - as some other type to which it can be successfully coerced. - - - - - If the path expression may return multiple values, it might be necessary - to wrap those values using the WITH WRAPPER clause to - make it a valid JSON string, because the default behavior is to not wrap - them, as if WITHOUT WRAPPER were specified. The - WITH WRAPPER clause is by default taken to mean - WITH UNCONDITIONAL WRAPPER, which means that even a - single result value will be wrapped. To apply the wrapper only when - multiple values are present, specify WITH CONDITIONAL WRAPPER. - Getting multiple values in result will be treated as an error if - WITHOUT WRAPPER is specified. - - - - - If the result is a scalar string, by default, the returned value will - be surrounded by quotes, making it a valid JSON value. It can be made - explicit by specifying KEEP QUOTES. Conversely, - quotes can be omitted by specifying OMIT QUOTES. - To ensure that the result is a valid JSON value, OMIT QUOTES - cannot be specified when WITH WRAPPER is also - specified. - - - - - The ON EMPTY clause specifies the behavior if - evaluating path_expression yields an empty - set. 
The ON ERROR clause specifies the behavior - if an error occurs when evaluating path_expression, - when coercing the result value to the RETURNING type, - or when evaluating the ON EMPTY expression if the - path_expression evaluation returns an empty - set. - - - - - For both ON EMPTY and ON ERROR, - specifying ERROR will cause an error to be thrown with - the appropriate message. Other options include returning an SQL NULL, an - empty array (EMPTY ARRAY), - an empty object (EMPTY OBJECT), or a user-specified - expression (DEFAULT expression) - that can be coerced to jsonb or the type specified in RETURNING. - The default when ON EMPTY or ON ERROR - is not specified is to return an SQL NULL value. - - - - - Examples: - - - JSON_QUERY(jsonb '[1,[2,3],null]', 'lax $[*][$off]' PASSING 1 AS off WITH CONDITIONAL WRAPPER) - 3 - - - JSON_QUERY(jsonb '{"a": "[1, 2]"}', 'lax $.a' OMIT QUOTES) - [1, 2] - - - JSON_QUERY(jsonb '{"a": "[1, 2]"}', 'lax $.a' RETURNING int[] OMIT QUOTES ERROR ON ERROR) - - -ERROR: malformed array literal: "[1, 2]" -DETAIL: Missing "]" after array dimensions. - - - - - - - json_value - -JSON_VALUE ( -context_item, path_expression - PASSING { value AS varname } , ... - RETURNING data_type - { ERROR | NULL | DEFAULT expression } ON EMPTY - { ERROR | NULL | DEFAULT expression } ON ERROR ) text - - - - - - Returns the result of applying the SQL/JSON - path_expression to the - context_item. - - - - - Only use JSON_VALUE() if the extracted value is - expected to be a single SQL/JSON scalar item; - getting multiple values will be treated as an error. If you expect that - extracted value might be an object or an array, use the - JSON_QUERY function instead. - - - - - By default, the result, which must be a single scalar value, is - returned as a value of type text, though the - RETURNING clause can be used to return as some - other type to which it can be successfully coerced. - - - - - The ON ERROR and ON EMPTY - clauses have similar semantics as mentioned in the description of - JSON_QUERY, except the set of values returned in - lieu of throwing an error is different. - - - - - Note that scalar strings returned by JSON_VALUE - always have their quotes removed, equivalent to specifying - OMIT QUOTES in JSON_QUERY. - - - - - Examples: - - - JSON_VALUE(jsonb '"123.45"', '$' RETURNING float) - 123.45 - - - JSON_VALUE(jsonb '"03:04 2015-02-01"', '$.datetime("HH24:MI YYYY-MM-DD")' RETURNING date) - 2015-02-01 - - - JSON_VALUE(jsonb '[1,2]', 'strict $[$off]' PASSING 1 as off) - 2 - - - JSON_VALUE(jsonb '[1,2]', 'strict $[*]' DEFAULT 9 ON ERROR) - 9 - - - - - -
- - - The context_item expression is converted to - jsonb by an implicit cast if the expression is not already of - type jsonb. Note, however, that any parsing errors that occur - during that conversion are thrown unconditionally, that is, are not - handled according to the (specified or implicit) ON ERROR - clause. - - - - - JSON_VALUE() returns an SQL NULL if - path_expression returns a JSON - null, whereas JSON_QUERY() returns - the JSON null as is. - - -
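-
-   A minimal illustration of the difference described in the second
-   note above:
-
-SELECT JSON_QUERY(jsonb '{"a": null}', '$.a') AS q,
-       JSON_VALUE(jsonb '{"a": null}', '$.a') AS v;
-
-  q   | v
-------+---
- null |
-(1 row)
-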
- - - JSON_TABLE - - json_table - - - - JSON_TABLE is an SQL/JSON function which - queries JSON data - and presents the results as a relational view, which can be accessed as a - regular SQL table. You can use JSON_TABLE inside - the FROM clause of a SELECT, - UPDATE, or DELETE and as data source - in a MERGE statement. - - - - Taking JSON data as input, JSON_TABLE uses a JSON path - expression to extract a part of the provided data to use as a - row pattern for the constructed view. Each SQL/JSON - value given by the row pattern serves as source for a separate row in the - constructed view. - - - - To split the row pattern into columns, JSON_TABLE - provides the COLUMNS clause that defines the - schema of the created view. For each column, a separate JSON path expression - can be specified to be evaluated against the row pattern to get an SQL/JSON - value that will become the value for the specified column in a given output - row. - - - - JSON data stored at a nested level of the row pattern can be extracted using - the NESTED PATH clause. Each - NESTED PATH clause can be used to generate one or more - columns using the data from a nested level of the row pattern. Those - columns can be specified using a COLUMNS clause that - looks similar to the top-level COLUMNS clause. Rows constructed from - NESTED COLUMNS are called child rows and are joined - against the row constructed from the columns specified in the parent - COLUMNS clause to get the row in the final view. Child - columns themselves may contain a NESTED PATH - specification thus allowing to extract data located at arbitrary nesting - levels. Columns produced by multiple NESTED PATHs at the - same level are considered to be siblings of each - other and their rows after joining with the parent row are combined using - UNION. - - - - The rows produced by JSON_TABLE are laterally - joined to the row that generated them, so you do not have to explicitly join - the constructed view with the original table holding JSON - data. - - - - The syntax is: - - - -JSON_TABLE ( - context_item, path_expression AS json_path_name PASSING { value AS varname } , ... - COLUMNS ( json_table_column , ... ) - { ERROR | EMPTY ARRAY} ON ERROR -) - - -where json_table_column is: - - name FOR ORDINALITY - | name type - FORMAT JSON ENCODING UTF8 - PATH path_expression - { WITHOUT | WITH { CONDITIONAL | UNCONDITIONAL } } ARRAY WRAPPER - { KEEP | OMIT } QUOTES ON SCALAR STRING - { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON EMPTY - { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON ERROR - | name type EXISTS PATH path_expression - { ERROR | TRUE | FALSE | UNKNOWN } ON ERROR - | NESTED PATH path_expression AS json_path_name COLUMNS ( json_table_column , ... ) - - - - Each syntax element is described below in more detail. - - - - - - context_item, path_expression AS json_path_name PASSING { value AS varname } , ... - - - - The context_item specifies the input document - to query, the path_expression is an SQL/JSON - path expression defining the query, and json_path_name - is an optional name for the path_expression. - The optional PASSING clause provides data values for - the variables mentioned in the path_expression. - The result of the input data evaluation using the aforementioned elements - is called the row pattern, which is used as the - source for row values in the constructed view. - - - - - - - COLUMNS ( json_table_column , ... ) - - - - - The COLUMNS clause defining the schema of the - constructed view. 
In this clause, you can specify each column to be - filled with an SQL/JSON value obtained by applying a JSON path expression - against the row pattern. json_table_column has - the following variants: - - - - - - name FOR ORDINALITY - - - - Adds an ordinality column that provides sequential row numbering starting - from 1. Each NESTED PATH (see below) gets its own - counter for any nested ordinality columns. - - - - - - - name type - FORMAT JSON ENCODING UTF8 - PATH path_expression - - - - Inserts an SQL/JSON value obtained by applying - path_expression against the row pattern into - the view's output row after coercing it to specified - type. - - - Specifying FORMAT JSON makes it explicit that you - expect the value to be a valid json object. It only - makes sense to specify FORMAT JSON if - type is one of bpchar, - bytea, character varying, name, - json, jsonb, text, or a domain over - these types. - - - Optionally, you can specify WRAPPER and - QUOTES clauses to format the output. Note that - specifying OMIT QUOTES overrides - FORMAT JSON if also specified, because unquoted - literals do not constitute valid json values. - - - Optionally, you can use ON EMPTY and - ON ERROR clauses to specify whether to throw the error - or return the specified value when the result of JSON path evaluation is - empty and when an error occurs during JSON path evaluation or when - coercing the SQL/JSON value to the specified type, respectively. The - default for both is to return a NULL value. - - - - This clause is internally turned into and has the same semantics as - JSON_VALUE or JSON_QUERY. - The latter if the specified type is not a scalar type or if either of - FORMAT JSON, WRAPPER, or - QUOTES clause is present. - - - - - - - - name type - EXISTS PATH path_expression - - - - Inserts a boolean value obtained by applying - path_expression against the row pattern - into the view's output row after coercing it to specified - type. - - - The value corresponds to whether applying the PATH - expression to the row pattern yields any values. - - - The specified type should have a cast from the - boolean type. - - - Optionally, you can use ON ERROR to specify whether to - throw the error or return the specified value when an error occurs during - JSON path evaluation or when coercing SQL/JSON value to the specified - type. The default is to return a boolean value - FALSE. - - - - This clause is internally turned into and has the same semantics as - JSON_EXISTS. - - - - - - - - NESTED PATH path_expression AS json_path_name - COLUMNS ( json_table_column , ... ) - - - - - Extracts SQL/JSON values from nested levels of the row pattern, - generates one or more columns as defined by the COLUMNS - subclause, and inserts the extracted SQL/JSON values into those - columns. The json_table_column - expression in the COLUMNS subclause uses the same - syntax as in the parent COLUMNS clause. - - - - The NESTED PATH syntax is recursive, - so you can go down multiple nested levels by specifying several - NESTED PATH subclauses within each other. - It allows to unnest the hierarchy of JSON objects and arrays - in a single function invocation rather than chaining several - JSON_TABLE expressions in an SQL statement. - - - - - - - - In each variant of json_table_column described - above, if the PATH clause is omitted, path expression - $.name is used, where - name is the provided column name. - - - - - - - - - AS json_path_name - - - - - The optional json_path_name serves as an - identifier of the provided path_expression. 
- The name must be unique and distinct from the column names. - - - - - - - { ERROR | EMPTY } ON ERROR - - - - - The optional ON ERROR can be used to specify how to - handle errors when evaluating the top-level - path_expression. Use ERROR - if you want the errors to be thrown and EMPTY to - return an empty table, that is, a table containing 0 rows. Note that - this clause does not affect the errors that occur when evaluating - columns, for which the behavior depends on whether the - ON ERROR clause is specified against a given column. - - - - - - Examples - - - In the examples that follow, the following table containing JSON data - will be used: - - -CREATE TABLE my_films ( js jsonb ); - -INSERT INTO my_films VALUES ( -'{ "favorites" : [ - { "kind" : "comedy", "films" : [ - { "title" : "Bananas", - "director" : "Woody Allen"}, - { "title" : "The Dinner Game", - "director" : "Francis Veber" } ] }, - { "kind" : "horror", "films" : [ - { "title" : "Psycho", - "director" : "Alfred Hitchcock" } ] }, - { "kind" : "thriller", "films" : [ - { "title" : "Vertigo", - "director" : "Alfred Hitchcock" } ] }, - { "kind" : "drama", "films" : [ - { "title" : "Yojimbo", - "director" : "Akira Kurosawa" } ] } - ] }'); - - - - - The following query shows how to use JSON_TABLE to - turn the JSON objects in the my_films table - to a view containing columns for the keys kind, - title, and director contained in - the original JSON along with an ordinality column: - - -SELECT jt.* FROM - my_films, - JSON_TABLE (js, '$.favorites[*]' COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - title text PATH '$.films[*].title' WITH WRAPPER, - director text PATH '$.films[*].director' WITH WRAPPER)) AS jt; - - - - id | kind | title | director -----+----------+--------------------------------+---------------------------------- - 1 | comedy | ["Bananas", "The Dinner Game"] | ["Woody Allen", "Francis Veber"] - 2 | horror | ["Psycho"] | ["Alfred Hitchcock"] - 3 | thriller | ["Vertigo"] | ["Alfred Hitchcock"] - 4 | drama | ["Yojimbo"] | ["Akira Kurosawa"] -(4 rows) - - - - - The following is a modified version of the above query to show the - usage of PASSING arguments in the filter specified in - the top-level JSON path expression and the various options for the - individual columns: - - -SELECT jt.* FROM - my_films, - JSON_TABLE (js, '$.favorites[*] ? (@.films[*].director == $filter)' - PASSING 'Alfred Hitchcock' AS filter - COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - title text FORMAT JSON PATH '$.films[*].title' OMIT QUOTES, - director text PATH '$.films[*].director' KEEP QUOTES)) AS jt; - - - - id | kind | title | director -----+----------+---------+-------------------- - 1 | horror | Psycho | "Alfred Hitchcock" - 2 | thriller | Vertigo | "Alfred Hitchcock" -(2 rows) - - - - - The following is a modified version of the above query to show the usage - of NESTED PATH for populating title and director - columns, illustrating how they are joined to the parent columns id and - kind: - - -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*] ? 
(@.films[*].director == $filter)' - PASSING 'Alfred Hitchcock' AS filter - COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text FORMAT JSON PATH '$.title' OMIT QUOTES, - director text PATH '$.director' KEEP QUOTES))) AS jt; - - - - id | kind | title | director -----+----------+---------+-------------------- - 1 | horror | Psycho | "Alfred Hitchcock" - 2 | thriller | Vertigo | "Alfred Hitchcock" -(2 rows) - - - - - - The following is the same query but without the filter in the root - path: - - -SELECT jt.* FROM - my_films, - JSON_TABLE ( js, '$.favorites[*]' - COLUMNS ( - id FOR ORDINALITY, - kind text PATH '$.kind', - NESTED PATH '$.films[*]' COLUMNS ( - title text FORMAT JSON PATH '$.title' OMIT QUOTES, - director text PATH '$.director' KEEP QUOTES))) AS jt; - - - - id | kind | title | director -----+----------+-----------------+-------------------- - 1 | comedy | Bananas | "Woody Allen" - 1 | comedy | The Dinner Game | "Francis Veber" - 2 | horror | Psycho | "Alfred Hitchcock" - 3 | thriller | Vertigo | "Alfred Hitchcock" - 4 | drama | Yojimbo | "Akira Kurosawa" -(5 rows) - - - - - - The following shows another query using a different JSON - object as input. It shows the UNION "sibling join" between - NESTED paths $.movies[*] and - $.books[*] and also the usage of - FOR ORDINALITY column at NESTED - levels (columns movie_id, book_id, - and author_id): - - -SELECT * FROM JSON_TABLE ( -'{"favorites": - [{"movies": - [{"name": "One", "director": "John Doe"}, - {"name": "Two", "director": "Don Joe"}], - "books": - [{"name": "Mystery", "authors": [{"name": "Brown Dan"}]}, - {"name": "Wonder", "authors": [{"name": "Jun Murakami"}, {"name":"Craig Doe"}]}] -}]}'::json, '$.favorites[*]' -COLUMNS ( - user_id FOR ORDINALITY, - NESTED '$.movies[*]' - COLUMNS ( - movie_id FOR ORDINALITY, - mname text PATH '$.name', - director text), - NESTED '$.books[*]' - COLUMNS ( - book_id FOR ORDINALITY, - bname text PATH '$.name', - NESTED '$.authors[*]' - COLUMNS ( - author_id FOR ORDINALITY, - author_name text PATH '$.name')))); - - - - user_id | movie_id | mname | director | book_id | bname | author_id | author_name ----------+----------+-------+----------+---------+---------+-----------+-------------- - 1 | 1 | One | John Doe | | | | - 1 | 2 | Two | Don Joe | | | | - 1 | | | | 1 | Mystery | 1 | Brown Dan - 1 | | | | 2 | Wonder | 1 | Jun Murakami - 1 | | | | 2 | Wonder | 2 | Craig Doe -(5 rows) - - - - -
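-
-   As mentioned at the beginning of this section, JSON_TABLE
-   can also appear in the FROM clause of an
-   UPDATE or DELETE. The following
-   sketch assumes a hypothetical table
-   film_summary(kind text, n int) that is to hold the
-   number of films per kind:
-
-UPDATE film_summary AS fs
-   SET n = jt.n
-  FROM my_films,
-       JSON_TABLE (js, '$.favorites[*]'
-         COLUMNS (
-           kind text PATH '$.kind',
-           n int PATH '$.films.size()')) AS jt
- WHERE fs.kind = jt.kind;
-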
- - - Sequence Manipulation Functions - - - sequence - - - - This section describes functions for operating on sequence - objects, also called sequence generators or just sequences. - Sequence objects are special single-row tables created with . - Sequence objects are commonly used to generate unique identifiers - for rows of a table. The sequence functions, listed in , provide simple, multiuser-safe - methods for obtaining successive sequence values from sequence - objects. - - - - Sequence Functions - - - - - Function - - - Description - - - - - - - - - nextval - - nextval ( regclass ) - bigint - - - Advances the sequence object to its next value and returns that value. - This is done atomically: even if multiple sessions - execute nextval concurrently, each will safely - receive a distinct sequence value. - If the sequence object has been created with default parameters, - successive nextval calls will return successive - values beginning with 1. Other behaviors can be obtained by using - appropriate parameters in the - command. - - - This function requires USAGE - or UPDATE privilege on the sequence. - - - - - - - setval - - setval ( regclass, bigint , boolean ) - bigint - - - Sets the sequence object's current value, and optionally - its is_called flag. The two-parameter - form sets the sequence's last_value field to the - specified value and sets its is_called field to - true, meaning that the next - nextval will advance the sequence before - returning a value. The value that will be reported - by currval is also set to the specified value. - In the three-parameter form, is_called can be set - to either true - or false. true has the same - effect as the two-parameter form. If it is set - to false, the next nextval - will return exactly the specified value, and sequence advancement - commences with the following nextval. - Furthermore, the value reported by currval is not - changed in this case. For example, - -SELECT setval('myseq', 42); Next nextval will return 43 -SELECT setval('myseq', 42, true); Same as above -SELECT setval('myseq', 42, false); Next nextval will return 42 - - The result returned by setval is just the value of its - second argument. - - - This function requires UPDATE privilege on the - sequence. - - - - - - - currval - - currval ( regclass ) - bigint - - - Returns the value most recently obtained - by nextval for this sequence in the current - session. (An error is reported if nextval has - never been called for this sequence in this session.) Because this is - returning a session-local value, it gives a predictable answer whether - or not other sessions have executed nextval since - the current session did. - - - This function requires USAGE - or SELECT privilege on the sequence. - - - - - - - lastval - - lastval () - bigint - - - Returns the value most recently returned by - nextval in the current session. This function is - identical to currval, except that instead - of taking the sequence name as an argument it refers to whichever - sequence nextval was most recently applied to - in the current session. It is an error to call - lastval if nextval - has not yet been called in the current session. - - - This function requires USAGE - or SELECT privilege on the last used sequence. - - - - -
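-
-   For example, the following session exercises the functions listed
-   above on a sequence created with default parameters:
-
-CREATE SEQUENCE myseq;
-SELECT nextval('myseq');           -- returns 1
-SELECT nextval('myseq');           -- returns 2
-SELECT currval('myseq');           -- returns 2 again
-SELECT setval('myseq', 42, false); -- next nextval will return exactly 42
-SELECT nextval('myseq');           -- returns 42
-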
- - - - To avoid blocking concurrent transactions that obtain numbers from - the same sequence, the value obtained by nextval - is not reclaimed for re-use if the calling transaction later aborts. - This means that transaction aborts or database crashes can result in - gaps in the sequence of assigned values. That can happen without a - transaction abort, too. For example an INSERT with - an ON CONFLICT clause will compute the to-be-inserted - tuple, including doing any required nextval - calls, before detecting any conflict that would cause it to follow - the ON CONFLICT rule instead. - Thus, PostgreSQL sequence - objects cannot be used to obtain gapless - sequences. - - - - Likewise, sequence state changes made by setval - are immediately visible to other transactions, and are not undone if - the calling transaction rolls back. - - - - If the database cluster crashes before committing a transaction - containing a nextval - or setval call, the sequence state change might - not have made its way to persistent storage, so that it is uncertain - whether the sequence will have its original or updated state after the - cluster restarts. This is harmless for usage of the sequence within - the database, since other effects of uncommitted transactions will not - be visible either. However, if you wish to use a sequence value for - persistent outside-the-database purposes, make sure that the - nextval call has been committed before doing so. - - - - - The sequence to be operated on by a sequence function is specified by - a regclass argument, which is simply the OID of the sequence in the - pg_class system catalog. You do not have to look up the - OID by hand, however, since the regclass data type's input - converter will do the work for you. See - for details. - -
- - - - Conditional Expressions - - - CASE - - - - conditional expression - - - - This section describes the SQL-compliant conditional expressions - available in PostgreSQL. - - - - - If your needs go beyond the capabilities of these conditional - expressions, you might want to consider writing a server-side function - in a more expressive programming language. - - - - - - Although COALESCE, GREATEST, and - LEAST are syntactically similar to functions, they are - not ordinary functions, and thus cannot be used with explicit - VARIADIC array arguments. - - - - - <literal>CASE</literal> - - - The SQL CASE expression is a - generic conditional expression, similar to if/else statements in - other programming languages: - - -CASE WHEN condition THEN result - WHEN ... - ELSE result -END - - - CASE clauses can be used wherever - an expression is valid. Each condition is an - expression that returns a boolean result. If the condition's - result is true, the value of the CASE expression is the - result that follows the condition, and the - remainder of the CASE expression is not processed. If the - condition's result is not true, any subsequent WHEN clauses - are examined in the same manner. If no WHEN - condition yields true, the value of the - CASE expression is the result of the - ELSE clause. If the ELSE clause is - omitted and no condition is true, the result is null. - - - - An example: - -SELECT * FROM test; - - a ---- - 1 - 2 - 3 - - -SELECT a, - CASE WHEN a=1 THEN 'one' - WHEN a=2 THEN 'two' - ELSE 'other' - END - FROM test; - - a | case ----+------- - 1 | one - 2 | two - 3 | other - - - - - The data types of all the result - expressions must be convertible to a single output type. - See for more details. - - - - There is a simple form of CASE expression - that is a variant of the general form above: - - -CASE expression - WHEN value THEN result - WHEN ... - ELSE result -END - - - The first - expression is computed, then compared to - each of the value expressions in the - WHEN clauses until one is found that is equal to it. If - no match is found, the result of the - ELSE clause (or a null value) is returned. This is similar - to the switch statement in C. - - - - The example above can be written using the simple - CASE syntax: - -SELECT a, - CASE a WHEN 1 THEN 'one' - WHEN 2 THEN 'two' - ELSE 'other' - END - FROM test; - - a | case ----+------- - 1 | one - 2 | two - 3 | other - - - - - A CASE expression does not evaluate any subexpressions - that are not needed to determine the result. For example, this is a - possible way of avoiding a division-by-zero failure: - -SELECT ... WHERE CASE WHEN x <> 0 THEN y/x > 1.5 ELSE false END; - - - - - - As described in , there are various - situations in which subexpressions of an expression are evaluated at - different times, so that the principle that CASE - evaluates only necessary subexpressions is not ironclad. For - example a constant 1/0 subexpression will usually result in - a division-by-zero failure at planning time, even if it's within - a CASE arm that would never be entered at run time. - - - - - - <literal>COALESCE</literal> - - - COALESCE - - - - NVL - - - - IFNULL - - - -COALESCE(value , ...) - - - - The COALESCE function returns the first of its - arguments that is not null. Null is returned only if all arguments - are null. It is often used to substitute a default value for - null values when data is retrieved for display, for example: - -SELECT COALESCE(description, short_description, '(none)') ... 
- - This returns description if it is not null, otherwise - short_description if it is not null, otherwise (none). - - - - The arguments must all be convertible to a common data type, which - will be the type of the result (see - for details). - - - - Like a CASE expression, COALESCE only - evaluates the arguments that are needed to determine the result; - that is, arguments to the right of the first non-null argument are - not evaluated. This SQL-standard function provides capabilities similar - to NVL and IFNULL, which are used in some other - database systems. - - - - - <literal>NULLIF</literal> - - - NULLIF - - - -NULLIF(value1, value2) - - - - The NULLIF function returns a null value if - value1 equals value2; - otherwise it returns value1. - This can be used to perform the inverse operation of the - COALESCE example given above: - -SELECT NULLIF(value, '(none)') ... - - In this example, if value is (none), - null is returned, otherwise the value of value - is returned. - - - - The two arguments must be of comparable types. - To be specific, they are compared exactly as if you had - written value1 - = value2, so there must be a - suitable = operator available. - - - - The result has the same type as the first argument — but there is - a subtlety. What is actually returned is the first argument of the - implied = operator, and in some cases that will have - been promoted to match the second argument's type. For - example, NULLIF(1, 2.2) yields numeric, - because there is no integer = - numeric operator, - only numeric = numeric. - - - - - - <literal>GREATEST</literal> and <literal>LEAST</literal> - - - GREATEST - - - LEAST - - - -GREATEST(value , ...) - - -LEAST(value , ...) - - - - The GREATEST and LEAST functions select the - largest or smallest value from a list of any number of expressions. - The expressions must all be convertible to a common data type, which - will be the type of the result - (see for details). - - - - NULL values in the argument list are ignored. The result will be NULL - only if all the expressions evaluate to NULL. (This is a deviation from - the SQL standard. According to the standard, the return value is NULL if - any argument is NULL. Some other databases behave this way.) - - - - - - Array Functions and Operators - - - shows the specialized operators - available for array types. - In addition to those, the usual comparison operators shown in are available for - arrays. The comparison operators compare the array contents - element-by-element, using the default B-tree comparison function for - the element data type, and sort based on the first difference. - In multidimensional arrays the elements are visited in row-major order - (last subscript varies most rapidly). - If the contents of two arrays are equal but the dimensionality is - different, the first difference in the dimensionality information - determines the sort order. - - - - Array Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - anyarray @> anyarray - boolean - - - Does the first array contain the second, that is, does each element - appearing in the second array equal some element of the first array? - (Duplicates are not treated specially, - thus ARRAY[1] and ARRAY[1,1] are - each considered to contain the other.) - - - ARRAY[1,4,3] @> ARRAY[3,1,3] - t - - - - - - anyarray <@ anyarray - boolean - - - Is the first array contained by the second? 
- - - ARRAY[2,2,7] <@ ARRAY[1,7,4,2,6] - t - - - - - - anyarray && anyarray - boolean - - - Do the arrays overlap, that is, have any elements in common? - - - ARRAY[1,4,3] && ARRAY[2,1] - t - - - - - - anycompatiblearray || anycompatiblearray - anycompatiblearray - - - Concatenates the two arrays. Concatenating a null or empty array is a - no-op; otherwise the arrays must have the same number of dimensions - (as illustrated by the first example) or differ in number of - dimensions by one (as illustrated by the second). - If the arrays are not of identical element types, they will be coerced - to a common type (see ). - - - ARRAY[1,2,3] || ARRAY[4,5,6,7] - {1,2,3,4,5,6,7} - - - ARRAY[1,2,3] || ARRAY[[4,5,6],[7,8,9.9]] - {{1,2,3},{4,5,6},{7,8,9.9}} - - - - - - anycompatible || anycompatiblearray - anycompatiblearray - - - Concatenates an element onto the front of an array (which must be - empty or one-dimensional). - - - 3 || ARRAY[4,5,6] - {3,4,5,6} - - - - - - anycompatiblearray || anycompatible - anycompatiblearray - - - Concatenates an element onto the end of an array (which must be - empty or one-dimensional). - - - ARRAY[4,5,6] || 7 - {4,5,6,7} - - - - -
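-
-   As a quick check of the element-by-element comparison rule described
-   at the beginning of this section:
-
-SELECT ARRAY[1,2,3] < ARRAY[1,3] AS first_difference_wins,
-       ARRAY[1,2,3] = ARRAY[[1,2,3]] AS same_contents_different_dims;
-
- first_difference_wins | same_contents_different_dims
------------------------+------------------------------
- t                     | f
-(1 row)
-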
- - - See for more details about array operator - behavior. See for more details about - which operators support indexed operations. - - - - shows the functions - available for use with array types. See - for more information and examples of the use of these functions. - - - - Array Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - array_append - - array_append ( anycompatiblearray, anycompatible ) - anycompatiblearray - - - Appends an element to the end of an array (same as - the anycompatiblearray || anycompatible - operator). - - - array_append(ARRAY[1,2], 3) - {1,2,3} - - - - - - - array_cat - - array_cat ( anycompatiblearray, anycompatiblearray ) - anycompatiblearray - - - Concatenates two arrays (same as - the anycompatiblearray || anycompatiblearray - operator). - - - array_cat(ARRAY[1,2,3], ARRAY[4,5]) - {1,2,3,4,5} - - - - - - - array_dims - - array_dims ( anyarray ) - text - - - Returns a text representation of the array's dimensions. - - - array_dims(ARRAY[[1,2,3], [4,5,6]]) - [1:2][1:3] - - - - - - - array_fill - - array_fill ( anyelement, integer[] - , integer[] ) - anyarray - - - Returns an array filled with copies of the given value, having - dimensions of the lengths specified by the second argument. - The optional third argument supplies lower-bound values for each - dimension (which default to all 1). - - - array_fill(11, ARRAY[2,3]) - {{11,11,11},{11,11,11}} - - - array_fill(7, ARRAY[3], ARRAY[2]) - [2:4]={7,7,7} - - - - - - - array_length - - array_length ( anyarray, integer ) - integer - - - Returns the length of the requested array dimension. - (Produces NULL instead of 0 for empty or missing array dimensions.) - - - array_length(array[1,2,3], 1) - 3 - - - array_length(array[]::int[], 1) - NULL - - - array_length(array['text'], 2) - NULL - - - - - - - array_lower - - array_lower ( anyarray, integer ) - integer - - - Returns the lower bound of the requested array dimension. - - - array_lower('[0:2]={1,2,3}'::integer[], 1) - 0 - - - - - - - array_ndims - - array_ndims ( anyarray ) - integer - - - Returns the number of dimensions of the array. - - - array_ndims(ARRAY[[1,2,3], [4,5,6]]) - 2 - - - - - - - array_position - - array_position ( anycompatiblearray, anycompatible , integer ) - integer - - - Returns the subscript of the first occurrence of the second argument - in the array, or NULL if it's not present. - If the third argument is given, the search begins at that subscript. - The array must be one-dimensional. - Comparisons are done using IS NOT DISTINCT FROM - semantics, so it is possible to search for NULL. - - - array_position(ARRAY['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'], 'mon') - 2 - - - - - - - array_positions - - array_positions ( anycompatiblearray, anycompatible ) - integer[] - - - Returns an array of the subscripts of all occurrences of the second - argument in the array given as first argument. - The array must be one-dimensional. - Comparisons are done using IS NOT DISTINCT FROM - semantics, so it is possible to search for NULL. - NULL is returned only if the array - is NULL; if the value is not found in the array, an - empty array is returned. - - - array_positions(ARRAY['A','A','B','A'], 'A') - {1,2,4} - - - - - - - array_prepend - - array_prepend ( anycompatible, anycompatiblearray ) - anycompatiblearray - - - Prepends an element to the beginning of an array (same as - the anycompatible || anycompatiblearray - operator). 
- - - array_prepend(1, ARRAY[2,3]) - {1,2,3} - - - - - - - array_remove - - array_remove ( anycompatiblearray, anycompatible ) - anycompatiblearray - - - Removes all elements equal to the given value from the array. - The array must be one-dimensional. - Comparisons are done using IS NOT DISTINCT FROM - semantics, so it is possible to remove NULLs. - - - array_remove(ARRAY[1,2,3,2], 2) - {1,3} - - - - - - - array_replace - - array_replace ( anycompatiblearray, anycompatible, anycompatible ) - anycompatiblearray - - - Replaces each array element equal to the second argument with the - third argument. - - - array_replace(ARRAY[1,2,5,4], 5, 3) - {1,2,3,4} - - - - - - - array_reverse - - array_reverse ( anyarray ) - anyarray - - - Reverses the first dimension of the array. - - - array_reverse(ARRAY[[1,2],[3,4],[5,6]]) - {{5,6},{3,4},{1,2}} - - - - - - - array_sample - - array_sample ( array anyarray, n integer ) - anyarray - - - Returns an array of n items randomly selected - from array. n may not - exceed the length of array's first dimension. - If array is multi-dimensional, - an item is a slice having a given first subscript. - - - array_sample(ARRAY[1,2,3,4,5,6], 3) - {2,6,1} - - - array_sample(ARRAY[[1,2],[3,4],[5,6]], 2) - {{5,6},{1,2}} - - - - - - - array_shuffle - - array_shuffle ( anyarray ) - anyarray - - - Randomly shuffles the first dimension of the array. - - - array_shuffle(ARRAY[[1,2],[3,4],[5,6]]) - {{5,6},{1,2},{3,4}} - - - - - - - array_sort - - array_sort ( - array anyarray - , descending boolean - , nulls_first boolean - ) - anyarray - - - Sorts the first dimension of the array. - The sort order is determined by the default sort ordering of the - array's element type; however, if the element type is collatable, - the collation to use can be specified by adding - a COLLATE clause to - the array argument. - - - If descending is true then sort in - descending order, otherwise ascending order. If omitted, the - default is ascending order. - If nulls_first is true then nulls appear - before non-null values, otherwise nulls appear after non-null - values. - If omitted, nulls_first is taken to have - the same value as descending. - - - array_sort(ARRAY[[2,4],[2,1],[6,5]]) - {{2,1},{2,4},{6,5}} - - - - - - - array_to_string - - array_to_string ( array anyarray, delimiter text , null_string text ) - text - - - Converts each array element to its text representation, and - concatenates those separated by - the delimiter string. - If null_string is given and is - not NULL, then NULL array - entries are represented by that string; otherwise, they are omitted. - See also string_to_array. - - - array_to_string(ARRAY[1, 2, 3, NULL, 5], ',', '*') - 1,2,3,*,5 - - - - - - - array_upper - - array_upper ( anyarray, integer ) - integer - - - Returns the upper bound of the requested array dimension. - - - array_upper(ARRAY[1,8,3,7], 1) - 4 - - - - - - - cardinality - - cardinality ( anyarray ) - integer - - - Returns the total number of elements in the array, or 0 if the array - is empty. - - - cardinality(ARRAY[[1,2],[3,4]]) - 4 - - - - - - - trim_array - - trim_array ( array anyarray, n integer ) - anyarray - - - Trims an array by removing the last n elements. - If the array is multidimensional, only the first dimension is trimmed. - - - trim_array(ARRAY[1,2,3,4,5,6], 2) - {1,2,3,4} - - - - - - - unnest - - unnest ( anyarray ) - setof anyelement - - - Expands an array into a set of rows. - The array's elements are read out in storage order. 
- - - unnest(ARRAY[1,2]) - - - 1 - 2 - - - - unnest(ARRAY[['foo','bar'],['baz','quux']]) - - - foo - bar - baz - quux - - - - - - - unnest ( anyarray, anyarray , ... ) - setof anyelement, anyelement [, ... ] - - - Expands multiple arrays (possibly of different data types) into a set of - rows. If the arrays are not all the same length then the shorter ones - are padded with NULLs. This form is only allowed - in a query's FROM clause; see . - - - select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz']) as x(a,b) - - - a | b ----+----- - 1 | foo - 2 | bar - | baz - - - - - -
-
-   See also the aggregate
-   function array_agg for use with arrays.
-
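-
-   For instance, unnest and array_agg
-   can be combined to rebuild an array in a chosen order:
-
-SELECT array_agg(x ORDER BY x DESC) FROM unnest(ARRAY[1,2,3]) AS x;
-
- array_agg
------------
- {3,2,1}
-(1 row)
-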
- - - Range/Multirange Functions and Operators - - - See for an overview of range types. - - - - shows the specialized operators - available for range types. - shows the specialized operators - available for multirange types. - In addition to those, the usual comparison operators shown in - are available for range - and multirange types. The comparison operators order first by the range lower - bounds, and only if those are equal do they compare the upper bounds. The - multirange operators compare each range until one is unequal. This - does not usually result in a useful overall ordering, but the operators are - provided to allow unique indexes to be constructed on ranges. - - - - Range Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - anyrange @> anyrange - boolean - - - Does the first range contain the second? - - - int4range(2,4) @> int4range(2,3) - t - - - - - - anyrange @> anyelement - boolean - - - Does the range contain the element? - - - '[2011-01-01,2011-03-01)'::tsrange @> '2011-01-10'::timestamp - t - - - - - - anyrange <@ anyrange - boolean - - - Is the first range contained by the second? - - - int4range(2,4) <@ int4range(1,7) - t - - - - - - anyelement <@ anyrange - boolean - - - Is the element contained in the range? - - - 42 <@ int4range(1,7) - f - - - - - - anyrange && anyrange - boolean - - - Do the ranges overlap, that is, have any elements in common? - - - int8range(3,7) && int8range(4,12) - t - - - - - - anyrange << anyrange - boolean - - - Is the first range strictly left of the second? - - - int8range(1,10) << int8range(100,110) - t - - - - - - anyrange >> anyrange - boolean - - - Is the first range strictly right of the second? - - - int8range(50,60) >> int8range(20,30) - t - - - - - - anyrange &< anyrange - boolean - - - Does the first range not extend to the right of the second? - - - int8range(1,20) &< int8range(18,20) - t - - - - - - anyrange &> anyrange - boolean - - - Does the first range not extend to the left of the second? - - - int8range(7,20) &> int8range(5,10) - t - - - - - - anyrange -|- anyrange - boolean - - - Are the ranges adjacent? - - - numrange(1.1,2.2) -|- numrange(2.2,3.3) - t - - - - - - anyrange + anyrange - anyrange - - - Computes the union of the ranges. The ranges must overlap or be - adjacent, so that the union is a single range (but - see range_merge()). - - - numrange(5,15) + numrange(10,20) - [5,20) - - - - - - anyrange * anyrange - anyrange - - - Computes the intersection of the ranges. - - - int8range(5,15) * int8range(10,20) - [10,15) - - - - - - anyrange - anyrange - anyrange - - - Computes the difference of the ranges. The second range must not be - contained in the first in such a way that the difference would not be - a single range. - - - int8range(5,15) - int8range(10,20) - [5,10) - - - - -
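-
-   As the table notes, the range difference operator fails when the
-   result would not be a single range; casting the operands to
-   multiranges (see below) avoids the error:
-
-SELECT int8range(1,20) - int8range(5,10);
-ERROR:  result of range difference would not be contiguous
-
-SELECT int8range(1,20)::int8multirange - int8range(5,10)::int8multirange;
-      ?column?
--------------------
- {[1,5), [10,20)}
-(1 row)
-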
- - - Multirange Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - anymultirange @> anymultirange - boolean - - - Does the first multirange contain the second? - - - '{[2,4)}'::int4multirange @> '{[2,3)}'::int4multirange - t - - - - - - anymultirange @> anyrange - boolean - - - Does the multirange contain the range? - - - '{[2,4)}'::int4multirange @> int4range(2,3) - t - - - - - - anymultirange @> anyelement - boolean - - - Does the multirange contain the element? - - - '{[2011-01-01,2011-03-01)}'::tsmultirange @> '2011-01-10'::timestamp - t - - - - - - anyrange @> anymultirange - boolean - - - Does the range contain the multirange? - - - '[2,4)'::int4range @> '{[2,3)}'::int4multirange - t - - - - - - anymultirange <@ anymultirange - boolean - - - Is the first multirange contained by the second? - - - '{[2,4)}'::int4multirange <@ '{[1,7)}'::int4multirange - t - - - - - - anymultirange <@ anyrange - boolean - - - Is the multirange contained by the range? - - - '{[2,4)}'::int4multirange <@ int4range(1,7) - t - - - - - - anyrange <@ anymultirange - boolean - - - Is the range contained by the multirange? - - - int4range(2,4) <@ '{[1,7)}'::int4multirange - t - - - - - - anyelement <@ anymultirange - boolean - - - Is the element contained by the multirange? - - - 4 <@ '{[1,7)}'::int4multirange - t - - - - - - anymultirange && anymultirange - boolean - - - Do the multiranges overlap, that is, have any elements in common? - - - '{[3,7)}'::int8multirange && '{[4,12)}'::int8multirange - t - - - - - - anymultirange && anyrange - boolean - - - Does the multirange overlap the range? - - - '{[3,7)}'::int8multirange && int8range(4,12) - t - - - - - - anyrange && anymultirange - boolean - - - Does the range overlap the multirange? - - - int8range(3,7) && '{[4,12)}'::int8multirange - t - - - - - - anymultirange << anymultirange - boolean - - - Is the first multirange strictly left of the second? - - - '{[1,10)}'::int8multirange << '{[100,110)}'::int8multirange - t - - - - - - anymultirange << anyrange - boolean - - - Is the multirange strictly left of the range? - - - '{[1,10)}'::int8multirange << int8range(100,110) - t - - - - - - anyrange << anymultirange - boolean - - - Is the range strictly left of the multirange? - - - int8range(1,10) << '{[100,110)}'::int8multirange - t - - - - - - anymultirange >> anymultirange - boolean - - - Is the first multirange strictly right of the second? - - - '{[50,60)}'::int8multirange >> '{[20,30)}'::int8multirange - t - - - - - - anymultirange >> anyrange - boolean - - - Is the multirange strictly right of the range? - - - '{[50,60)}'::int8multirange >> int8range(20,30) - t - - - - - - anyrange >> anymultirange - boolean - - - Is the range strictly right of the multirange? - - - int8range(50,60) >> '{[20,30)}'::int8multirange - t - - - - - - anymultirange &< anymultirange - boolean - - - Does the first multirange not extend to the right of the second? - - - '{[1,20)}'::int8multirange &< '{[18,20)}'::int8multirange - t - - - - - - anymultirange &< anyrange - boolean - - - Does the multirange not extend to the right of the range? - - - '{[1,20)}'::int8multirange &< int8range(18,20) - t - - - - - - anyrange &< anymultirange - boolean - - - Does the range not extend to the right of the multirange? - - - int8range(1,20) &< '{[18,20)}'::int8multirange - t - - - - - - anymultirange &> anymultirange - boolean - - - Does the first multirange not extend to the left of the second? 
- - - '{[7,20)}'::int8multirange &> '{[5,10)}'::int8multirange - t - - - - - - anymultirange &> anyrange - boolean - - - Does the multirange not extend to the left of the range? - - - '{[7,20)}'::int8multirange &> int8range(5,10) - t - - - - - - anyrange &> anymultirange - boolean - - - Does the range not extend to the left of the multirange? - - - int8range(7,20) &> '{[5,10)}'::int8multirange - t - - - - - - anymultirange -|- anymultirange - boolean - - - Are the multiranges adjacent? - - - '{[1.1,2.2)}'::nummultirange -|- '{[2.2,3.3)}'::nummultirange - t - - - - - - anymultirange -|- anyrange - boolean - - - Is the multirange adjacent to the range? - - - '{[1.1,2.2)}'::nummultirange -|- numrange(2.2,3.3) - t - - - - - - anyrange -|- anymultirange - boolean - - - Is the range adjacent to the multirange? - - - numrange(1.1,2.2) -|- '{[2.2,3.3)}'::nummultirange - t - - - - - - anymultirange + anymultirange - anymultirange - - - Computes the union of the multiranges. The multiranges need not overlap - or be adjacent. - - - '{[5,10)}'::nummultirange + '{[15,20)}'::nummultirange - {[5,10), [15,20)} - - - - - - anymultirange * anymultirange - anymultirange - - - Computes the intersection of the multiranges. - - - '{[5,15)}'::int8multirange * '{[10,20)}'::int8multirange - {[10,15)} - - - - - - anymultirange - anymultirange - anymultirange - - - Computes the difference of the multiranges. - - - '{[5,20)}'::int8multirange - '{[10,15)}'::int8multirange - {[5,10), [15,20)} - - - - -
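The union and difference operators extend naturally to disjoint results when both operands are multiranges; a brief sketch with arbitrary values:

SELECT '{[1,3), [5,7)}'::int4multirange + '{[2,4)}'::int4multirange AS added,
       '{[1,3), [5,7)}'::int4multirange - '{[2,6)}'::int4multirange AS subtracted;
      added      |   subtracted
-----------------+-----------------
 {[1,4), [5,7)}  | {[1,2), [6,7)}
(1 row)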
- - - The left-of/right-of/adjacent operators always return false when an empty - range or multirange is involved; that is, an empty range is not considered to - be either before or after any other range. - - - - Elsewhere empty ranges and multiranges are treated as the additive identity: - anything unioned with an empty value is itself. Anything minus an empty - value is itself. An empty multirange has exactly the same points as an empty - range. Every range contains the empty range. Every multirange contains as many - empty ranges as you like. - - - - The range union and difference operators will fail if the resulting range would - need to contain two disjoint sub-ranges, as such a range cannot be - represented. There are separate operators for union and difference that take - multirange parameters and return a multirange, and they do not fail even if - their arguments are disjoint. So if you need a union or difference operation - for ranges that may be disjoint, you can avoid errors by first casting your - ranges to multiranges. - - - - shows the functions - available for use with range types. - shows the functions - available for use with multirange types. - - - - Range Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - lower - - lower ( anyrange ) - anyelement - - - Extracts the lower bound of the range (NULL if the - range is empty or has no lower bound). - - - lower(numrange(1.1,2.2)) - 1.1 - - - - - - - upper - - upper ( anyrange ) - anyelement - - - Extracts the upper bound of the range (NULL if the - range is empty or has no upper bound). - - - upper(numrange(1.1,2.2)) - 2.2 - - - - - - - isempty - - isempty ( anyrange ) - boolean - - - Is the range empty? - - - isempty(numrange(1.1,2.2)) - f - - - - - - - lower_inc - - lower_inc ( anyrange ) - boolean - - - Is the range's lower bound inclusive? - - - lower_inc(numrange(1.1,2.2)) - t - - - - - - - upper_inc - - upper_inc ( anyrange ) - boolean - - - Is the range's upper bound inclusive? - - - upper_inc(numrange(1.1,2.2)) - f - - - - - - - lower_inf - - lower_inf ( anyrange ) - boolean - - - Does the range have no lower bound? (A lower bound of - -Infinity returns false.) - - - lower_inf('(,)'::daterange) - t - - - - - - - upper_inf - - upper_inf ( anyrange ) - boolean - - - Does the range have no upper bound? (An upper bound of - Infinity returns false.) - - - upper_inf('(,)'::daterange) - t - - - - - - - range_merge - - range_merge ( anyrange, anyrange ) - anyrange - - - Computes the smallest range that includes both of the given ranges. - - - range_merge('[1,2)'::int4range, '[3,4)'::int4range) - [1,4) - - - - -
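To make the multirange workaround for disjoint ranges concrete, here is a sketch using the multirange() constructor (described in the next table). The range + operator raises an error for disjoint inputs (the error text shown is approximately what the server reports), while the multirange form succeeds:

SELECT int8range(1,3) + int8range(5,7);
ERROR:  result of range union would not be contiguous

SELECT multirange(int8range(1,3)) + multirange(int8range(5,7)) AS union_ok;
    union_ok
-----------------
 {[1,3), [5,7)}
(1 row)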
- - - Multirange Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - lower - - lower ( anymultirange ) - anyelement - - - Extracts the lower bound of the multirange (NULL if the - multirange is empty or has no lower bound). - - - lower('{[1.1,2.2)}'::nummultirange) - 1.1 - - - - - - - upper - - upper ( anymultirange ) - anyelement - - - Extracts the upper bound of the multirange (NULL if the - multirange is empty or has no upper bound). - - - upper('{[1.1,2.2)}'::nummultirange) - 2.2 - - - - - - - isempty - - isempty ( anymultirange ) - boolean - - - Is the multirange empty? - - - isempty('{[1.1,2.2)}'::nummultirange) - f - - - - - - - lower_inc - - lower_inc ( anymultirange ) - boolean - - - Is the multirange's lower bound inclusive? - - - lower_inc('{[1.1,2.2)}'::nummultirange) - t - - - - - - - upper_inc - - upper_inc ( anymultirange ) - boolean - - - Is the multirange's upper bound inclusive? - - - upper_inc('{[1.1,2.2)}'::nummultirange) - f - - - - - - - lower_inf - - lower_inf ( anymultirange ) - boolean - - - Does the multirange have no lower bound? (A lower bound of - -Infinity returns false.) - - - lower_inf('{(,)}'::datemultirange) - t - - - - - - - upper_inf - - upper_inf ( anymultirange ) - boolean - - - Does the multirange have no upper bound? (An upper bound of - Infinity returns false.) - - - upper_inf('{(,)}'::datemultirange) - t - - - - - - - range_merge - - range_merge ( anymultirange ) - anyrange - - - Computes the smallest range that includes the entire multirange. - - - range_merge('{[1,2), [3,4)}'::int4multirange) - [1,4) - - - - - - - multirange (function) - - multirange ( anyrange ) - anymultirange - - - Returns a multirange containing just the given range. - - - multirange('[1,2)'::int4range) - {[1,2)} - - - - - - - unnest - for multirange - - unnest ( anymultirange ) - setof anyrange - - - Expands a multirange into a set of ranges in ascending order. - - - unnest('{[1,2), [3,4)}'::int4multirange) - - - [1,2) - [3,4) - - - - - -
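For instance (arbitrary values), unnest() decomposes a multirange into its member ranges, while range_merge() collapses it to a single covering range:

SELECT unnest('{[1,2), [4,5)}'::int4multirange);
 unnest
--------
 [1,2)
 [4,5)
(2 rows)

SELECT range_merge('{[1,2), [4,5)}'::int4multirange) AS merged;
 merged
--------
 [1,5)
(1 row)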
- - - The lower_inc, upper_inc, - lower_inf, and upper_inf - functions all return false for an empty range or multirange. - -
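A minimal demonstration of that rule, using the empty range and empty multirange literals:

SELECT lower_inc('empty'::int4range) AS range_case,
       lower_inf('{}'::int4multirange) AS multirange_case;
 range_case | multirange_case
------------+-----------------
 f          | f
(1 row)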
- - - Aggregate Functions - - - aggregate function - built-in - - - - Aggregate functions compute a single result - from a set of input values. The built-in general-purpose aggregate - functions are listed in - while statistical aggregates are in . - The built-in within-group ordered-set aggregate functions - are listed in - while the built-in within-group hypothetical-set ones are in . Grouping operations, - which are closely related to aggregate functions, are listed in - . - The special syntax considerations for aggregate - functions are explained in . - Consult for additional introductory - information. - - - - Aggregate functions that support Partial Mode - are eligible to participate in various optimizations, such as parallel - aggregation. - - - - While all aggregates below accept an optional - ORDER BY clause (as outlined in ), the clause has only been added to - aggregates whose output is affected by ordering. - - - - General-Purpose Aggregate Functions - - - - - - - Function - - - Description - - Partial Mode - - - - - - - - any_value - - any_value ( anyelement ) - same as input type - - - Returns an arbitrary value from the non-null input values. - - Yes - - - - - - array_agg - - array_agg ( anynonarray ORDER BY input_sort_columns ) - anyarray - - - Collects all the input values, including nulls, into an array. - - Yes - - - - - array_agg ( anyarray ORDER BY input_sort_columns ) - anyarray - - - Concatenates all the input arrays into an array of one higher - dimension. (The inputs must all have the same dimensionality, and - cannot be empty or null.) - - Yes - - - - - - average - - - avg - - avg ( smallint ) - numeric - - - avg ( integer ) - numeric - - - avg ( bigint ) - numeric - - - avg ( numeric ) - numeric - - - avg ( real ) - double precision - - - avg ( double precision ) - double precision - - - avg ( interval ) - interval - - - Computes the average (arithmetic mean) of all the non-null input - values. - - Yes - - - - - - bit_and - - bit_and ( smallint ) - smallint - - - bit_and ( integer ) - integer - - - bit_and ( bigint ) - bigint - - - bit_and ( bit ) - bit - - - Computes the bitwise AND of all non-null input values. - - Yes - - - - - - bit_or - - bit_or ( smallint ) - smallint - - - bit_or ( integer ) - integer - - - bit_or ( bigint ) - bigint - - - bit_or ( bit ) - bit - - - Computes the bitwise OR of all non-null input values. - - Yes - - - - - - bit_xor - - bit_xor ( smallint ) - smallint - - - bit_xor ( integer ) - integer - - - bit_xor ( bigint ) - bigint - - - bit_xor ( bit ) - bit - - - Computes the bitwise exclusive OR of all non-null input values. - Can be useful as a checksum for an unordered set of values. - - Yes - - - - - - bool_and - - bool_and ( boolean ) - boolean - - - Returns true if all non-null input values are true, otherwise false. - - Yes - - - - - - bool_or - - bool_or ( boolean ) - boolean - - - Returns true if any non-null input value is true, otherwise false. - - Yes - - - - - - count - - count ( * ) - bigint - - - Computes the number of input rows. - - Yes - - - - - count ( "any" ) - bigint - - - Computes the number of input rows in which the input value is not - null. - - Yes - - - - - - every - - every ( boolean ) - boolean - - - This is the SQL standard's equivalent to bool_and. - - Yes - - - - - - json_agg - - json_agg ( anyelement ORDER BY input_sort_columns ) - json - - - - jsonb_agg - - jsonb_agg ( anyelement ORDER BY input_sort_columns ) - jsonb - - - Collects all the input values, including nulls, into a JSON array. 
- Values are converted to JSON as per to_json - or to_jsonb. - - No - - - - - - json_agg_strict - - json_agg_strict ( anyelement ) - json - - - - jsonb_agg_strict - - jsonb_agg_strict ( anyelement ) - jsonb - - - Collects all the input values, skipping nulls, into a JSON array. - Values are converted to JSON as per to_json - or to_jsonb. - - No - - - - - json_arrayagg - json_arrayagg ( - value_expression - ORDER BY sort_expression - { NULL | ABSENT } ON NULL - RETURNING data_type FORMAT JSON ENCODING UTF8 ) - - - Behaves in the same way as json_array - but as an aggregate function so it only takes one - value_expression parameter. - If ABSENT ON NULL is specified, any NULL - values are omitted. - If ORDER BY is specified, the elements will - appear in the array in that order rather than in the input order. - - - SELECT json_arrayagg(v) FROM (VALUES(2),(1)) t(v) - [2, 1] - - No - - - - - json_objectagg - json_objectagg ( - { key_expression { VALUE | ':' } value_expression } - { NULL | ABSENT } ON NULL - { WITH | WITHOUT } UNIQUE KEYS - RETURNING data_type FORMAT JSON ENCODING UTF8 ) - - - Behaves like json_object, but as an - aggregate function, so it only takes one - key_expression and one - value_expression parameter. - - - SELECT json_objectagg(k:v) FROM (VALUES ('a'::text,current_date),('b',current_date + 1)) AS t(k,v) - { "a" : "2022-05-10", "b" : "2022-05-11" } - - No - - - - - - json_object_agg - - json_object_agg ( key - "any", value - "any" - ORDER BY input_sort_columns ) - json - - - - jsonb_object_agg - - jsonb_object_agg ( key - "any", value - "any" - ORDER BY input_sort_columns ) - jsonb - - - Collects all the key/value pairs into a JSON object. Key arguments - are coerced to text; value arguments are converted as per - to_json or to_jsonb. - Values can be null, but keys cannot. - - No - - - - - - json_object_agg_strict - - json_object_agg_strict ( - key "any", - value "any" ) - json - - - - jsonb_object_agg_strict - - jsonb_object_agg_strict ( - key "any", - value "any" ) - jsonb - - - Collects all the key/value pairs into a JSON object. Key arguments - are coerced to text; value arguments are converted as per - to_json or to_jsonb. - The key can not be null. If the - value is null then the entry is skipped, - - No - - - - - - json_object_agg_unique - - json_object_agg_unique ( - key "any", - value "any" ) - json - - - - jsonb_object_agg_unique - - jsonb_object_agg_unique ( - key "any", - value "any" ) - jsonb - - - Collects all the key/value pairs into a JSON object. Key arguments - are coerced to text; value arguments are converted as per - to_json or to_jsonb. - Values can be null, but keys cannot. - If there is a duplicate key an error is thrown. - - No - - - - - - json_object_agg_unique_strict - - json_object_agg_unique_strict ( - key "any", - value "any" ) - json - - - - jsonb_object_agg_unique_strict - - jsonb_object_agg_unique_strict ( - key "any", - value "any" ) - jsonb - - - Collects all the key/value pairs into a JSON object. Key arguments - are coerced to text; value arguments are converted as per - to_json or to_jsonb. - The key can not be null. If the - value is null then the entry is skipped. - If there is a duplicate key an error is thrown. - - No - - - - - - max - - max ( see text ) - same as input type - - - Computes the maximum of the non-null input - values. Available for any numeric, string, date/time, or enum type, - as well as bytea, inet, interval, - money, oid, pg_lsn, - tid, xid8, - and also arrays and composite types containing sortable data types. 
- - Yes - - - - - - min - - min ( see text ) - same as input type - - - Computes the minimum of the non-null input - values. Available for any numeric, string, date/time, or enum type, - as well as bytea, inet, interval, - money, oid, pg_lsn, - tid, xid8, - and also arrays and composite types containing sortable data types. - - Yes - - - - - - range_agg - - range_agg ( value - anyrange ) - anymultirange - - - range_agg ( value - anymultirange ) - anymultirange - - - Computes the union of the non-null input values. - - No - - - - - - range_intersect_agg - - range_intersect_agg ( value - anyrange ) - anyrange - - - range_intersect_agg ( value - anymultirange ) - anymultirange - - - Computes the intersection of the non-null input values. - - No - - - - - - string_agg - - string_agg ( value - text, delimiter text ) - text - - - string_agg ( value - bytea, delimiter bytea - ORDER BY input_sort_columns ) - bytea - - - Concatenates the non-null input values into a string. Each value - after the first is preceded by the - corresponding delimiter (if it's not null). - - Yes - - - - - - sum - - sum ( smallint ) - bigint - - - sum ( integer ) - bigint - - - sum ( bigint ) - numeric - - - sum ( numeric ) - numeric - - - sum ( real ) - real - - - sum ( double precision ) - double precision - - - sum ( interval ) - interval - - - sum ( money ) - money - - - Computes the sum of the non-null input values. - - Yes - - - - - - xmlagg - - xmlagg ( xml ORDER BY input_sort_columns ) - xml - - - Concatenates the non-null XML input values (see - ). - - No - - - -
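As a small, self-contained sketch of several of these aggregates working over the same input (the VALUES list is arbitrary):

SELECT count(*) AS n,
       sum(x) AS total,
       array_agg(x ORDER BY x DESC) AS arr,
       string_agg(x::text, ',' ORDER BY x) AS csv
FROM (VALUES (3), (1), (2)) AS t(x);
 n | total |   arr   |  csv
---+-------+---------+-------
 3 |     6 | {3,2,1} | 1,2,3
(1 row)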
- - - It should be noted that except for count, - these functions return a null value when no rows are selected. In - particular, sum of no rows returns null, not - zero as one might expect, and array_agg - returns null rather than an empty array when there are no input - rows. The coalesce function can be used to - substitute zero or an empty array for null when necessary. - - - - The aggregate functions array_agg, - json_agg, jsonb_agg, - json_agg_strict, jsonb_agg_strict, - json_object_agg, jsonb_object_agg, - json_object_agg_strict, jsonb_object_agg_strict, - json_object_agg_unique, jsonb_object_agg_unique, - json_object_agg_unique_strict, - jsonb_object_agg_unique_strict, - string_agg, - and xmlagg, as well as similar user-defined - aggregate functions, produce meaningfully different result values - depending on the order of the input values. This ordering is - unspecified by default, but can be controlled by writing an - ORDER BY clause within the aggregate call, as shown in - . - Alternatively, supplying the input values from a sorted subquery - will usually work. For example: - - - - Beware that this approach can fail if the outer query level contains - additional processing, such as a join, because that might cause the - subquery's output to be reordered before the aggregate is computed. - - - - - ANY - - - SOME - - - The boolean aggregates bool_and and - bool_or correspond to the standard SQL aggregates - every and any or - some. - PostgreSQL - supports every, but not any - or some, because there is an ambiguity built into - the standard syntax: - -SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...; - - Here ANY can be considered either as introducing - a subquery, or as being an aggregate function, if the subquery - returns one row with a Boolean value. - Thus the standard name cannot be given to these aggregates. - - - - - - Users accustomed to working with other SQL database management - systems might be disappointed by the performance of the - count aggregate when it is applied to the - entire table. A query like: - -SELECT count(*) FROM sometable; - - will require effort proportional to the size of the table: - PostgreSQL will need to scan either the - entire table or the entirety of an index that includes all rows in - the table. - - - - - shows - aggregate functions typically used in statistical analysis. - (These are separated out merely to avoid cluttering the listing - of more-commonly-used aggregates.) Functions shown as - accepting numeric_type are available for all - the types smallint, integer, - bigint, numeric, real, - and double precision. - Where the description mentions - N, it means the - number of input rows for which all the input expressions are non-null. - In all cases, null is returned if the computation is meaningless, - for example when N is zero. - - - - statistics - - - linear regression - - - - Aggregate Functions for Statistics - - - - - - - Function - - - Description - - Partial Mode - - - - - - - - correlation - - - corr - - corr ( Y double precision, X double precision ) - double precision - - - Computes the correlation coefficient. - - Yes - - - - - - covariance - population - - - covar_pop - - covar_pop ( Y double precision, X double precision ) - double precision - - - Computes the population covariance. - - Yes - - - - - - covariance - sample - - - covar_samp - - covar_samp ( Y double precision, X double precision ) - double precision - - - Computes the sample covariance. 
- - Yes - - - - - - regr_avgx - - regr_avgx ( Y double precision, X double precision ) - double precision - - - Computes the average of the independent variable, - sum(X)/N. - - Yes - - - - - - regr_avgy - - regr_avgy ( Y double precision, X double precision ) - double precision - - - Computes the average of the dependent variable, - sum(Y)/N. - - Yes - - - - - - regr_count - - regr_count ( Y double precision, X double precision ) - bigint - - - Computes the number of rows in which both inputs are non-null. - - Yes - - - - - - regression intercept - - - regr_intercept - - regr_intercept ( Y double precision, X double precision ) - double precision - - - Computes the y-intercept of the least-squares-fit linear equation - determined by the - (X, Y) pairs. - - Yes - - - - - - regr_r2 - - regr_r2 ( Y double precision, X double precision ) - double precision - - - Computes the square of the correlation coefficient. - - Yes - - - - - - regression slope - - - regr_slope - - regr_slope ( Y double precision, X double precision ) - double precision - - - Computes the slope of the least-squares-fit linear equation determined - by the (X, Y) - pairs. - - Yes - - - - - - regr_sxx - - regr_sxx ( Y double precision, X double precision ) - double precision - - - Computes the sum of squares of the independent - variable, - sum(X^2) - sum(X)^2/N. - - Yes - - - - - - regr_sxy - - regr_sxy ( Y double precision, X double precision ) - double precision - - - Computes the sum of products of independent times - dependent variables, - sum(X*Y) - sum(X) * sum(Y)/N. - - Yes - - - - - - regr_syy - - regr_syy ( Y double precision, X double precision ) - double precision - - - Computes the sum of squares of the dependent - variable, - sum(Y^2) - sum(Y)^2/N. - - Yes - - - - - - standard deviation - - - stddev - - stddev ( numeric_type ) - double precision - for real or double precision, - otherwise numeric - - - This is a historical alias for stddev_samp. - - Yes - - - - - - standard deviation - population - - - stddev_pop - - stddev_pop ( numeric_type ) - double precision - for real or double precision, - otherwise numeric - - - Computes the population standard deviation of the input values. - - Yes - - - - - - standard deviation - sample - - - stddev_samp - - stddev_samp ( numeric_type ) - double precision - for real or double precision, - otherwise numeric - - - Computes the sample standard deviation of the input values. - - Yes - - - - - - variance - - variance ( numeric_type ) - double precision - for real or double precision, - otherwise numeric - - - This is a historical alias for var_samp. - - Yes - - - - - - variance - population - - - var_pop - - var_pop ( numeric_type ) - double precision - for real or double precision, - otherwise numeric - - - Computes the population variance of the input values (square of the - population standard deviation). - - Yes - - - - - - variance - sample - - - var_samp - - var_samp ( numeric_type ) - double precision - for real or double precision, - otherwise numeric - - - Computes the sample variance of the input values (square of the sample - standard deviation). - - Yes - - - -
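A sketch with a perfectly linear data set (y = 2x), chosen so the expected results are obvious: the correlation is exactly 1, the slope 2, and the intercept 0.

SELECT corr(y, x) AS r,
       regr_slope(y, x) AS slope,
       regr_intercept(y, x) AS intercept
FROM (VALUES (1.0, 2.0), (2.0, 4.0), (3.0, 6.0)) AS t(x, y);
 r | slope | intercept
---+-------+-----------
 1 |     2 |         0
(1 row)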
- - - shows some - aggregate functions that use the ordered-set aggregate - syntax. These functions are sometimes referred to as inverse - distribution functions. Their aggregated input is introduced by - ORDER BY, and they may also take a direct - argument that is not aggregated, but is computed only once. - All these functions ignore null values in their aggregated input. - For those that take a fraction parameter, the - fraction value must be between 0 and 1; an error is thrown if not. - However, a null fraction value simply produces a - null result. - - - - ordered-set aggregate - built-in - - - inverse distribution - - - - Ordered-Set Aggregate Functions - - - - - - - Function - - - Description - - Partial Mode - - - - - - - - mode - statistical - - mode () WITHIN GROUP ( ORDER BY anyelement ) - anyelement - - - Computes the mode, the most frequent - value of the aggregated argument (arbitrarily choosing the first one - if there are multiple equally-frequent values). The aggregated - argument must be of a sortable type. - - No - - - - - - percentile - continuous - - percentile_cont ( fraction double precision ) WITHIN GROUP ( ORDER BY double precision ) - double precision - - - percentile_cont ( fraction double precision ) WITHIN GROUP ( ORDER BY interval ) - interval - - - Computes the continuous percentile, a value - corresponding to the specified fraction - within the ordered set of aggregated argument values. This will - interpolate between adjacent input items if needed. - - No - - - - - percentile_cont ( fractions double precision[] ) WITHIN GROUP ( ORDER BY double precision ) - double precision[] - - - percentile_cont ( fractions double precision[] ) WITHIN GROUP ( ORDER BY interval ) - interval[] - - - Computes multiple continuous percentiles. The result is an array of - the same dimensions as the fractions - parameter, with each non-null element replaced by the (possibly - interpolated) value corresponding to that percentile. - - No - - - - - - percentile - discrete - - percentile_disc ( fraction double precision ) WITHIN GROUP ( ORDER BY anyelement ) - anyelement - - - Computes the discrete percentile, the first - value within the ordered set of aggregated argument values whose - position in the ordering equals or exceeds the - specified fraction. The aggregated - argument must be of a sortable type. - - No - - - - - percentile_disc ( fractions double precision[] ) WITHIN GROUP ( ORDER BY anyelement ) - anyarray - - - Computes multiple discrete percentiles. The result is an array of the - same dimensions as the fractions parameter, - with each non-null element replaced by the input value corresponding - to that percentile. - The aggregated argument must be of a sortable type. - - No - - - -
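For example (arbitrary values; a fraction of 0.5 computes a median). Note how the continuous and discrete forms happen to agree here because the middle values are equal:

SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY x) AS median_cont,
       percentile_disc(0.5) WITHIN GROUP (ORDER BY x) AS median_disc,
       mode() WITHIN GROUP (ORDER BY x) AS most_common
FROM (VALUES (1), (2), (2), (10)) AS t(x);
 median_cont | median_disc | most_common
-------------+-------------+-------------
           2 |           2 |           2
(1 row)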
- - - hypothetical-set aggregate - built-in - - - - Each of the hypothetical-set aggregates listed in - is associated with a - window function of the same name defined in - . In each case, the aggregate's result - is the value that the associated window function would have - returned for the hypothetical row constructed from - args, if such a row had been added to the sorted - group of rows represented by the sorted_args. - For each of these functions, the list of direct arguments - given in args must match the number and types of - the aggregated arguments given in sorted_args. - Unlike most built-in aggregates, these aggregates are not strict, that is - they do not drop input rows containing nulls. Null values sort according - to the rule specified in the ORDER BY clause. - - - - Hypothetical-Set Aggregate Functions - - - - - - - Function - - - Description - - Partial Mode - - - - - - - - rank - hypothetical - - rank ( args ) WITHIN GROUP ( ORDER BY sorted_args ) - bigint - - - Computes the rank of the hypothetical row, with gaps; that is, the row - number of the first row in its peer group. - - No - - - - - - dense_rank - hypothetical - - dense_rank ( args ) WITHIN GROUP ( ORDER BY sorted_args ) - bigint - - - Computes the rank of the hypothetical row, without gaps; this function - effectively counts peer groups. - - No - - - - - - percent_rank - hypothetical - - percent_rank ( args ) WITHIN GROUP ( ORDER BY sorted_args ) - double precision - - - Computes the relative rank of the hypothetical row, that is - (rank - 1) / (total rows - 1). - The value thus ranges from 0 to 1 inclusive. - - No - - - - - - cume_dist - hypothetical - - cume_dist ( args ) WITHIN GROUP ( ORDER BY sorted_args ) - double precision - - - Computes the cumulative distribution, that is (number of rows - preceding or peers with hypothetical row) / (total rows). The value - thus ranges from 1/N to 1. - - No - - - -
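A sketch: asking where the hypothetical value 42 would rank if it were added to three existing rows. Two rows sort below it, so it would rank third:

SELECT rank(42) WITHIN GROUP (ORDER BY x) AS would_rank
FROM (VALUES (10), (40), (50)) AS t(x);
 would_rank
------------
          3
(1 row)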
- - - Grouping Operations - - - - - Function - - - Description - - - - - - - - - GROUPING - - GROUPING ( group_by_expression(s) ) - integer - - - Returns a bit mask indicating which GROUP BY - expressions are not included in the current grouping set. - Bits are assigned with the rightmost argument corresponding to the - least-significant bit; each bit is 0 if the corresponding expression - is included in the grouping criteria of the grouping set generating - the current result row, and 1 if it is not included. - - - - -
- - - The grouping operations shown in - are used in conjunction with - grouping sets (see ) to distinguish - result rows. The arguments to the GROUPING function - are not actually evaluated, but they must exactly match expressions given - in the GROUP BY clause of the associated query level. - For example: - -=> SELECT * FROM items_sold; - make | model | sales --------+-------+------- - Foo | GT | 10 - Foo | Tour | 20 - Bar | City | 15 - Bar | Sport | 5 -(4 rows) - -=> SELECT make, model, GROUPING(make,model), sum(sales) FROM items_sold GROUP BY ROLLUP(make,model); - make | model | grouping | sum --------+-------+----------+----- - Foo | GT | 0 | 10 - Foo | Tour | 0 | 20 - Bar | City | 0 | 15 - Bar | Sport | 0 | 5 - Foo | | 1 | 30 - Bar | | 1 | 20 - | | 3 | 50 -(7 rows) - - Here, the grouping value 0 in the - first four rows shows that those have been grouped normally, over both the - grouping columns. The value 1 indicates - that model was not grouped by in the next-to-last two - rows, and the value 3 indicates that - neither make nor model was grouped - by in the last row (which therefore is an aggregate over all the input - rows). - - -
- - - Window Functions - - - window function - built-in - - - - Window functions provide the ability to perform - calculations across sets of rows that are related to the current query - row. See for an introduction to this - feature, and for syntax - details. - - - - The built-in window functions are listed in - . Note that these functions - must be invoked using window function syntax, i.e., an - OVER clause is required. - - - - In addition to these functions, any built-in or user-defined - ordinary aggregate (i.e., not ordered-set or hypothetical-set aggregates) - can be used as a window function; see - for a list of the built-in aggregates. - Aggregate functions act as window functions only when an OVER - clause follows the call; otherwise they act as plain aggregates - and return a single row for the entire set. - - - - General-Purpose Window Functions - - - - - Function - - - Description - - - - - - - - - row_number - - row_number () - bigint - - - Returns the number of the current row within its partition, counting - from 1. - - - - - - - rank - - rank () - bigint - - - Returns the rank of the current row, with gaps; that is, - the row_number of the first row in its peer - group. - - - - - - - dense_rank - - dense_rank () - bigint - - - Returns the rank of the current row, without gaps; this function - effectively counts peer groups. - - - - - - - percent_rank - - percent_rank () - double precision - - - Returns the relative rank of the current row, that is - (rank - 1) / (total partition rows - 1). - The value thus ranges from 0 to 1 inclusive. - - - - - - - cume_dist - - cume_dist () - double precision - - - Returns the cumulative distribution, that is (number of partition rows - preceding or peers with current row) / (total partition rows). - The value thus ranges from 1/N to 1. - - - - - - - ntile - - ntile ( num_buckets integer ) - integer - - - Returns an integer ranging from 1 to the argument value, dividing the - partition as equally as possible. - - - - - - - lag - - lag ( value anycompatible - , offset integer - , default anycompatible ) - anycompatible - - - Returns value evaluated at - the row that is offset - rows before the current row within the partition; if there is no such - row, instead returns default - (which must be of a type compatible with - value). - Both offset and - default are evaluated - with respect to the current row. If omitted, - offset defaults to 1 and - default to NULL. - - - - - - - lead - - lead ( value anycompatible - , offset integer - , default anycompatible ) - anycompatible - - - Returns value evaluated at - the row that is offset - rows after the current row within the partition; if there is no such - row, instead returns default - (which must be of a type compatible with - value). - Both offset and - default are evaluated - with respect to the current row. If omitted, - offset defaults to 1 and - default to NULL. - - - - - - - first_value - - first_value ( value anyelement ) - anyelement - - - Returns value evaluated - at the row that is the first row of the window frame. - - - - - - - last_value - - last_value ( value anyelement ) - anyelement - - - Returns value evaluated - at the row that is the last row of the window frame. - - - - - - - nth_value - - nth_value ( value anyelement, n integer ) - anyelement - - - Returns value evaluated - at the row that is the n'th - row of the window frame (counting from 1); - returns NULL if there is no such row. - - - - -
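A compact sketch over an arbitrary four-row set, showing one possible output (the two peer rows with value 20 may appear in either order, which affects lag but not rank):

SELECT x,
       row_number() OVER w AS rn,
       rank() OVER w AS rnk,
       lag(x) OVER w AS prev
FROM (VALUES (10), (20), (20), (30)) AS t(x)
WINDOW w AS (ORDER BY x);
 x  | rn | rnk | prev
----+----+-----+------
 10 |  1 |   1 |
 20 |  2 |   2 |   10
 20 |  3 |   2 |   20
 30 |  4 |   4 |   20
(4 rows)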
- - - All of the functions listed in - depend on the sort ordering - specified by the ORDER BY clause of the associated window - definition. Rows that are not distinct when considering only the - ORDER BY columns are said to be peers. - The four ranking functions (including cume_dist) are - defined so that they give the same answer for all rows of a peer group. - - - - Note that first_value, last_value, and - nth_value consider only the rows within the window - frame, which by default contains the rows from the start of the - partition through the last peer of the current row. This is - likely to give unhelpful results for last_value and - sometimes also nth_value. You can redefine the frame by - adding a suitable frame specification (RANGE, - ROWS or GROUPS) to - the OVER clause. - See for more information - about frame specifications. - - - - When an aggregate function is used as a window function, it aggregates - over the rows within the current row's window frame. - An aggregate used with ORDER BY and the default window frame - definition produces a running sum type of behavior, which may or - may not be what's wanted. To obtain - aggregation over the whole partition, omit ORDER BY or use - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. - Other frame specifications can be used to obtain other effects. - - - - - The SQL standard defines a RESPECT NULLS or - IGNORE NULLS option for lead, lag, - first_value, last_value, and - nth_value. This is not implemented in - PostgreSQL: the behavior is always the - same as the standard's default, namely RESPECT NULLS. - Likewise, the standard's FROM FIRST or FROM LAST - option for nth_value is not implemented: only the - default FROM FIRST behavior is supported. (You can achieve - the result of FROM LAST by reversing the ORDER BY - ordering.) - - - -
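The running-sum versus whole-partition distinction can be seen directly in a small sketch:

SELECT x,
       sum(x) OVER (ORDER BY x) AS running_sum,
       sum(x) OVER () AS partition_sum
FROM (VALUES (1), (2), (3)) AS t(x);
 x | running_sum | partition_sum
---+-------------+---------------
 1 |           1 |             6
 2 |           3 |             6
 3 |           6 |             6
(3 rows)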
- - - Merge Support Functions - - - MERGE - RETURNING - - - - PostgreSQL includes one merge support function - that may be used in the RETURNING list of a - command to identify the action taken for each - row; see . - - - - Merge Support Functions - - - - - - Function - - - Description - - - - - - - - - merge_action - - merge_action ( ) - text - - - Returns the merge action command executed for the current row. This - will be 'INSERT', 'UPDATE', or - 'DELETE'. - - - - -
Example:

MERGE INTO products p
  USING stock s ON p.product_id = s.product_id
  WHEN MATCHED AND s.quantity > 0 THEN
    UPDATE SET in_stock = true, quantity = s.quantity
  WHEN MATCHED THEN
    UPDATE SET in_stock = false, quantity = 0
  WHEN NOT MATCHED THEN
    INSERT (product_id, in_stock, quantity)
    VALUES (s.product_id, true, s.quantity)
  RETURNING merge_action(), p.*;

 merge_action | product_id | in_stock | quantity
--------------+------------+----------+----------
 UPDATE       |       1001 | t        |       50
 UPDATE       |       1002 | f        |        0
 INSERT       |       1003 | t        |       10

Note that this function can only be used in the RETURNING
list of a MERGE command. It is an error to use it in any
other part of a query.
- - - Subquery Expressions - - - EXISTS - - - - IN - - - - NOT IN - - - - ANY - - - - ALL - - - - SOME - - - - subquery - - - - This section describes the SQL-compliant subquery - expressions available in PostgreSQL. - All of the expression forms documented in this section return - Boolean (true/false) results. - - - - <literal>EXISTS</literal> - - -EXISTS (subquery) - - - - The argument of EXISTS is an arbitrary SELECT statement, - or subquery. The - subquery is evaluated to determine whether it returns any rows. - If it returns at least one row, the result of EXISTS is - true; if the subquery returns no rows, the result of EXISTS - is false. - - - - The subquery can refer to variables from the surrounding query, - which will act as constants during any one evaluation of the subquery. - - - - The subquery will generally only be executed long enough to determine - whether at least one row is returned, not all the way to completion. - It is unwise to write a subquery that has side effects (such as - calling sequence functions); whether the side effects occur - might be unpredictable. - - - - Since the result depends only on whether any rows are returned, - and not on the contents of those rows, the output list of the - subquery is normally unimportant. A common coding convention is - to write all EXISTS tests in the form - EXISTS(SELECT 1 WHERE ...). There are exceptions to - this rule however, such as subqueries that use INTERSECT. - - - - This simple example is like an inner join on col2, but - it produces at most one output row for each tab1 row, - even if there are several matching tab2 rows: - -SELECT col1 -FROM tab1 -WHERE EXISTS (SELECT 1 FROM tab2 WHERE col2 = tab1.col2); - - - - - - <literal>IN</literal> - - -expression IN (subquery) - - - - The right-hand side is a parenthesized - subquery, which must return exactly one column. The left-hand expression - is evaluated and compared to each row of the subquery result. - The result of IN is true if any equal subquery row is found. - The result is false if no equal row is found (including the - case where the subquery returns no rows). - - - - Note that if the left-hand expression yields null, or if there are - no equal right-hand values and at least one right-hand row yields - null, the result of the IN construct will be null, not false. - This is in accordance with SQL's normal rules for Boolean combinations - of null values. - - - - As with EXISTS, it's unwise to assume that the subquery will - be evaluated completely. - - - -row_constructor IN (subquery) - - - - The left-hand side of this form of IN is a row constructor, - as described in . - The right-hand side is a parenthesized - subquery, which must return exactly as many columns as there are - expressions in the left-hand row. The left-hand expressions are - evaluated and compared row-wise to each row of the subquery result. - The result of IN is true if any equal subquery row is found. - The result is false if no equal row is found (including the - case where the subquery returns no rows). - - - - As usual, null values in the rows are combined per - the normal rules of SQL Boolean expressions. Two rows are considered - equal if all their corresponding members are non-null and equal; the rows - are unequal if any corresponding members are non-null and unequal; - otherwise the result of that row comparison is unknown (null). - If all the per-row results are either unequal or null, with at least one - null, then the result of IN is null. 
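A minimal sketch of the null-handling rule just described. The second test yields null (shown blank), not false, because the subquery produces a null row and no equal value is found:

SELECT 1 IN (SELECT x FROM (VALUES (1), (NULL)) AS t(x)) AS found,
       2 IN (SELECT x FROM (VALUES (1), (NULL)) AS t(x)) AS not_found;
 found | not_found
-------+-----------
 t     |
(1 row)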
- - - - - <literal>NOT IN</literal> - - -expression NOT IN (subquery) - - - - The right-hand side is a parenthesized - subquery, which must return exactly one column. The left-hand expression - is evaluated and compared to each row of the subquery result. - The result of NOT IN is true if only unequal subquery rows - are found (including the case where the subquery returns no rows). - The result is false if any equal row is found. - - - - Note that if the left-hand expression yields null, or if there are - no equal right-hand values and at least one right-hand row yields - null, the result of the NOT IN construct will be null, not true. - This is in accordance with SQL's normal rules for Boolean combinations - of null values. - - - - As with EXISTS, it's unwise to assume that the subquery will - be evaluated completely. - - - -row_constructor NOT IN (subquery) - - - - The left-hand side of this form of NOT IN is a row constructor, - as described in . - The right-hand side is a parenthesized - subquery, which must return exactly as many columns as there are - expressions in the left-hand row. The left-hand expressions are - evaluated and compared row-wise to each row of the subquery result. - The result of NOT IN is true if only unequal subquery rows - are found (including the case where the subquery returns no rows). - The result is false if any equal row is found. - - - - As usual, null values in the rows are combined per - the normal rules of SQL Boolean expressions. Two rows are considered - equal if all their corresponding members are non-null and equal; the rows - are unequal if any corresponding members are non-null and unequal; - otherwise the result of that row comparison is unknown (null). - If all the per-row results are either unequal or null, with at least one - null, then the result of NOT IN is null. - - - - - <literal>ANY</literal>/<literal>SOME</literal> - - -expression operator ANY (subquery) -expression operator SOME (subquery) - - - - The right-hand side is a parenthesized - subquery, which must return exactly one column. The left-hand expression - is evaluated and compared to each row of the subquery result using the - given operator, which must yield a Boolean - result. - The result of ANY is true if any true result is obtained. - The result is false if no true result is found (including the - case where the subquery returns no rows). - - - - SOME is a synonym for ANY. - IN is equivalent to = ANY. - - - - Note that if there are no successes and at least one right-hand row yields - null for the operator's result, the result of the ANY construct - will be null, not false. - This is in accordance with SQL's normal rules for Boolean combinations - of null values. - - - - As with EXISTS, it's unwise to assume that the subquery will - be evaluated completely. - - - -row_constructor operator ANY (subquery) -row_constructor operator SOME (subquery) - - - - The left-hand side of this form of ANY is a row constructor, - as described in . - The right-hand side is a parenthesized - subquery, which must return exactly as many columns as there are - expressions in the left-hand row. The left-hand expressions are - evaluated and compared row-wise to each row of the subquery result, - using the given operator. - The result of ANY is true if the comparison - returns true for any subquery row. - The result is false if the comparison returns false for every - subquery row (including the case where the subquery returns no - rows). 
- The result is NULL if no comparison with a subquery row returns true, - and at least one comparison returns NULL. - - - - See for details about the meaning - of a row constructor comparison. - - - - - <literal>ALL</literal> - - -expression operator ALL (subquery) - - - - The right-hand side is a parenthesized - subquery, which must return exactly one column. The left-hand expression - is evaluated and compared to each row of the subquery result using the - given operator, which must yield a Boolean - result. - The result of ALL is true if all rows yield true - (including the case where the subquery returns no rows). - The result is false if any false result is found. - The result is NULL if no comparison with a subquery row returns false, - and at least one comparison returns NULL. - - - - NOT IN is equivalent to <> ALL. - - - - As with EXISTS, it's unwise to assume that the subquery will - be evaluated completely. - - - -row_constructor operator ALL (subquery) - - - - The left-hand side of this form of ALL is a row constructor, - as described in . - The right-hand side is a parenthesized - subquery, which must return exactly as many columns as there are - expressions in the left-hand row. The left-hand expressions are - evaluated and compared row-wise to each row of the subquery result, - using the given operator. - The result of ALL is true if the comparison - returns true for all subquery rows (including the - case where the subquery returns no rows). - The result is false if the comparison returns false for any - subquery row. - The result is NULL if no comparison with a subquery row returns false, - and at least one comparison returns NULL. - - - - See for details about the meaning - of a row constructor comparison. - - - - - Single-Row Comparison - - - comparison - subquery result row - - - -row_constructor operator (subquery) - - - - The left-hand side is a row constructor, - as described in . - The right-hand side is a parenthesized subquery, which must return exactly - as many columns as there are expressions in the left-hand row. Furthermore, - the subquery cannot return more than one row. (If it returns zero rows, - the result is taken to be null.) The left-hand side is evaluated and - compared row-wise to the single subquery result row. - - - - See for details about the meaning - of a row constructor comparison. - - - - - - - Row and Array Comparisons - - - IN - - - - NOT IN - - - - ANY - - - - ALL - - - - SOME - - - - composite type - comparison - - - - row-wise comparison - - - - comparison - composite type - - - - comparison - row constructor - - - - IS DISTINCT FROM - - - - IS NOT DISTINCT FROM - - - - This section describes several specialized constructs for making - multiple comparisons between groups of values. These forms are - syntactically related to the subquery forms of the previous section, - but do not involve subqueries. - The forms involving array subexpressions are - PostgreSQL extensions; the rest are - SQL-compliant. - All of the expression forms documented in this section return - Boolean (true/false) results. - - - - <literal>IN</literal> - - -expression IN (value , ...) - - - - The right-hand side is a parenthesized list - of expressions. The result is true if the left-hand expression's - result is equal to any of the right-hand expressions. This is a shorthand - notation for - - -expression = value1 -OR -expression = value2 -OR -... 
- - - - - Note that if the left-hand expression yields null, or if there are - no equal right-hand values and at least one right-hand expression yields - null, the result of the IN construct will be null, not false. - This is in accordance with SQL's normal rules for Boolean combinations - of null values. - - - - - <literal>NOT IN</literal> - - -expression NOT IN (value , ...) - - - - The right-hand side is a parenthesized list - of expressions. The result is true if the left-hand expression's - result is unequal to all of the right-hand expressions. This is a shorthand - notation for - - -expression <> value1 -AND -expression <> value2 -AND -... - - - - - Note that if the left-hand expression yields null, or if there are - no equal right-hand values and at least one right-hand expression yields - null, the result of the NOT IN construct will be null, not true - as one might naively expect. - This is in accordance with SQL's normal rules for Boolean combinations - of null values. - - - - - x NOT IN y is equivalent to NOT (x IN y) in all - cases. However, null values are much more likely to trip up the novice when - working with NOT IN than when working with IN. - It is best to express your condition positively if possible. - - - - - - <literal>ANY</literal>/<literal>SOME</literal> (array) - - -expression operator ANY (array expression) -expression operator SOME (array expression) - - - - The right-hand side is a parenthesized expression, which must yield an - array value. - The left-hand expression - is evaluated and compared to each element of the array using the - given operator, which must yield a Boolean - result. - The result of ANY is true if any true result is obtained. - The result is false if no true result is found (including the - case where the array has zero elements). - - - - If the array expression yields a null array, the result of - ANY will be null. If the left-hand expression yields null, - the result of ANY is ordinarily null (though a non-strict - comparison operator could possibly yield a different result). - Also, if the right-hand array contains any null elements and no true - comparison result is obtained, the result of ANY - will be null, not false (again, assuming a strict comparison operator). - This is in accordance with SQL's normal rules for Boolean combinations - of null values. - - - - SOME is a synonym for ANY. - - - - - <literal>ALL</literal> (array) - - -expression operator ALL (array expression) - - - - The right-hand side is a parenthesized expression, which must yield an - array value. - The left-hand expression - is evaluated and compared to each element of the array using the - given operator, which must yield a Boolean - result. - The result of ALL is true if all comparisons yield true - (including the case where the array has zero elements). - The result is false if any false result is found. - - - - If the array expression yields a null array, the result of - ALL will be null. If the left-hand expression yields null, - the result of ALL is ordinarily null (though a non-strict - comparison operator could possibly yield a different result). - Also, if the right-hand array contains any null elements and no false - comparison result is obtained, the result of ALL - will be null, not true (again, assuming a strict comparison operator). - This is in accordance with SQL's normal rules for Boolean combinations - of null values. 
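A sketch of the corresponding null behavior for the array forms. The ALL test yields null (shown blank) rather than true, because the NULL element produces an unknown comparison and no false result is found:

SELECT 5 = ANY (ARRAY[1, 5, NULL]) AS any_match,
       5 <> ALL (ARRAY[1, 2, NULL]) AS all_differ;
 any_match | all_differ
-----------+------------
 t         |
(1 row)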
- - - - - Row Constructor Comparison - - -row_constructor operator row_constructor - - - - Each side is a row constructor, - as described in . - The two row constructors must have the same number of fields. - The given operator is applied to each pair - of corresponding fields. (Since the fields could be of different - types, this means that a different specific operator could be selected - for each pair.) - All the selected operators must be members of some B-tree operator - class, or be the negator of an = member of a B-tree - operator class, meaning that row constructor comparison is only - possible when the operator is - =, - <>, - <, - <=, - >, or - >=, - or has semantics similar to one of these. - - - - The = and <> cases work slightly differently - from the others. Two rows are considered - equal if all their corresponding members are non-null and equal; the rows - are unequal if any corresponding members are non-null and unequal; - otherwise the result of the row comparison is unknown (null). - - - - For the <, <=, > and - >= cases, the row elements are compared left-to-right, - stopping as soon as an unequal or null pair of elements is found. - If either of this pair of elements is null, the result of the - row comparison is unknown (null); otherwise comparison of this pair - of elements determines the result. For example, - ROW(1,2,NULL) < ROW(1,3,0) - yields true, not null, because the third pair of elements are not - considered. - - - -row_constructor IS DISTINCT FROM row_constructor - - - - This construct is similar to a <> row comparison, - but it does not yield null for null inputs. Instead, any null value is - considered unequal to (distinct from) any non-null value, and any two - nulls are considered equal (not distinct). Thus the result will - either be true or false, never null. - - - -row_constructor IS NOT DISTINCT FROM row_constructor - - - - This construct is similar to a = row comparison, - but it does not yield null for null inputs. Instead, any null value is - considered unequal to (distinct from) any non-null value, and any two - nulls are considered equal (not distinct). Thus the result will always - be either true or false, never null. - - - - - - Composite Type Comparison - - -record operator record - - - - The SQL specification requires row-wise comparison to return NULL if the - result depends on comparing two NULL values or a NULL and a non-NULL. - PostgreSQL does this only when comparing the - results of two row constructors (as in - ) or comparing a row constructor - to the output of a subquery (as in ). - In other contexts where two composite-type values are compared, two - NULL field values are considered equal, and a NULL is considered larger - than a non-NULL. This is necessary in order to have consistent sorting - and indexing behavior for composite types. - - - - Each side is evaluated and they are compared row-wise. Composite type - comparisons are allowed when the operator is - =, - <>, - <, - <=, - > or - >=, - or has semantics similar to one of these. (To be specific, an operator - can be a row comparison operator if it is a member of a B-tree operator - class, or is the negator of the = member of a B-tree operator - class.) The default behavior of the above operators is the same as for - IS [ NOT ] DISTINCT FROM for row constructors (see - ). 
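A brief sketch contrasting ordinary row comparison with IS NOT DISTINCT FROM: the plain = comparison yields null (shown blank) because of the null fields, while the IS NOT DISTINCT FROM form treats the two nulls as equal:

SELECT ROW(1, NULL) = ROW(1, NULL) AS plain_eq,
       ROW(1, NULL) IS NOT DISTINCT FROM ROW(1, NULL) AS not_distinct;
 plain_eq | not_distinct
----------+--------------
          | t
(1 row)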
- - - - To support matching of rows which include elements without a default - B-tree operator class, the following operators are defined for composite - type comparison: - *=, - *<>, - *<, - *<=, - *>, and - *>=. - These operators compare the internal binary representation of the two - rows. Two rows might have a different binary representation even - though comparisons of the two rows with the equality operator is true. - The ordering of rows under these comparison operators is deterministic - but not otherwise meaningful. These operators are used internally - for materialized views and might be useful for other specialized - purposes such as replication and B-Tree deduplication (see ). They are not intended to be - generally useful for writing queries, though. - - - - - - Set Returning Functions - - - set returning functions - functions - - - - This section describes functions that possibly return more than one row. - The most widely used functions in this class are series generating - functions, as detailed in and - . Other, more specialized - set-returning functions are described elsewhere in this manual. - See for ways to combine multiple - set-returning functions. - - - - Series Generating Functions - - - - - Function - - - Description - - - - - - - - - generate_series - - generate_series ( start integer, stop integer , step integer ) - setof integer - - - generate_series ( start bigint, stop bigint , step bigint ) - setof bigint - - - generate_series ( start numeric, stop numeric , step numeric ) - setof numeric - - - Generates a series of values from start - to stop, with a step size - of step. step - defaults to 1. - - - - - - generate_series ( start timestamp, stop timestamp, step interval ) - setof timestamp - - - generate_series ( start timestamp with time zone, stop timestamp with time zone, step interval , timezone text ) - setof timestamp with time zone - - - Generates a series of values from start - to stop, with a step size - of step. - In the timezone-aware form, times of day and daylight-savings - adjustments are computed according to the time zone named by - the timezone argument, or the current - setting if that is omitted. - - - - -
- - - When step is positive, zero rows are returned if - start is greater than stop. - Conversely, when step is negative, zero rows are - returned if start is less than stop. - Zero rows are also returned if any input is NULL. - It is an error - for step to be zero. Some examples follow: - -SELECT * FROM generate_series(2,4); - generate_series ------------------ - 2 - 3 - 4 -(3 rows) - -SELECT * FROM generate_series(5,1,-2); - generate_series ------------------ - 5 - 3 - 1 -(3 rows) - -SELECT * FROM generate_series(4,3); - generate_series ------------------ -(0 rows) - -SELECT generate_series(1.1, 4, 1.3); - generate_series ------------------ - 1.1 - 2.4 - 3.7 -(3 rows) - --- this example relies on the date-plus-integer operator: -SELECT current_date + s.a AS dates FROM generate_series(0,14,7) AS s(a); - dates ------------- - 2004-02-05 - 2004-02-12 - 2004-02-19 -(3 rows) - -SELECT * FROM generate_series('2008-03-01 00:00'::timestamp, - '2008-03-04 12:00', '10 hours'); - generate_series ---------------------- - 2008-03-01 00:00:00 - 2008-03-01 10:00:00 - 2008-03-01 20:00:00 - 2008-03-02 06:00:00 - 2008-03-02 16:00:00 - 2008-03-03 02:00:00 - 2008-03-03 12:00:00 - 2008-03-03 22:00:00 - 2008-03-04 08:00:00 -(9 rows) - --- this example assumes that TimeZone is set to UTC; note the DST transition: -SELECT * FROM generate_series('2001-10-22 00:00 -04:00'::timestamptz, - '2001-11-01 00:00 -05:00'::timestamptz, - '1 day'::interval, 'America/New_York'); - generate_series ------------------------- - 2001-10-22 04:00:00+00 - 2001-10-23 04:00:00+00 - 2001-10-24 04:00:00+00 - 2001-10-25 04:00:00+00 - 2001-10-26 04:00:00+00 - 2001-10-27 04:00:00+00 - 2001-10-28 04:00:00+00 - 2001-10-29 05:00:00+00 - 2001-10-30 05:00:00+00 - 2001-10-31 05:00:00+00 - 2001-11-01 05:00:00+00 -(11 rows) - - - - - Subscript Generating Functions - - - - - Function - - - Description - - - - - - - - - generate_subscripts - - generate_subscripts ( array anyarray, dim integer ) - setof integer - - - Generates a series comprising the valid subscripts of - the dim'th dimension of the given array. - - - - - - generate_subscripts ( array anyarray, dim integer, reverse boolean ) - setof integer - - - Generates a series comprising the valid subscripts of - the dim'th dimension of the given array. - When reverse is true, returns the series in - reverse order. - - - - -
- - - generate_subscripts is a convenience function that generates - the set of valid subscripts for the specified dimension of the given - array. - Zero rows are returned for arrays that do not have the requested dimension, - or if any input is NULL. - Some examples follow: - --- basic usage: -SELECT generate_subscripts('{NULL,1,NULL,2}'::int[], 1) AS s; - s ---- - 1 - 2 - 3 - 4 -(4 rows) - --- presenting an array, the subscript and the subscripted --- value requires a subquery: -SELECT * FROM arrays; - a --------------------- - {-1,-2} - {100,200,300} -(2 rows) - -SELECT a AS array, s AS subscript, a[s] AS value -FROM (SELECT generate_subscripts(a, 1) AS s, a FROM arrays) foo; - array | subscript | value ----------------+-----------+------- - {-1,-2} | 1 | -1 - {-1,-2} | 2 | -2 - {100,200,300} | 1 | 100 - {100,200,300} | 2 | 200 - {100,200,300} | 3 | 300 -(5 rows) - --- unnest a 2D array: -CREATE OR REPLACE FUNCTION unnest2(anyarray) -RETURNS SETOF anyelement AS $$ -select $1[i][j] - from generate_subscripts($1,1) g1(i), - generate_subscripts($1,2) g2(j); -$$ LANGUAGE sql IMMUTABLE; -CREATE FUNCTION -SELECT * FROM unnest2(ARRAY[[1,2],[3,4]]); - unnest2 ---------- - 1 - 2 - 3 - 4 -(4 rows) - - - - - ordinality - - - - When a function in the FROM clause is suffixed - by WITH ORDINALITY, a bigint column is - appended to the function's output column(s), which starts from 1 and - increments by 1 for each row of the function's output. - This is most useful in the case of set returning - functions such as unnest(). - - --- set returning function WITH ORDINALITY: -SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); - ls | n ------------------+---- - pg_serial | 1 - pg_twophase | 2 - postmaster.opts | 3 - pg_notify | 4 - postgresql.conf | 5 - pg_tblspc | 6 - logfile | 7 - base | 8 - postmaster.pid | 9 - pg_ident.conf | 10 - global | 11 - pg_xact | 12 - pg_snapshots | 13 - pg_multixact | 14 - PG_VERSION | 15 - pg_wal | 16 - pg_hba.conf | 17 - pg_stat_tmp | 18 - pg_subtrans | 19 -(19 rows) - - - -
- - - System Information Functions and Operators - - - The functions described in this section are used to obtain various - information about a PostgreSQL installation. - - - - Session Information Functions - - - shows several - functions that extract session and system information. - - - - In addition to the functions listed in this section, there are a number of - functions related to the statistics system that also provide system - information. See for more - information. - - - - Session Information Functions - - - - - Function - - - Description - - - - - - - - - current_catalog - - current_catalog - name - - - - current_database - - current_database () - name - - - Returns the name of the current database. (Databases are - called catalogs in the SQL standard, - so current_catalog is the standard's - spelling.) - - - - - - - current_query - - current_query () - text - - - Returns the text of the currently executing query, as submitted - by the client (which might contain more than one statement). - - - - - - - current_role - - current_role - name - - - This is equivalent to current_user. - - - - - - - current_schema - - - schema - current - - current_schema - name - - - current_schema () - name - - - Returns the name of the schema that is first in the search path (or a - null value if the search path is empty). This is the schema that will - be used for any tables or other named objects that are created without - specifying a target schema. - - - - - - - current_schemas - - - search path - current - - current_schemas ( include_implicit boolean ) - name[] - - - Returns an array of the names of all schemas presently in the - effective search path, in their priority order. (Items in the current - setting that do not correspond to - existing, searchable schemas are omitted.) If the Boolean argument - is true, then implicitly-searched system schemas - such as pg_catalog are included in the result. - - - - - - - current_user - - - user - current - - current_user - name - - - Returns the user name of the current execution context. - - - - - - - inet_client_addr - - inet_client_addr () - inet - - - Returns the IP address of the current client, - or NULL if the current connection is via a - Unix-domain socket. - - - - - - - inet_client_port - - inet_client_port () - integer - - - Returns the IP port number of the current client, - or NULL if the current connection is via a - Unix-domain socket. - - - - - - - inet_server_addr - - inet_server_addr () - inet - - - Returns the IP address on which the server accepted the current - connection, - or NULL if the current connection is via a - Unix-domain socket. - - - - - - - inet_server_port - - inet_server_port () - integer - - - Returns the IP port number on which the server accepted the current - connection, - or NULL if the current connection is via a - Unix-domain socket. - - - - - - - pg_backend_pid - - pg_backend_pid () - integer - - - Returns the process ID of the server process attached to the current - session. - - - - - - - pg_blocking_pids - - pg_blocking_pids ( integer ) - integer[] - - - Returns an array of the process ID(s) of the sessions that are - blocking the server process with the specified process ID from - acquiring a lock, or an empty array if there is no such server process - or it is not blocked. 
- - - One server process blocks another if it either holds a lock that - conflicts with the blocked process's lock request (hard block), or is - waiting for a lock that would conflict with the blocked process's lock - request and is ahead of it in the wait queue (soft block). When using - parallel queries the result always lists client-visible process IDs - (that is, pg_backend_pid results) even if the - actual lock is held or awaited by a child worker process. As a result - of that, there may be duplicated PIDs in the result. Also note that - when a prepared transaction holds a conflicting lock, it will be - represented by a zero process ID. - - - Frequent calls to this function could have some impact on database - performance, because it needs exclusive access to the lock manager's - shared state for a short time. - - - - - - - pg_conf_load_time - - pg_conf_load_time () - timestamp with time zone - - - Returns the time when the server configuration files were last loaded. - If the current session was alive at the time, this will be the time - when the session itself re-read the configuration files (so the - reading will vary a little in different sessions). Otherwise it is - the time when the postmaster process re-read the configuration files. - - - - - - - pg_current_logfile - - - Logging - pg_current_logfile function - - - current_logfiles - and the pg_current_logfile function - - - Logging - current_logfiles file and the pg_current_logfile - function - - pg_current_logfile ( text ) - text - - - Returns the path name of the log file currently in use by the logging - collector. The path includes the - directory and the individual log file name. The result - is NULL if the logging collector is disabled. - When multiple log files exist, each in a different - format, pg_current_logfile without an argument - returns the path of the file having the first format found in the - ordered list: stderr, - csvlog, jsonlog. - NULL is returned if no log file has any of these - formats. - To request information about a specific log file format, supply - either csvlog, jsonlog or - stderr as the - value of the optional parameter. The result is NULL - if the log format requested is not configured in - . - The result reflects the contents of - the current_logfiles file. - - - This function is restricted to superusers and roles with privileges of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - pg_get_loaded_modules - - pg_get_loaded_modules () - setof record - ( module_name text, - version text, - file_name text ) - - - Returns a list of the loadable modules that are loaded into the - current server session. The module_name - and version fields are NULL unless the - module author supplied values for them using - the PG_MODULE_MAGIC_EXT macro. - The file_name field gives the file - name of the module (shared library). - - - - - - - pg_my_temp_schema - - pg_my_temp_schema () - oid - - - Returns the OID of the current session's temporary schema, or zero if - it has none (because it has not created any temporary tables). - - - - - - - pg_is_other_temp_schema - - pg_is_other_temp_schema ( oid ) - boolean - - - Returns true if the given OID is the OID of another session's - temporary schema. (This can be useful, for example, to exclude other - sessions' temporary tables from a catalog display.) 
- - - - - - - pg_jit_available - - pg_jit_available () - boolean - - - Returns true if a JIT compiler extension is - available (see ) and the - configuration parameter is set to - on. - - - - - - - pg_numa_available - - pg_numa_available () - boolean - - - Returns true if the server has been compiled with NUMA support. - - - - - - - pg_listening_channels - - pg_listening_channels () - setof text - - - Returns the set of names of asynchronous notification channels that - the current session is listening to. - - - - - - - pg_notification_queue_usage - - pg_notification_queue_usage () - double precision - - - Returns the fraction (0–1) of the asynchronous notification - queue's maximum size that is currently occupied by notifications that - are waiting to be processed. - See and - for more information. - - - - - - - pg_postmaster_start_time - - pg_postmaster_start_time () - timestamp with time zone - - - Returns the time when the server started. - - - - - - - pg_safe_snapshot_blocking_pids - - pg_safe_snapshot_blocking_pids ( integer ) - integer[] - - - Returns an array of the process ID(s) of the sessions that are blocking - the server process with the specified process ID from acquiring a safe - snapshot, or an empty array if there is no such server process or it - is not blocked. - - - A session running a SERIALIZABLE transaction blocks - a SERIALIZABLE READ ONLY DEFERRABLE transaction - from acquiring a snapshot until the latter determines that it is safe - to avoid taking any predicate locks. See - for more information about - serializable and deferrable transactions. - - - Frequent calls to this function could have some impact on database - performance, because it needs access to the predicate lock manager's - shared state for a short time. - - - - - - - pg_trigger_depth - - pg_trigger_depth () - integer - - - Returns the current nesting level - of PostgreSQL triggers (0 if not called, - directly or indirectly, from inside a trigger). - - - - - - - session_user - - session_user - name - - - Returns the session user's name. - - - - - - - system_user - - system_user - text - - - Returns the authentication method and the identity (if any) that the - user presented during the authentication cycle before they were - assigned a database role. It is represented as - auth_method:identity or - NULL if the user has not been authenticated (for - example if Trust authentication has - been used). - - - - - - - user - - user - name - - - This is equivalent to current_user. - - - - -
- - - - current_catalog, - current_role, - current_schema, - current_user, - session_user, - and user have special syntactic status - in SQL: they must be called without trailing - parentheses. In PostgreSQL, parentheses can optionally be used with - current_schema, but not with the others. - - - - - The session_user is normally the user who initiated - the current database connection; but superusers can change this setting - with . - The current_user is the user identifier - that is applicable for permission checking. Normally it is equal - to the session user, but it can be changed with - . - It also changes during the execution of - functions with the attribute SECURITY DEFINER. - In Unix parlance, the session user is the real user and - the current user is the effective user. - current_role and user are - synonyms for current_user. (The SQL standard draws - a distinction between current_role - and current_user, but PostgreSQL - does not, since it unifies users and roles into a single kind of entity.) - - -
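As a brief illustration (a sketch only; the role alice is hypothetical, and the actual output depends on your connection and grants):

SELECT current_database(), current_schema, session_user, current_user;

-- session_user stays fixed for the life of the connection, while
-- current_user follows SET ROLE (assuming a role named alice exists
-- and the session user is a member of it):
SET ROLE alice;
SELECT session_user, current_user;  -- current_user is now alice
RESET ROLE;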
- - - Access Privilege Inquiry Functions - - - privilege - querying - - - - lists functions that - allow querying object access privileges programmatically. - (See for more information about - privileges.) - In these functions, the user whose privileges are being inquired about - can be specified by name or by OID - (pg_authid.oid), or if - the name is given as public then the privileges of the - PUBLIC pseudo-role are checked. Also, the user - argument can be omitted entirely, in which case - the current_user is assumed. - The object that is being inquired about can be specified either by name or - by OID, too. When specifying by name, a schema name can be included if - relevant. - The access privilege of interest is specified by a text string, which must - evaluate to one of the appropriate privilege keywords for the object's type - (e.g., SELECT). Optionally, WITH GRANT - OPTION can be added to a privilege type to test whether the - privilege is held with grant option. Also, multiple privilege types can be - listed separated by commas, in which case the result will be true if any of - the listed privileges is held. (Case of the privilege string is not - significant, and extra whitespace is allowed between but not within - privilege names.) - Some examples: - -SELECT has_table_privilege('myschema.mytable', 'select'); -SELECT has_table_privilege('joe', 'mytable', 'INSERT, SELECT WITH GRANT OPTION'); - - - - - Access Privilege Inquiry Functions - - - - - Function - - - Description - - - - - - - - - has_any_column_privilege - - has_any_column_privilege ( - user name or oid, - table text or oid, - privilege text ) - boolean - - - Does user have privilege for any column of table? - This succeeds either if the privilege is held for the whole table, or - if there is a column-level grant of the privilege for at least one - column. - Allowable privilege types are - SELECT, INSERT, - UPDATE, and REFERENCES. - - - - - - - has_column_privilege - - has_column_privilege ( - user name or oid, - table text or oid, - column text or smallint, - privilege text ) - boolean - - - Does user have privilege for the specified table column? - This succeeds either if the privilege is held for the whole table, or - if there is a column-level grant of the privilege for the column. - The column can be specified by name or by attribute number - (pg_attribute.attnum). - Allowable privilege types are - SELECT, INSERT, - UPDATE, and REFERENCES. - - - - - - - has_database_privilege - - has_database_privilege ( - user name or oid, - database text or oid, - privilege text ) - boolean - - - Does user have privilege for database? - Allowable privilege types are - CREATE, - CONNECT, - TEMPORARY, and - TEMP (which is equivalent to - TEMPORARY). - - - - - - - has_foreign_data_wrapper_privilege - - has_foreign_data_wrapper_privilege ( - user name or oid, - fdw text or oid, - privilege text ) - boolean - - - Does user have privilege for foreign-data wrapper? - The only allowable privilege type is USAGE. - - - - - - - has_function_privilege - - has_function_privilege ( - user name or oid, - function text or oid, - privilege text ) - boolean - - - Does user have privilege for function? - The only allowable privilege type is EXECUTE. - - - When specifying a function by name rather than by OID, the allowed - input is the same as for the regprocedure data type (see - ). 
- An example is: - -SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); - - - - - - - - has_language_privilege - - has_language_privilege ( - user name or oid, - language text or oid, - privilege text ) - boolean - - - Does user have privilege for language? - The only allowable privilege type is USAGE. - - - - - - - has_largeobject_privilege - - has_largeobject_privilege ( - user name or oid, - largeobject oid, - privilege text ) - boolean - - - Does user have privilege for large object? - Allowable privilege types are - SELECT and UPDATE. - - - - - - - has_parameter_privilege - - has_parameter_privilege ( - user name or oid, - parameter text, - privilege text ) - boolean - - - Does user have privilege for configuration parameter? - The parameter name is case-insensitive. - Allowable privilege types are SET - and ALTER SYSTEM. - - - - - - - has_schema_privilege - - has_schema_privilege ( - user name or oid, - schema text or oid, - privilege text ) - boolean - - - Does user have privilege for schema? - Allowable privilege types are - CREATE and - USAGE. - - - - - - - has_sequence_privilege - - has_sequence_privilege ( - user name or oid, - sequence text or oid, - privilege text ) - boolean - - - Does user have privilege for sequence? - Allowable privilege types are - USAGE, - SELECT, and - UPDATE. - - - - - - - has_server_privilege - - has_server_privilege ( - user name or oid, - server text or oid, - privilege text ) - boolean - - - Does user have privilege for foreign server? - The only allowable privilege type is USAGE. - - - - - - - has_table_privilege - - has_table_privilege ( - user name or oid, - table text or oid, - privilege text ) - boolean - - - Does user have privilege for table? - Allowable privilege types - are SELECT, INSERT, - UPDATE, DELETE, - TRUNCATE, REFERENCES, - TRIGGER, and MAINTAIN. - - - - - - - has_tablespace_privilege - - has_tablespace_privilege ( - user name or oid, - tablespace text or oid, - privilege text ) - boolean - - - Does user have privilege for tablespace? - The only allowable privilege type is CREATE. - - - - - - - has_type_privilege - - has_type_privilege ( - user name or oid, - type text or oid, - privilege text ) - boolean - - - Does user have privilege for data type? - The only allowable privilege type is USAGE. - When specifying a type by name rather than by OID, the allowed input - is the same as for the regtype data type (see - ). - - - - - - - pg_has_role - - pg_has_role ( - user name or oid, - role text or oid, - privilege text ) - boolean - - - Does user have privilege for role? - Allowable privilege types are - MEMBER, USAGE, - and SET. - MEMBER denotes direct or indirect membership in - the role without regard to what specific privileges may be conferred. - USAGE denotes whether the privileges of the role - are immediately available without doing SET ROLE, - while SET denotes whether it is possible to change - to the role using the SET ROLE command. - WITH ADMIN OPTION or WITH GRANT - OPTION can be added to any of these privilege types to - test whether the ADMIN privilege is held (all - six spellings test the same thing). - This function does not allow the special case of - setting user to public, - because the PUBLIC pseudo-role can never be a member of real roles. - - - - - - - row_security_active - - row_security_active ( - table text or oid ) - boolean - - - Is row-level security active for the specified table in the context of - the current user and current environment? - - - - -
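For example, to probe role membership and a few common privileges (joe and mytable are hypothetical; the results depend on the grants in effect):

SELECT pg_has_role('joe', 'pg_monitor', 'MEMBER');
SELECT has_database_privilege('joe', current_database(), 'CONNECT');
SELECT has_schema_privilege('joe', 'public', 'CREATE');
SELECT has_column_privilege('joe', 'mytable', 'id', 'SELECT WITH GRANT OPTION');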
- - - shows the operators - available for the aclitem type, which is the catalog - representation of access privileges. See - for information about how to read access privilege values. - - - - <type>aclitem</type> Operators - - - - - Operator - - - Description - - - Example(s) - - - - - - - - - aclitemeq - - aclitem = aclitem - boolean - - - Are aclitems equal? (Notice that - type aclitem lacks the usual set of comparison - operators; it has only equality. In turn, aclitem - arrays can only be compared for equality.) - - - 'calvin=r*w/hobbes'::aclitem = 'calvin=r*w*/hobbes'::aclitem - f - - - - - - - aclcontains - - aclitem[] @> aclitem - boolean - - - Does array contain the specified privileges? (This is true if there - is an array entry that matches the aclitem's grantee and - grantor, and has at least the specified set of privileges.) - - - '{calvin=r*w/hobbes,hobbes=r*w*/postgres}'::aclitem[] @> 'calvin=r*/hobbes'::aclitem - t - - - - - - aclitem[] ~ aclitem - boolean - - - This is a deprecated alias for @>. - - - '{calvin=r*w/hobbes,hobbes=r*w*/postgres}'::aclitem[] ~ 'calvin=r*/hobbes'::aclitem - t - - - - -
- - - shows some additional - functions to manage the aclitem type. - - - - <type>aclitem</type> Functions - - - - - Function - - - Description - - - - - - - - - acldefault - - acldefault ( - type "char", - ownerId oid ) - aclitem[] - - - Constructs an aclitem array holding the default access - privileges for an object of type type belonging - to the role with OID ownerId. This represents - the access privileges that will be assumed when an object's - ACL entry is null. (The default access privileges - are described in .) - The type parameter must be one of - 'c' for COLUMN, - 'r' for TABLE and table-like objects, - 's' for SEQUENCE, - 'd' for DATABASE, - 'f' for FUNCTION or PROCEDURE, - 'l' for LANGUAGE, - 'L' for LARGE OBJECT, - 'n' for SCHEMA, - 'p' for PARAMETER, - 't' for TABLESPACE, - 'F' for FOREIGN DATA WRAPPER, - 'S' for FOREIGN SERVER, - or - 'T' for TYPE or DOMAIN. - - - - - - - aclexplode - - aclexplode ( aclitem[] ) - setof record - ( grantor oid, - grantee oid, - privilege_type text, - is_grantable boolean ) - - - Returns the aclitem array as a set of rows. - If the grantee is the pseudo-role PUBLIC, it is represented by zero in - the grantee column. Each granted privilege is - represented as SELECT, INSERT, - etc (see for a full list). - Note that each privilege is broken out as a separate row, so - only one keyword appears in the privilege_type - column. - - - - - - - makeaclitem - - makeaclitem ( - grantee oid, - grantor oid, - privileges text, - is_grantable boolean ) - aclitem - - - Constructs an aclitem with the given properties. - privileges is a comma-separated list of - privilege names such as SELECT, - INSERT, etc, all of which are set in the - result. (Case of the privilege string is not significant, and - extra whitespace is allowed between but not within privilege - names.) - - - - -
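A minimal sketch of these functions (the roles joe and postgres are assumed to exist; adjust the names to your installation):

-- expand the default ACL for a table owned by postgres, one privilege per row:
SELECT * FROM aclexplode(acldefault('r', 'postgres'::regrole::oid));

-- construct a single aclitem granting SELECT and UPDATE from postgres to joe:
SELECT makeaclitem('joe'::regrole::oid, 'postgres'::regrole::oid,
                   'SELECT, UPDATE', false);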
- -
- - - Schema Visibility Inquiry Functions - - - shows functions that - determine whether a certain object is visible in the - current schema search path. - For example, a table is said to be visible if its - containing schema is in the search path and no table of the same - name appears earlier in the search path. This is equivalent to the - statement that the table can be referenced by name without explicit - schema qualification. Thus, to list the names of all visible tables: - -SELECT relname FROM pg_class WHERE pg_table_is_visible(oid); - - For functions and operators, an object in the search path is said to be - visible if there is no object of the same name and argument data - type(s) earlier in the path. For operator classes and families, - both the name and the associated index access method are considered. - - - - search path - object visibility - - - - Schema Visibility Inquiry Functions - - - - - Function - - - Description - - - - - - - - - pg_collation_is_visible - - pg_collation_is_visible ( collation oid ) - boolean - - - Is collation visible in search path? - - - - - - - pg_conversion_is_visible - - pg_conversion_is_visible ( conversion oid ) - boolean - - - Is conversion visible in search path? - - - - - - - pg_function_is_visible - - pg_function_is_visible ( function oid ) - boolean - - - Is function visible in search path? - (This also works for procedures and aggregates.) - - - - - - - pg_opclass_is_visible - - pg_opclass_is_visible ( opclass oid ) - boolean - - - Is operator class visible in search path? - - - - - - - pg_operator_is_visible - - pg_operator_is_visible ( operator oid ) - boolean - - - Is operator visible in search path? - - - - - - - pg_opfamily_is_visible - - pg_opfamily_is_visible ( opclass oid ) - boolean - - - Is operator family visible in search path? - - - - - - - pg_statistics_obj_is_visible - - pg_statistics_obj_is_visible ( stat oid ) - boolean - - - Is statistics object visible in search path? - - - - - - - pg_table_is_visible - - pg_table_is_visible ( table oid ) - boolean - - - Is table visible in search path? - (This works for all types of relations, including views, materialized - views, indexes, sequences and foreign tables.) - - - - - - - pg_ts_config_is_visible - - pg_ts_config_is_visible ( config oid ) - boolean - - - Is text search configuration visible in search path? - - - - - - - pg_ts_dict_is_visible - - pg_ts_dict_is_visible ( dict oid ) - boolean - - - Is text search dictionary visible in search path? - - - - - - - pg_ts_parser_is_visible - - pg_ts_parser_is_visible ( parser oid ) - boolean - - - Is text search parser visible in search path? - - - - - - - pg_ts_template_is_visible - - pg_ts_template_is_visible ( template oid ) - boolean - - - Is text search template visible in search path? - - - - - - - pg_type_is_visible - - pg_type_is_visible ( type oid ) - boolean - - - Is type (or domain) visible in search path? - - - - -
- - - All these functions require object OIDs to identify the object to be - checked. If you want to test an object by name, it is convenient to use - the OID alias types (regclass, regtype, - regprocedure, regoperator, regconfig, - or regdictionary), - for example: - -SELECT pg_type_is_visible('myschema.widget'::regtype); - - Note that it would not make much sense to test a non-schema-qualified - type name in this way — if the name can be recognized at all, it must be visible. - - -
- - - System Catalog Information Functions - - - lists functions that - extract information from the system catalogs. - - - - System Catalog Information Functions - - - - - Function - - - Description - - - - - - - - - format_type - - format_type ( type oid, typemod integer ) - text - - - Returns the SQL name for a data type that is identified by its type - OID and possibly a type modifier. Pass NULL for the type modifier if - no specific modifier is known. - - - - - - - pg_basetype - - pg_basetype ( regtype ) - regtype - - - Returns the OID of the base type of a domain identified by its - type OID. If the argument is the OID of a non-domain type, - returns the argument as-is. Returns NULL if the argument is - not a valid type OID. If there's a chain of domain dependencies, - it will recurse until finding the base type. - - - Assuming CREATE DOMAIN mytext AS text: - - - pg_basetype('mytext'::regtype) - text - - - - - - - pg_char_to_encoding - - pg_char_to_encoding ( encoding name ) - integer - - - Converts the supplied encoding name into an integer representing the - internal identifier used in some system catalog tables. - Returns -1 if an unknown encoding name is provided. - - - - - - - pg_encoding_to_char - - pg_encoding_to_char ( encoding integer ) - name - - - Converts the integer used as the internal identifier of an encoding in some - system catalog tables into a human-readable string. - Returns an empty string if an invalid encoding number is provided. - - - - - - - pg_get_catalog_foreign_keys - - pg_get_catalog_foreign_keys () - setof record - ( fktable regclass, - fkcols text[], - pktable regclass, - pkcols text[], - is_array boolean, - is_opt boolean ) - - - Returns a set of records describing the foreign key relationships - that exist within the PostgreSQL system - catalogs. - The fktable column contains the name of the - referencing catalog, and the fkcols column - contains the name(s) of the referencing column(s). Similarly, - the pktable column contains the name of the - referenced catalog, and the pkcols column - contains the name(s) of the referenced column(s). - If is_array is true, the last referencing - column is an array, each of whose elements should match some entry - in the referenced catalog. - If is_opt is true, the referencing column(s) - are allowed to contain zeroes instead of a valid reference. - - - - - - - pg_get_constraintdef - - pg_get_constraintdef ( constraint oid , pretty boolean ) - text - - - Reconstructs the creating command for a constraint. - (This is a decompiled reconstruction, not the original text - of the command.) - - - - - - - pg_get_expr - - pg_get_expr ( expr pg_node_tree, relation oid , pretty boolean ) - text - - - Decompiles the internal form of an expression stored in the system - catalogs, such as the default value for a column. If the expression - might contain Vars, specify the OID of the relation they refer to as - the second parameter; if no Vars are expected, passing zero is - sufficient. - - - - - - - pg_get_functiondef - - pg_get_functiondef ( func oid ) - text - - - Reconstructs the creating command for a function or procedure. - (This is a decompiled reconstruction, not the original text - of the command.) - The result is a complete CREATE OR REPLACE FUNCTION - or CREATE OR REPLACE PROCEDURE statement. 
- - - - - - - pg_get_function_arguments - - pg_get_function_arguments ( func oid ) - text - - - Reconstructs the argument list of a function or procedure, in the form - it would need to appear in within CREATE FUNCTION - (including default values). - - - - - - - pg_get_function_identity_arguments - - pg_get_function_identity_arguments ( func oid ) - text - - - Reconstructs the argument list necessary to identify a function or - procedure, in the form it would need to appear in within commands such - as ALTER FUNCTION. This form omits default values. - - - - - - - pg_get_function_result - - pg_get_function_result ( func oid ) - text - - - Reconstructs the RETURNS clause of a function, in - the form it would need to appear in within CREATE - FUNCTION. Returns NULL for a procedure. - - - - - - - pg_get_indexdef - - pg_get_indexdef ( index oid , column integer, pretty boolean ) - text - - - Reconstructs the creating command for an index. - (This is a decompiled reconstruction, not the original text - of the command.) If column is supplied and is - not zero, only the definition of that column is reconstructed. - - - - - - - pg_get_keywords - - pg_get_keywords () - setof record - ( word text, - catcode "char", - barelabel boolean, - catdesc text, - baredesc text ) - - - Returns a set of records describing the SQL keywords recognized by the - server. The word column contains the - keyword. The catcode column contains a - category code: U for an unreserved - keyword, C for a keyword that can be a column - name, T for a keyword that can be a type or - function name, or R for a fully reserved keyword. - The barelabel column - contains true if the keyword can be used as - a bare column label in SELECT lists, - or false if it can only be used - after AS. - The catdesc column contains a - possibly-localized string describing the keyword's category. - The baredesc column contains a - possibly-localized string describing the keyword's column label status. - - - - - - - pg_get_partkeydef - - pg_get_partkeydef ( table oid ) - text - - - Reconstructs the definition of a partitioned table's partition - key, in the form it would have in the PARTITION - BY clause of CREATE TABLE. - (This is a decompiled reconstruction, not the original text - of the command.) - - - - - - - pg_get_ruledef - - pg_get_ruledef ( rule oid , pretty boolean ) - text - - - Reconstructs the creating command for a rule. - (This is a decompiled reconstruction, not the original text - of the command.) - - - - - - - pg_get_serial_sequence - - pg_get_serial_sequence ( table text, column text ) - text - - - Returns the name of the sequence associated with a column, - or NULL if no sequence is associated with the column. - If the column is an identity column, the associated sequence is the - sequence internally created for that column. - For columns created using one of the serial types - (serial, smallserial, bigserial), - it is the sequence created for that serial column definition. - In the latter case, the association can be modified or removed - with ALTER SEQUENCE OWNED BY. - (This function probably should have been - called pg_get_owned_sequence; its current name - reflects the fact that it has historically been used with serial-type - columns.) The first parameter is a table name with optional - schema, and the second parameter is a column name. Because the first - parameter potentially contains both schema and table names, it is - parsed per usual SQL rules, meaning it is lower-cased by default. 
- The second parameter, being just a column name, is treated literally - and so has its case preserved. The result is suitably formatted - for passing to the sequence functions (see - ). - - - A typical use is in reading the current value of the sequence for an - identity or serial column, for example: - -SELECT currval(pg_get_serial_sequence('sometable', 'id')); - - - - - - - - pg_get_statisticsobjdef - - pg_get_statisticsobjdef ( statobj oid ) - text - - - Reconstructs the creating command for an extended statistics object. - (This is a decompiled reconstruction, not the original text - of the command.) - - - - - - - pg_get_triggerdef - -pg_get_triggerdef ( trigger oid , pretty boolean ) - text - - - Reconstructs the creating command for a trigger. - (This is a decompiled reconstruction, not the original text - of the command.) - - - - - - - pg_get_userbyid - - pg_get_userbyid ( role oid ) - name - - - Returns a role's name given its OID. - - - - - - - pg_get_viewdef - - pg_get_viewdef ( view oid , pretty boolean ) - text - - - Reconstructs the underlying SELECT command for a - view or materialized view. (This is a decompiled reconstruction, not - the original text of the command.) - - - - - - pg_get_viewdef ( view oid, wrap_column integer ) - text - - - Reconstructs the underlying SELECT command for a - view or materialized view. (This is a decompiled reconstruction, not - the original text of the command.) In this form of the function, - pretty-printing is always enabled, and long lines are wrapped to try - to keep them shorter than the specified number of columns. - - - - - - pg_get_viewdef ( view text , pretty boolean ) - text - - - Reconstructs the underlying SELECT command for a - view or materialized view, working from a textual name for the view - rather than its OID. (This is deprecated; use the OID variant - instead.) - - - - - - - pg_index_column_has_property - - pg_index_column_has_property ( index regclass, column integer, property text ) - boolean - - - Tests whether an index column has the named property. - Common index column properties are listed in - . - (Note that extension access methods can define additional property - names for their indexes.) - NULL is returned if the property name is not known - or does not apply to the particular object, or if the OID or column - number does not identify a valid object. - - - - - - - pg_index_has_property - - pg_index_has_property ( index regclass, property text ) - boolean - - - Tests whether an index has the named property. - Common index properties are listed in - . - (Note that extension access methods can define additional property - names for their indexes.) - NULL is returned if the property name is not known - or does not apply to the particular object, or if the OID does not - identify a valid object. - - - - - - - pg_indexam_has_property - - pg_indexam_has_property ( am oid, property text ) - boolean - - - Tests whether an index access method has the named property. - Access method properties are listed in - . - NULL is returned if the property name is not known - or does not apply to the particular object, or if the OID does not - identify a valid object. - - - - - - - pg_options_to_table - - pg_options_to_table ( options_array text[] ) - setof record - ( option_name text, - option_value text ) - - - Returns the set of storage options represented by a value from - pg_class.reloptions or - pg_attribute.attoptions. 
- - - - - - - pg_settings_get_flags - - pg_settings_get_flags ( guc text ) - text[] - - - Returns an array of the flags associated with the given GUC, or - NULL if it does not exist. The result is - an empty array if the GUC exists but there are no flags to show. - Only the most useful flags listed in - are exposed. - - - - - - - pg_tablespace_databases - - pg_tablespace_databases ( tablespace oid ) - setof oid - - - Returns the set of OIDs of databases that have objects stored in the - specified tablespace. If this function returns any rows, the - tablespace is not empty and cannot be dropped. To identify the specific - objects populating the tablespace, you will need to connect to the - database(s) identified by pg_tablespace_databases - and query their pg_class catalogs. - - - - - - - pg_tablespace_location - - pg_tablespace_location ( tablespace oid ) - text - - - Returns the file system path that this tablespace is located in. - - - - - - - pg_typeof - - pg_typeof ( "any" ) - regtype - - - Returns the OID of the data type of the value that is passed to it. - This can be helpful for troubleshooting or dynamically constructing - SQL queries. The function is declared as - returning regtype, which is an OID alias type (see - ); this means that it is the same as an - OID for comparison purposes but displays as a type name. - - - pg_typeof(33) - integer - - - - - - - COLLATION FOR - - COLLATION FOR ( "any" ) - text - - - Returns the name of the collation of the value that is passed to it. - The value is quoted and schema-qualified if necessary. If no - collation was derived for the argument expression, - then NULL is returned. If the argument is not of a - collatable data type, then an error is raised. - - - collation for ('foo'::text) - "default" - - - collation for ('foo' COLLATE "de_DE") - "de_DE" - - - - - - - to_regclass - - to_regclass ( text ) - regclass - - - Translates a textual relation name to its OID. A similar result is - obtained by casting the string to type regclass (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regdatabase - - to_regdatabase ( text ) - regdatabase - - - Translates a textual database name to its OID. A similar result is - obtained by casting the string to type regdatabase (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regcollation - - to_regcollation ( text ) - regcollation - - - Translates a textual collation name to its OID. A similar result is - obtained by casting the string to type regcollation (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regnamespace - - to_regnamespace ( text ) - regnamespace - - - Translates a textual schema name to its OID. A similar result is - obtained by casting the string to type regnamespace (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regoper - - to_regoper ( text ) - regoper - - - Translates a textual operator name to its OID. A similar result is - obtained by casting the string to type regoper (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found or is ambiguous. - - - - - - - to_regoperator - - to_regoperator ( text ) - regoperator - - - Translates a textual operator name (with parameter types) to its OID. 
A similar result is - obtained by casting the string to type regoperator (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regproc - - to_regproc ( text ) - regproc - - - Translates a textual function or procedure name to its OID. A similar result is - obtained by casting the string to type regproc (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found or is ambiguous. - - - - - - - to_regprocedure - - to_regprocedure ( text ) - regprocedure - - - Translates a textual function or procedure name (with argument types) to its OID. A similar result is - obtained by casting the string to type regprocedure (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regrole - - to_regrole ( text ) - regrole - - - Translates a textual role name to its OID. A similar result is - obtained by casting the string to type regrole (see - ); however, this function will return - NULL rather than throwing an error if the name is - not found. - - - - - - - to_regtype - - to_regtype ( text ) - regtype - - - Parses a string of text, extracts a potential type name from it, - and translates that name into a type OID. A syntax error in the - string will result in an error; but if the string is a - syntactically valid type name that happens not to be found in the - catalogs, the result is NULL. A similar result - is obtained by casting the string to type regtype - (see ), except that the cast will throw - an error if the name is not found. - - - - - - - to_regtypemod - - to_regtypemod ( text ) - integer - - - Parses a string of text, extracts a potential type name from it, - and translates its type modifier, if any. A syntax error in the - string will result in an error; but if the string is a - syntactically valid type name that happens not to be found in the - catalogs, the result is NULL. The result is - -1 if no type modifier is present. - - - to_regtypemod can be combined with - to produce appropriate inputs for - , allowing a string representing a - type name to be canonicalized. - - - format_type(to_regtype('varchar(32)'), to_regtypemod('varchar(32)')) - character varying(32) - - - - -
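For example, the to_reg* functions make it easy to probe for a name without risking an error (illustrative; the second query assumes no relation of that name exists):

SELECT to_regclass('pg_catalog.pg_class');  -- pg_class
SELECT to_regclass('no_such_table');        -- NULL rather than an error
SELECT to_regtype('integer');               -- integer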
- - - Most of the functions that reconstruct (decompile) database objects - have an optional pretty flag, which - if true causes the result to - be pretty-printed. Pretty-printing suppresses unnecessary - parentheses and adds whitespace for legibility. - The pretty-printed format is more readable, but the default format - is more likely to be interpreted the same way by future versions of - PostgreSQL; so avoid using pretty-printed output - for dump purposes. Passing false for - the pretty parameter yields the same result as - omitting the parameter. - - - - Index Column Properties - - - NameDescription - - - - asc - Does the column sort in ascending order on a forward scan? - - - - desc - Does the column sort in descending order on a forward scan? - - - - nulls_first - Does the column sort with nulls first on a forward scan? - - - - nulls_last - Does the column sort with nulls last on a forward scan? - - - - orderable - Does the column possess any defined sort ordering? - - - - distance_orderable - Can the column be scanned in order by a distance - operator, for example ORDER BY col <-> constant ? - - - - returnable - Can the column value be returned by an index-only scan? - - - - search_array - Does the column natively support col = ANY(array) - searches? - - - - search_nulls - Does the column support IS NULL and - IS NOT NULL searches? - - - - -
- - - Index Properties - - - NameDescription - - - - clusterable - Can the index be used in a CLUSTER command? - - - - index_scan - Does the index support plain (non-bitmap) scans? - - - - bitmap_scan - Does the index support bitmap scans? - - - - backward_scan - Can the scan direction be changed in mid-scan (to - support FETCH BACKWARD on a cursor without - needing materialization)? - - - - -
- - - Index Access Method Properties - - - NameDescription - - - - can_order - Does the access method support ASC, - DESC and related keywords in - CREATE INDEX? - - - - can_unique - Does the access method support unique indexes? - - - - can_multi_col - Does the access method support indexes with multiple columns? - - - - can_exclude - Does the access method support exclusion constraints? - - - - can_include - Does the access method support the INCLUDE - clause of CREATE INDEX? - - - - -
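As an illustration, probing the built-in catalog index pg_class_oid_index and the btree access method (the results shown are what a btree index is expected to report):

SELECT pg_index_column_has_property('pg_class_oid_index'::regclass, 1, 'asc');  -- t
SELECT pg_index_has_property('pg_class_oid_index'::regclass, 'clusterable');    -- t
SELECT pg_indexam_has_property((SELECT oid FROM pg_am WHERE amname = 'btree'),
                               'can_order');                                    -- t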
- - - GUC Flags - - - FlagDescription - - - - EXPLAIN - Parameters with this flag are included in - EXPLAIN (SETTINGS) commands. - - - - NO_SHOW_ALL - Parameters with this flag are excluded from - SHOW ALL commands. - - - - NO_RESET - Parameters with this flag do not support - RESET commands. - - - - NO_RESET_ALL - Parameters with this flag are excluded from - RESET ALL commands. - - - - NOT_IN_SAMPLE - Parameters with this flag are not included in - postgresql.conf by default. - - - - RUNTIME_COMPUTED - Parameters with this flag are runtime-computed ones. - - - - -
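For example (the exact flags reported can vary across server versions):

SELECT pg_settings_get_flags('work_mem');         -- e.g. {EXPLAIN}
SELECT pg_settings_get_flags('no_such_setting');  -- NULL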
- -
- - - Object Information and Addressing Functions - - - lists functions related to - database object identification and addressing. - - - - Object Information and Addressing Functions - - - - - Function - - - Description - - - - - - - - - pg_get_acl - - pg_get_acl ( classid oid, objid oid, objsubid integer ) - aclitem[] - - - Returns the ACL for a database object, specified - by catalog OID, object OID and sub-object ID. This function returns - NULL values for undefined objects. - - - - - - - pg_describe_object - - pg_describe_object ( classid oid, objid oid, objsubid integer ) - text - - - Returns a textual description of a database object identified by - catalog OID, object OID, and sub-object ID (such as a column number - within a table; the sub-object ID is zero when referring to a whole - object). This description is intended to be human-readable, and might - be translated, depending on server configuration. This is especially - useful to determine the identity of an object referenced in the - pg_depend catalog. This function returns - NULL values for undefined objects. - - - - - - - pg_identify_object - - pg_identify_object ( classid oid, objid oid, objsubid integer ) - record - ( type text, - schema text, - name text, - identity text ) - - - Returns a row containing enough information to uniquely identify the - database object specified by catalog OID, object OID and sub-object - ID. - This information is intended to be machine-readable, and is never - translated. - type identifies the type of database object; - schema is the schema name that the object - belongs in, or NULL for object types that do not - belong to schemas; - name is the name of the object, quoted if - necessary, if the name (along with schema name, if pertinent) is - sufficient to uniquely identify the object, - otherwise NULL; - identity is the complete object identity, with - the precise format depending on object type, and each name within the - format being schema-qualified and quoted as necessary. Undefined - objects are identified with NULL values. - - - - - - - pg_identify_object_as_address - - pg_identify_object_as_address ( classid oid, objid oid, objsubid integer ) - record - ( type text, - object_names text[], - object_args text[] ) - - - Returns a row containing enough information to uniquely identify the - database object specified by catalog OID, object OID and sub-object - ID. - The returned information is independent of the current server, that - is, it could be used to identify an identically named object in - another server. - type identifies the type of database object; - object_names and - object_args - are text arrays that together form a reference to the object. - These three values can be passed - to pg_get_object_address to obtain the internal - address of the object. - - - - - - - pg_get_object_address - - pg_get_object_address ( type text, object_names text[], object_args text[] ) - record - ( classid oid, - objid oid, - objsubid integer ) - - - Returns a row containing enough information to uniquely identify the - database object specified by a type code and object name and argument - arrays. - The returned values are the ones that would be used in system catalogs - such as pg_depend; they can be passed to - other system functions such as pg_describe_object - or pg_identify_object. - classid is the OID of the system catalog - containing the object; - objid is the OID of the object itself, and - objsubid is the sub-object ID, or zero if none. 
- This function is the inverse - of pg_identify_object_as_address. - Undefined objects are identified with NULL values. - - - - -
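For example, identifying the pg_class catalog itself and then recovering its address (a round trip; pg_class has OID 1259, so classid and objid happen to coincide here):

SELECT * FROM pg_identify_object('pg_class'::regclass, 'pg_class'::regclass, 0);
-- type: table, schema: pg_catalog, name: pg_class, identity: pg_catalog.pg_class

SELECT * FROM pg_get_object_address('table', '{pg_catalog,pg_class}', '{}');
-- classid: 1259, objid: 1259, objsubid: 0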
- - - pg_get_acl is useful for retrieving and inspecting - the privileges associated with database objects without looking at - specific catalogs. For example, to retrieve all the granted privileges - on objects in the current database: - -postgres=# SELECT - (pg_identify_object(s.classid,s.objid,s.objsubid)).*, - pg_catalog.pg_get_acl(s.classid,s.objid,s.objsubid) AS acl -FROM pg_catalog.pg_shdepend AS s -JOIN pg_catalog.pg_database AS d - ON d.datname = current_database() AND - d.oid = s.dbid -JOIN pg_catalog.pg_authid AS a - ON a.oid = s.refobjid AND - s.refclassid = 'pg_authid'::regclass -WHERE s.deptype = 'a'; --[ RECORD 1 ]----------------------------------------- -type | table -schema | public -name | testtab -identity | public.testtab -acl | {postgres=arwdDxtm/postgres,foo=r/postgres} - - - -
- - - Comment Information Functions - - - comment - about database objects - - - - The functions shown in - extract comments previously stored with the - command. A null value is returned if no - comment could be found for the specified parameters. - - - - Comment Information Functions - - - - - Function - - - Description - - - - - - - - - col_description - - col_description ( table oid, column integer ) - text - - - Returns the comment for a table column, which is specified by the OID - of its table and its column number. - (obj_description cannot be used for table - columns, since columns do not have OIDs of their own.) - - - - - - - obj_description - - obj_description ( object oid, catalog name ) - text - - - Returns the comment for a database object specified by its OID and the - name of the containing system catalog. For - example, obj_description(123456, 'pg_class') would - retrieve the comment for the table with OID 123456. - - - - - - obj_description ( object oid ) - text - - - Returns the comment for a database object specified by its OID alone. - This is deprecated since there is no guarantee - that OIDs are unique across different system catalogs; therefore, the - wrong comment might be returned. - - - - - - - shobj_description - - shobj_description ( object oid, catalog name ) - text - - - Returns the comment for a shared database object specified by its OID - and the name of the containing system catalog. This is just - like obj_description except that it is used for - retrieving comments on shared objects (that is, databases, roles, and - tablespaces). Some system catalogs are global to all databases within - each cluster, and the descriptions for objects in them are stored - globally as well. - - - - -
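A short example (mytable is a hypothetical table in the current database):

COMMENT ON TABLE mytable IS 'transactional data';
SELECT obj_description('mytable'::regclass, 'pg_class');  -- transactional data
SELECT col_description('mytable'::regclass, 1);           -- comment on the first column, if any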
- -
- - - Data Validity Checking Functions - - - The functions shown in - can be helpful for checking validity of proposed input data. - - - Data Validity Checking Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - pg_input_is_valid - - pg_input_is_valid ( - string text, - type text - ) - boolean - - - Tests whether the given string is valid - input for the specified data type, returning true or false. - - - This function will only work as desired if the data type's input - function has been updated to report invalid input as - a soft error. Otherwise, invalid input will abort - the transaction, just as if the string had been cast to the type - directly. - - - pg_input_is_valid('42', 'integer') - t - - - pg_input_is_valid('42000000000', 'integer') - f - - - pg_input_is_valid('1234.567', 'numeric(7,4)') - f - - - - - - pg_input_error_info - - pg_input_error_info ( - string text, - type text - ) - record - ( message text, - detail text, - hint text, - sql_error_code text ) - - - Tests whether the given string is valid - input for the specified data type; if not, returns the details of - the error that would have been thrown. If the input is valid, the - results are NULL. The inputs are the same as - for pg_input_is_valid. - - - This function will only work as desired if the data type's input - function has been updated to report invalid input as - a soft error. Otherwise, invalid input will abort - the transaction, just as if the string had been cast to the type - directly. - - - SELECT * FROM pg_input_error_info('42000000000', 'integer') - - - message | detail | hint | sql_error_code -------------------------------------------------------+--------+------+---------------- - value "42000000000" is out of range for type integer | | | 22003 - - - - - -
- -
- - - Transaction ID and Snapshot Information Functions - - The functions shown in - provide server transaction information in an exportable form. The main - use of these functions is to determine which transactions were committed - between two snapshots. - - - Transaction ID and Snapshot Information Functions - - - - - Function - - - Description - - - - - - - - - age - - age ( xid ) - integer - - - Returns the number of transactions between the supplied - transaction ID and the current transaction counter. - - - - - - - mxid_age - - mxid_age ( xid ) - integer - - - Returns the number of multixact IDs between the supplied - multixact ID and the current multixact counter. - - - - - - - pg_current_xact_id - - pg_current_xact_id () - xid8 - - - Returns the current transaction's ID. It will assign a new one if the - current transaction does not have one already (because it has not - performed any database updates); see for details. If executed in a - subtransaction, this will return the top-level transaction ID; - see for details. - - - - - - - pg_current_xact_id_if_assigned - - pg_current_xact_id_if_assigned () - xid8 - - - Returns the current transaction's ID, or NULL if no - ID is assigned yet. (It's best to use this variant if the transaction - might otherwise be read-only, to avoid unnecessary consumption of an - XID.) - If executed in a subtransaction, this will return the top-level - transaction ID. - - - - - - - pg_xact_status - - pg_xact_status ( xid8 ) - text - - - Reports the commit status of a recent transaction. - The result is one of in progress, - committed, or aborted, - provided that the transaction is recent enough that the system retains - the commit status of that transaction. - If it is old enough that no references to the transaction survive in - the system and the commit status information has been discarded, the - result is NULL. - Applications might use this function, for example, to determine - whether their transaction committed or aborted after the application - and database server become disconnected while - a COMMIT is in progress. - Note that prepared transactions are reported as in - progress; applications must check pg_prepared_xacts - if they need to determine whether a transaction ID belongs to a - prepared transaction. - - - - - - - pg_current_snapshot - - pg_current_snapshot () - pg_snapshot - - - Returns a current snapshot, a data structure - showing which transaction IDs are now in-progress. - Only top-level transaction IDs are included in the snapshot; - subtransaction IDs are not shown; see - for details. - - - - - - - pg_snapshot_xip - - pg_snapshot_xip ( pg_snapshot ) - setof xid8 - - - Returns the set of in-progress transaction IDs contained in a snapshot. - - - - - - - pg_snapshot_xmax - - pg_snapshot_xmax ( pg_snapshot ) - xid8 - - - Returns the xmax of a snapshot. - - - - - - - pg_snapshot_xmin - - pg_snapshot_xmin ( pg_snapshot ) - xid8 - - - Returns the xmin of a snapshot. - - - - - - - pg_visible_in_snapshot - - pg_visible_in_snapshot ( xid8, pg_snapshot ) - boolean - - - Is the given transaction ID visible according - to this snapshot (that is, was it completed before the snapshot was - taken)? Note that this function will not give the correct answer for - a subtransaction ID (subxid); see for - details. - - - - - - - pg_get_multixact_members - - pg_get_multixact_members ( multixid xid ) - setof record - ( xid xid, - mode text ) - - - Returns the transaction ID and lock mode for each member of the - specified multixact ID.
The lock modes forupd, - fornokeyupd, sh, and - keysh correspond to the row-level locks - FOR UPDATE, FOR NO KEY UPDATE, - FOR SHARE, and FOR KEY SHARE, - respectively, as described in . Two - additional modes are specific to multixacts: - nokeyupd, used by updates that do not modify key - columns, and upd, used by updates or deletes that - modify key columns. - - - - -
- - - The internal transaction ID type xid is 32 bits wide and - wraps around every 4 billion transactions. However, - the functions shown in , except - age, mxid_age, and - pg_get_multixact_members, use a - 64-bit type xid8 that does not wrap around during the life - of an installation and can be converted to xid by casting if - required; see for details. - The data type pg_snapshot stores information about - transaction ID visibility at a particular moment in time. Its components - are described in . - pg_snapshot's textual representation is - xmin:xmax:xip_list. - For example 10:20:10,14,15 means - xmin=10, xmax=20, xip_list=10, 14, 15. - - - - Snapshot Components - - - - Name - Description - - - - - - xmin - - Lowest transaction ID that was still active. All transaction IDs - less than xmin are either committed and visible, - or rolled back and dead. - - - - - xmax - - One past the highest completed transaction ID. All transaction IDs - greater than or equal to xmax had not yet - completed as of the time of the snapshot, and thus are invisible. - - - - - xip_list - - Transactions in progress at the time of the snapshot. A transaction - ID that is xmin <= X < - xmax and not in this list was already completed at the time - of the snapshot, and thus is either visible or dead according to its - commit status. This list does not include the transaction IDs of - subtransactions (subxids). - - - - -
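A small illustration (the XID values shown are arbitrary; actual values depend on the cluster's history):

SELECT pg_current_snapshot();  -- e.g. 793:793:
SELECT pg_snapshot_xmin(pg_current_snapshot()),
       pg_snapshot_xmax(pg_current_snapshot());
-- was the (hypothetical) transaction 790 completed before the snapshot was taken?
SELECT pg_visible_in_snapshot('790'::xid8, pg_current_snapshot());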
- - - In releases of PostgreSQL before 13 there was - no xid8 type, so variants of these functions were provided - that used bigint to represent a 64-bit XID, with a - correspondingly distinct snapshot data type txid_snapshot. - These older functions have txid in their names. They - are still supported for backward compatibility, but may be removed from a - future release. See . - - - - Deprecated Transaction ID and Snapshot Information Functions - - - - - Function - - - Description - - - - - - - - - - txid_current - - txid_current () - bigint - - - See pg_current_xact_id(). - - - - - - - txid_current_if_assigned - - txid_current_if_assigned () - bigint - - - See pg_current_xact_id_if_assigned(). - - - - - - - txid_current_snapshot - - txid_current_snapshot () - txid_snapshot - - - See pg_current_snapshot(). - - - - - - - txid_snapshot_xip - - txid_snapshot_xip ( txid_snapshot ) - setof bigint - - - See pg_snapshot_xip(). - - - - - - - txid_snapshot_xmax - - txid_snapshot_xmax ( txid_snapshot ) - bigint - - - See pg_snapshot_xmax(). - - - - - - - txid_snapshot_xmin - - txid_snapshot_xmin ( txid_snapshot ) - bigint - - - See pg_snapshot_xmin(). - - - - - - - txid_visible_in_snapshot - - txid_visible_in_snapshot ( bigint, txid_snapshot ) - boolean - - - See pg_visible_in_snapshot(). - - - - - - - txid_status - - txid_status ( bigint ) - text - - - See pg_xact_status(). - - - - -
- -
- - - Committed Transaction Information Functions - - - The functions shown in - provide information about when past transactions were committed. - They only provide useful data when the - configuration option is - enabled, and only for transactions that were committed after it was - enabled. Commit timestamp information is routinely removed during - vacuum. - - - - Committed Transaction Information Functions - - - - - Function - - - Description - - - - - - - - - pg_xact_commit_timestamp - - pg_xact_commit_timestamp ( xid ) - timestamp with time zone - - - Returns the commit timestamp of a transaction. - - - - - - - pg_xact_commit_timestamp_origin - - pg_xact_commit_timestamp_origin ( xid ) - record - ( timestamp timestamp with time zone, - roident oid) - - - Returns the commit timestamp and replication origin of a transaction. - - - - - - - pg_last_committed_xact - - pg_last_committed_xact () - record - ( xid xid, - timestamp timestamp with time zone, - roident oid ) - - - Returns the transaction ID, commit timestamp and replication origin - of the latest committed transaction. - - - - -
- -
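For example, with track_commit_timestamp enabled, the commit time of the most recent transaction can be retrieved like this (results are illustrative; if the option is disabled, these calls report an error):

postgres=# SELECT * FROM pg_last_committed_xact();
postgres=# SELECT pg_xact_commit_timestamp(xid) FROM pg_last_committed_xact();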
- - - Control Data Functions - - - The functions shown in - print information initialized during initdb, such - as the catalog version. They also show information about write-ahead - logging and checkpoint processing. This information is cluster-wide, - not specific to any one database. These functions provide most of the same - information, from the same source, as the - application. - - - - Control Data Functions - - - - - Function - - - Description - - - - - - - - - pg_control_checkpoint - - pg_control_checkpoint () - record - - - Returns information about current checkpoint state, as shown in - . - - - - - - - pg_control_system - - pg_control_system () - record - - - Returns information about current control file state, as shown in - . - - - - - - - pg_control_init - - pg_control_init () - record - - - Returns information about cluster initialization state, as shown in - . - - - - - - - pg_control_recovery - - pg_control_recovery () - record - - - Returns information about recovery state, as shown in - . - - - - -
- - - <function>pg_control_checkpoint</function> Output Columns - - - - Column Name - Data Type - - - - - - - checkpoint_lsn - pg_lsn - - - - redo_lsn - pg_lsn - - - - redo_wal_file - text - - - - timeline_id - integer - - - - prev_timeline_id - integer - - - - full_page_writes - boolean - - - - next_xid - text - - - - next_oid - oid - - - - next_multixact_id - xid - - - - next_multi_offset - xid - - - - oldest_xid - xid - - - - oldest_xid_dbid - oid - - - - oldest_active_xid - xid - - - - oldest_multi_xid - xid - - - - oldest_multi_dbid - oid - - - - oldest_commit_ts_xid - xid - - - - newest_commit_ts_xid - xid - - - - checkpoint_time - timestamp with time zone - - - - -
- - - <function>pg_control_system</function> Output Columns - - - - Column Name - Data Type - - - - - - - pg_control_version - integer - - - - catalog_version_no - integer - - - - system_identifier - bigint - - - - pg_control_last_modified - timestamp with time zone - - - - -
- - - <function>pg_control_init</function> Output Columns - - - - Column Name - Data Type - - - - - - - max_data_alignment - integer - - - - database_block_size - integer - - - - blocks_per_segment - integer - - - - wal_block_size - integer - - - - bytes_per_wal_segment - integer - - - - max_identifier_length - integer - - - - max_index_columns - integer - - - - max_toast_chunk_size - integer - - - - large_object_chunk_size - integer - - - - float8_pass_by_value - boolean - - - - data_page_checksum_version - integer - - - - default_char_signedness - boolean - - - - -
- - - <function>pg_control_recovery</function> Output Columns - - - - Column Name - Data Type - - - - - - - min_recovery_end_lsn - pg_lsn - - - - min_recovery_end_timeline - integer - - - - backup_start_lsn - pg_lsn - - - - backup_end_lsn - pg_lsn - - - - end_of_backup_record_required - boolean - - - - -
- -
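For example, to inspect the cluster's last checkpoint location and its system identifier, one could run (values will vary per cluster):

postgres=# SELECT checkpoint_lsn, redo_lsn, timeline_id FROM pg_control_checkpoint();
postgres=# SELECT system_identifier FROM pg_control_system();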
- - - Version Information Functions - - - The functions shown in - print version information. - - - - Version Information Functions - - - - - Function - - - Description - - - - - - - - - version - - version () - text - - - Returns a string describing the PostgreSQL - server's version. You can also get this information from - , or for a machine-readable - version use . Software - developers should use server_version_num (available - since 8.2) or instead of - parsing the text version. - - - - - - - unicode_version - - unicode_version () - text - - - Returns a string representing the version of Unicode used by - PostgreSQL. - - - - - - icu_unicode_version - - icu_unicode_version () - text - - - Returns a string representing the version of Unicode used by ICU, if - the server was built with ICU support; otherwise returns - NULL. - - - -
- -
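For example (the strings returned will vary by server version and build options):

postgres=# SELECT version();
postgres=# SELECT current_setting('server_version_num');
postgres=# SELECT unicode_version();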
- - - WAL Summarization Information Functions - - - The functions shown in - print information about the status of WAL summarization. - See . - - - - WAL Summarization Information Functions - - - - - Function - - - Description - - - - - - - - - pg_available_wal_summaries - - pg_available_wal_summaries () - setof record - ( tli bigint, - start_lsn pg_lsn, - end_lsn pg_lsn ) - - - Returns information about the WAL summary files present in the - data directory, under pg_wal/summaries. - One row will be returned per WAL summary file. Each file summarizes - WAL on the indicated TLI within the indicated LSN range. This function - might be useful to determine whether enough WAL summaries are present - on the server to take an incremental backup based on some prior - backup whose start LSN is known. - - - - - - - pg_wal_summary_contents - - pg_wal_summary_contents ( tli bigint, start_lsn pg_lsn, end_lsn pg_lsn ) - setof record - ( relfilenode oid, - reltablespace oid, - reldatabase oid, - relforknumber smallint, - relblocknumber bigint, - is_limit_block boolean ) - - - Returns information about the contents of a single WAL summary file - identified by TLI and starting and ending LSNs. Each row with - is_limit_block false indicates that the block - identified by the remaining output columns was modified by at least - one WAL record within the range of records summarized by this file. - Each row with is_limit_block true indicates either - that (a) the relation fork was truncated to the length given by - relblocknumber within the relevant range of WAL - records or (b) that the relation fork was created or dropped within - the relevant range of WAL records; in such cases, - relblocknumber will be zero. - - - - - - - pg_get_wal_summarizer_state - - pg_get_wal_summarizer_state () - record - ( summarized_tli bigint, - summarized_lsn pg_lsn, - pending_lsn pg_lsn, - summarizer_pid int ) - - - Returns information about the progress of the WAL summarizer. If the - WAL summarizer has never run since the instance was started, then - summarized_tli and summarized_lsn - will be 0 and 0/00000000 respectively; - otherwise, they will be the TLI and ending LSN of the last WAL summary - file written to disk. If the WAL summarizer is currently running, - pending_lsn will be the ending LSN of the last - record that it has consumed, which must always be greater than or - equal to summarized_lsn; if the WAL summarizer is - not running, it will be equal to summarized_lsn. - summarizer_pid is the PID of the WAL summarizer - process, if it is running, and otherwise NULL. - - - As a special exception, the WAL summarizer will refuse to generate - WAL summary files if run on WAL generated under - wal_level=minimal, since such summaries would be - unsafe to use as the basis for an incremental backup. In this case, - the fields above will continue to advance as if summaries were being - generated, but nothing will be written to disk. Once the summarizer - reaches WAL generated while wal_level was set - to replica or higher, it will resume writing - summaries to disk. - - - - -
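For example, on a server with WAL summarization enabled (the summarize_wal configuration parameter), the newest summary file and the summarizer's progress could be inspected like this:

postgres=# SELECT tli, start_lsn, end_lsn
             FROM pg_available_wal_summaries()
             ORDER BY end_lsn DESC LIMIT 1;
postgres=# SELECT * FROM pg_get_wal_summarizer_state();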
- -
- -
- - - System Administration Functions - - - The functions described in this section are used to control and - monitor a PostgreSQL installation. - - - - Configuration Settings Functions - - - SET - - - - SHOW - - - - configuration - of the server - functions - - - - shows the functions - available to query and alter run-time configuration parameters. - - - - Configuration Settings Functions - - - - - Function - - - Description - - - Example(s) - - - - - - - - - current_setting - - current_setting ( setting_name text , missing_ok boolean ) - text - - - Returns the current value of the - setting setting_name. If there is no such - setting, current_setting throws an error - unless missing_ok is supplied and - is true (in which case NULL is returned). - This function corresponds to - the SQL command . - - - current_setting('datestyle') - ISO, MDY - - - - - - - set_config - - set_config ( - setting_name text, - new_value text, - is_local boolean ) - text - - - Sets the parameter setting_name - to new_value, and returns that value. - If is_local is true, the new - value will only apply during the current transaction. If you want the - new value to apply for the rest of the current session, - use false instead. This function corresponds to - the SQL command . - - - set_config accepts the NULL value for - new_value, but as settings cannot be null, it - is interpreted as a request to reset the setting to its default value. - - - set_config('log_statement_stats', 'off', false) - off - - - - -
- -
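For example, to change a parameter for the current transaction only, one could do the following; once the transaction ends, the previous value is restored:

postgres=# BEGIN;
postgres=# SELECT set_config('work_mem', '64MB', true);
postgres=# SELECT current_setting('work_mem');
postgres=# COMMIT;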
- - - Server Signaling Functions - - - signal - backend processes - - - - The functions shown in send control signals to - other server processes. Use of these functions is restricted to - superusers by default but access may be granted to others using - GRANT, with noted exceptions. - - - - Each of these functions returns true if - the signal was successfully sent and false - if sending the signal failed. - - - - Server Signaling Functions - - - - - Function - - - Description - - - - - - - - - pg_cancel_backend - - pg_cancel_backend ( pid integer ) - boolean - - - Cancels the current query of the session whose backend process has the - specified process ID. This is also allowed if the - calling role is a member of the role whose backend is being canceled or - the calling role has privileges of pg_signal_backend, - however only superusers can cancel superuser backends. - As an exception, roles with privileges of - pg_signal_autovacuum_worker are permitted to - cancel autovacuum worker processes, which are otherwise considered - superuser backends. - - - - - - - pg_log_backend_memory_contexts - - pg_log_backend_memory_contexts ( pid integer ) - boolean - - - Requests to log the memory contexts of the backend with the - specified process ID. This function can send the request to - backends and auxiliary processes except logger. These memory contexts - will be logged at - LOG message level. They will appear in - the server log based on the log configuration set - (see for more information), - but will not be sent to the client regardless of - . - - - - - - - pg_reload_conf - - pg_reload_conf () - boolean - - - Causes all processes of the PostgreSQL - server to reload their configuration files. (This is initiated by - sending a SIGHUP signal to the postmaster - process, which in turn sends SIGHUP to each - of its children.) You can use the - pg_file_settings, - pg_hba_file_rules and - pg_ident_file_mappings views - to check the configuration files for possible errors, before reloading. - - - - - - - pg_rotate_logfile - - pg_rotate_logfile () - boolean - - - Signals the log-file manager to switch to a new output file - immediately. This works only when the built-in log collector is - running, since otherwise there is no log-file manager subprocess. - - - - - - - pg_terminate_backend - - pg_terminate_backend ( pid integer, timeout bigint DEFAULT 0 ) - boolean - - - Terminates the session whose backend process has the - specified process ID. This is also allowed if the calling role - is a member of the role whose backend is being terminated or the - calling role has privileges of pg_signal_backend, - however only superusers can terminate superuser backends. - As an exception, roles with privileges of - pg_signal_autovacuum_worker are permitted to - terminate autovacuum worker processes, which are otherwise considered - superuser backends. - - - If timeout is not specified or zero, this - function returns true whether the process actually - terminates or not, indicating only that the sending of the signal was - successful. If the timeout is specified (in - milliseconds) and greater than zero, the function waits until the - process is actually terminated or until the given time has passed. If - the process is terminated, the function - returns true. On timeout, a warning is emitted and - false is returned. - - - - -
- - - pg_cancel_backend and pg_terminate_backend - send signals (SIGINT or SIGTERM - respectively) to backend processes identified by process ID. - The process ID of an active backend can be found from - the pid column of the - pg_stat_activity view, or by listing the - postgres processes on the server (using - ps on Unix or the Task - Manager on Windows). - The role of an active backend can be found from the - usename column of the - pg_stat_activity view. - - - - pg_log_backend_memory_contexts can be used - to log the memory contexts of a backend process. For example: - -postgres=# SELECT pg_log_backend_memory_contexts(pg_backend_pid()); - pg_log_backend_memory_contexts --------------------------------- - t -(1 row) - -One message for each memory context will be logged. For example: - -LOG: logging memory contexts of PID 10377 -STATEMENT: SELECT pg_log_backend_memory_contexts(pg_backend_pid()); -LOG: level: 1; TopMemoryContext: 80800 total in 6 blocks; 14432 free (5 chunks); 66368 used -LOG: level: 2; pgstat TabStatusArray lookup hash table: 8192 total in 1 blocks; 1408 free (0 chunks); 6784 used -LOG: level: 2; TopTransactionContext: 8192 total in 1 blocks; 7720 free (1 chunks); 472 used -LOG: level: 2; RowDescriptionContext: 8192 total in 1 blocks; 6880 free (0 chunks); 1312 used -LOG: level: 2; MessageContext: 16384 total in 2 blocks; 5152 free (0 chunks); 11232 used -LOG: level: 2; Operator class cache: 8192 total in 1 blocks; 512 free (0 chunks); 7680 used -LOG: level: 2; smgr relation table: 16384 total in 2 blocks; 4544 free (3 chunks); 11840 used -LOG: level: 2; TransactionAbortContext: 32768 total in 1 blocks; 32504 free (0 chunks); 264 used -... -LOG: level: 2; ErrorContext: 8192 total in 1 blocks; 7928 free (3 chunks); 264 used -LOG: Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560 used - - If there are more than 100 child contexts under the same parent, the first - 100 child contexts are logged, along with a summary of the remaining contexts. - Note that frequent calls to this function could incur significant overhead, - because it may generate a large number of log messages. - - -
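For example, to cancel all queries in other sessions that have been active for more than five minutes, one could combine pg_cancel_backend with the pg_stat_activity view (the interval is arbitrary; adjust to taste):

postgres=# SELECT pg_cancel_backend(pid)
             FROM pg_stat_activity
             WHERE state = 'active'
               AND now() - query_start > interval '5 minutes'
               AND pid <> pg_backend_pid();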
- - - Backup Control Functions - - - backup - - - - The functions shown in assist in making on-line backups. - These functions cannot be executed during recovery (except - pg_backup_start, - pg_backup_stop, - and pg_wal_lsn_diff). - - - - For details about proper usage of these functions, see - . - - - - Backup Control Functions - - - - - Function - - - Description - - - - - - - - - pg_create_restore_point - - pg_create_restore_point ( name text ) - pg_lsn - - - Creates a named marker record in the write-ahead log that can later be - used as a recovery target, and returns the corresponding write-ahead - log location. The given name can then be used with - to specify the point up to - which recovery will proceed. Avoid creating multiple restore points - with the same name, since recovery will stop at the first one whose - name matches the recovery target. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_current_wal_flush_lsn - - pg_current_wal_flush_lsn () - pg_lsn - - - Returns the current write-ahead log flush location (see notes below). - - - - - - - pg_current_wal_insert_lsn - - pg_current_wal_insert_lsn () - pg_lsn - - - Returns the current write-ahead log insert location (see notes below). - - - - - - - pg_current_wal_lsn - - pg_current_wal_lsn () - pg_lsn - - - Returns the current write-ahead log write location (see notes below). - - - - - - - pg_backup_start - - pg_backup_start ( - label text - , fast boolean - ) - pg_lsn - - - Prepares the server to begin an on-line backup. The only required - parameter is an arbitrary user-defined label for the backup. - (Typically this would be the name under which the backup dump file - will be stored.) - If the optional second parameter is given as true, - it specifies executing pg_backup_start as quickly - as possible. This forces a fast checkpoint which will cause a - spike in I/O operations, slowing any concurrently executing queries. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_backup_stop - - pg_backup_stop ( - wait_for_archive boolean - ) - record - ( lsn pg_lsn, - labelfile text, - spcmapfile text ) - - - Finishes performing an on-line backup. The desired contents of the - backup label file and the tablespace map file are returned as part of - the result of the function and must be written to files in the - backup area. These files must not be written to the live data directory - (doing so will cause PostgreSQL to fail to restart in the event of a - crash). - - - There is an optional parameter of type boolean. - If false, the function will return immediately after the backup is - completed, without waiting for WAL to be archived. This behavior is - only useful with backup software that independently monitors WAL - archiving. Otherwise, WAL required to make the backup consistent might - be missing and make the backup useless. By default or when this - parameter is true, pg_backup_stop will wait for - WAL to be archived when archiving is enabled. (On a standby, this - means that it will wait only when archive_mode = - always. If write activity on the primary is low, - it may be useful to run pg_switch_wal on the - primary in order to trigger an immediate segment switch.) - - - When executed on a primary, this function also creates a backup - history file in the write-ahead log archive area. 
The history file - includes the label given to pg_backup_start, the - starting and ending write-ahead log locations for the backup, and the - starting and ending times of the backup. After recording the ending - location, the current write-ahead log insertion point is automatically - advanced to the next write-ahead log file, so that the ending - write-ahead log file can be archived immediately to complete the - backup. - - - The result of the function is a single record. - The lsn column holds the backup's ending - write-ahead log location (which again can be ignored). The second - column returns the contents of the backup label file, and the third - column returns the contents of the tablespace map file. These must be - stored as part of the backup and are required as part of the restore - process. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_switch_wal - - pg_switch_wal () - pg_lsn - - - Forces the server to switch to a new write-ahead log file, which - allows the current file to be archived (assuming you are using - continuous archiving). The result is the ending write-ahead log - location plus 1 within the just-completed write-ahead log file. If - there has been no write-ahead log activity since the last write-ahead - log switch, pg_switch_wal does nothing and - returns the start location of the write-ahead log file currently in - use. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_walfile_name - - pg_walfile_name ( lsn pg_lsn ) - text - - - Converts a write-ahead log location to the name of the WAL file - holding that location. - - - - - - - pg_walfile_name_offset - - pg_walfile_name_offset ( lsn pg_lsn ) - record - ( file_name text, - file_offset integer ) - - - Converts a write-ahead log location to a WAL file name and byte offset - within that file. - - - - - - - pg_split_walfile_name - - pg_split_walfile_name ( file_name text ) - record - ( segment_number numeric, - timeline_id bigint ) - - - Extracts the sequence number and timeline ID from a WAL file - name. - - - - - - - pg_wal_lsn_diff - - pg_wal_lsn_diff ( lsn1 pg_lsn, lsn2 pg_lsn ) - numeric - - - Calculates the difference in bytes (lsn1 - lsn2) between two write-ahead log - locations. This can be used - with pg_stat_replication or some of the - functions shown in to - get the replication lag. - - - - -
- - - pg_current_wal_lsn displays the current write-ahead - log write location in the same format used by the above functions. - Similarly, pg_current_wal_insert_lsn displays the - current write-ahead log insertion location - and pg_current_wal_flush_lsn displays the current - write-ahead log flush location. The insertion location is - the logical end of the write-ahead log at any instant, - while the write location is the end of what has actually been written out - from the server's internal buffers, and the flush location is the last - location known to be written to durable storage. The write location is the - end of what can be examined from outside the server, and is usually what - you want if you are interested in archiving partially-complete write-ahead - log files. The insertion and flush locations are made available primarily - for server debugging purposes. These are all read-only operations and do - not require superuser permissions. - - - - You can use pg_walfile_name_offset to extract the - corresponding write-ahead log file name and byte offset from - a pg_lsn value. For example: - -postgres=# SELECT * FROM pg_walfile_name_offset((pg_backup_stop()).lsn); - file_name | file_offset ---------------------------+------------- - 00000001000000000000000D | 4039624 -(1 row) - - Similarly, pg_walfile_name extracts just the write-ahead log file name. - - - - pg_split_walfile_name is useful to compute a - LSN from a file offset and WAL file name, for example: - -postgres=# \set file_name '000000010000000100C000AB' -postgres=# \set offset 256 -postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset AS lsn - FROM pg_split_walfile_name(:'file_name') pd, - pg_show_all_settings() ps - WHERE ps.name = 'wal_segment_size'; - lsn ---------------- - C001/AB000100 -(1 row) - - - -
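A minimal sketch of a low-level base backup using these functions might look as follows; the label 'nightly' is arbitrary, and the actual copying of the data directory must be done by an external tool while the backup is in progress (see the backup documentation for the complete procedure):

postgres=# SELECT pg_backup_start('nightly', false);
-- ... copy the cluster's data directory with an external tool ...
postgres=# SELECT lsn, labelfile, spcmapfile FROM pg_backup_stop(true);

The labelfile and spcmapfile contents returned by pg_backup_stop must then be written into the backup area, as described above.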
- - - Recovery Control Functions - - - The functions shown in provide information - about the current status of a standby server. - These functions may be executed both during recovery and in normal running. - - - - Recovery Information Functions - - - - - Function - - - Description - - - - - - - - - pg_is_in_recovery - - pg_is_in_recovery () - boolean - - - Returns true if recovery is still in progress. - - - - - - - pg_last_wal_receive_lsn - - pg_last_wal_receive_lsn () - pg_lsn - - - Returns the last write-ahead log location that has been received and - synced to disk by streaming replication. While streaming replication - is in progress this will increase monotonically. If recovery has - completed then this will remain static at the location of the last WAL - record received and synced to disk during recovery. If streaming - replication is disabled, or if it has not yet started, the function - returns NULL. - - - - - - - pg_last_wal_replay_lsn - - pg_last_wal_replay_lsn () - pg_lsn - - - Returns the last write-ahead log location that has been replayed - during recovery. If recovery is still in progress this will increase - monotonically. If recovery has completed then this will remain - static at the location of the last WAL record applied during recovery. - When the server has been started normally without recovery, the - function returns NULL. - - - - - - - pg_last_xact_replay_timestamp - - pg_last_xact_replay_timestamp () - timestamp with time zone - - - Returns the time stamp of the last transaction replayed during - recovery. This is the time at which the commit or abort WAL record - for that transaction was generated on the primary. If no transactions - have been replayed during recovery, the function - returns NULL. Otherwise, if recovery is still in - progress this will increase monotonically. If recovery has completed - then this will remain static at the time of the last transaction - applied during recovery. When the server has been started normally - without recovery, the function returns NULL. - - - - - - - pg_get_wal_resource_managers - - pg_get_wal_resource_managers () - setof record - ( rm_id integer, - rm_name text, - rm_builtin boolean ) - - - Returns the currently-loaded WAL resource managers in the system. The - column rm_builtin indicates whether it's a - built-in resource manager, or a custom resource manager loaded by an - extension. - - - - -
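For example, on a standby one could check replication progress like this; on a primary started normally, pg_is_in_recovery returns false and the other functions return NULL:

postgres=# SELECT pg_is_in_recovery();
postgres=# SELECT pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn();
postgres=# SELECT now() - pg_last_xact_replay_timestamp() AS replay_lag;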
- - - The functions shown in control the progress of recovery. - These functions may be executed only during recovery. - - - - Recovery Control Functions - - - - - Function - - - Description - - - - - - - - - pg_is_wal_replay_paused - - pg_is_wal_replay_paused () - boolean - - - Returns true if recovery pause is requested. - - - - - - - pg_get_wal_replay_pause_state - - pg_get_wal_replay_pause_state () - text - - - Returns recovery pause state. The return values are - not paused if pause is not requested, - pause requested if pause is requested but recovery is - not yet paused, and paused if the recovery is - actually paused. - - - - - - - pg_promote - - pg_promote ( wait boolean DEFAULT true, wait_seconds integer DEFAULT 60 ) - boolean - - - Promotes a standby server to primary status. - With wait set to true (the - default), the function waits until promotion is completed - or wait_seconds seconds have passed, and - returns true if promotion is successful - and false otherwise. - If wait is set to false, the - function returns true immediately after sending a - SIGUSR1 signal to the postmaster to trigger - promotion. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_wal_replay_pause - - pg_wal_replay_pause () - void - - - Request to pause recovery. A request doesn't mean that recovery stops - right away. If you want a guarantee that recovery is actually paused, - you need to check for the recovery pause state returned by - pg_get_wal_replay_pause_state(). Note that - pg_is_wal_replay_paused() returns whether a request - is made. While recovery is paused, no further database changes are applied. - If hot standby is active, all new queries will see the same consistent - snapshot of the database, and no further query conflicts will be generated - until recovery is resumed. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_wal_replay_resume - - pg_wal_replay_resume () - void - - - Restarts recovery if it was paused. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - -
- - - pg_wal_replay_pause and - pg_wal_replay_resume cannot be executed while - a promotion is ongoing. If a promotion is triggered while recovery - is paused, the paused state ends and promotion continues. - - - - If streaming replication is disabled, the paused state may continue - indefinitely without a problem. If streaming replication is in - progress then WAL records will continue to be received, which will - eventually fill available disk space, depending upon the duration of - the pause, the rate of WAL generation and available disk space. - - -
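For example, to pause replay on a standby and confirm that it has actually paused (the state may briefly read pause requested before settling on paused):

postgres=# SELECT pg_wal_replay_pause();
postgres=# SELECT pg_get_wal_replay_pause_state();
 pg_get_wal_replay_pause_state
-------------------------------
 paused
(1 row)

postgres=# SELECT pg_wal_replay_resume();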
- - - Snapshot Synchronization Functions - - - PostgreSQL allows database sessions to synchronize their - snapshots. A snapshot determines which data is visible to the - transaction that is using the snapshot. Synchronized snapshots are - necessary when two or more sessions need to see identical content in the - database. If two sessions just start their transactions independently, - there is always a possibility that some third transaction commits - between the executions of the two START TRANSACTION commands, - so that one session sees the effects of that transaction and the other - does not. - - - - To solve this problem, PostgreSQL allows a transaction to - export the snapshot it is using. As long as the exporting - transaction remains open, other transactions can import its - snapshot, and thereby be guaranteed that they see exactly the same view - of the database that the first transaction sees. But note that any - database changes made by any one of these transactions remain invisible - to the other transactions, as is usual for changes made by uncommitted - transactions. So the transactions are synchronized with respect to - pre-existing data, but act normally for changes they make themselves. - - - - Snapshots are exported with the pg_export_snapshot function, - shown in , and - imported with the command. - - - - Snapshot Synchronization Functions - - - - - Function - - - Description - - - - - - - - - pg_export_snapshot - - pg_export_snapshot () - text - - - Saves the transaction's current snapshot and returns - a text string identifying the snapshot. This string must - be passed (outside the database) to clients that want to import the - snapshot. The snapshot is available for import only until the end of - the transaction that exported it. - - - A transaction can export more than one snapshot, if needed. Note that - doing so is only useful in READ COMMITTED - transactions, since in REPEATABLE READ and higher - isolation levels, transactions use the same snapshot throughout their - lifetime. Once a transaction has exported any snapshots, it cannot be - prepared with . - - - - - - pg_log_standby_snapshot - - pg_log_standby_snapshot () - pg_lsn - - - Take a snapshot of running transactions and write it to WAL, without - having to wait for bgwriter or checkpointer to log one. This is useful - for logical decoding on standby, as logical slot creation has to wait - until such a record is replayed on the standby. - - - - -
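For example, an exporting session and an importing session could be synchronized as follows; the snapshot identifier shown is illustrative, and the string actually returned by pg_export_snapshot must be used verbatim:

-- session 1: export the snapshot and keep the transaction open
postgres=# BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
postgres=# SELECT pg_export_snapshot();
 pg_export_snapshot
---------------------
 00000003-0000001B-1
(1 row)

-- session 2: import it while session 1's transaction remains open
postgres=# BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
postgres=# SET TRANSACTION SNAPSHOT '00000003-0000001B-1';

Both sessions now see exactly the same view of pre-existing data.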
- -
- - - Replication Management Functions - - - The functions shown - in are for - controlling and interacting with replication features. - See , - , and - - for information about the underlying features. - Use of functions for replication origin is only allowed to the - superuser by default, but may be allowed to other users by using the - GRANT command. - Use of functions for replication slots is restricted to superusers - and users having REPLICATION privilege. - - - - Many of these functions have equivalent commands in the replication - protocol; see . - - - - The functions described in - , - , and - - are also relevant for replication. - - - - Replication Management Functions - - - - - Function - - - Description - - - - - - - - - pg_create_physical_replication_slot - - pg_create_physical_replication_slot ( slot_name name , immediately_reserve boolean, temporary boolean ) - record - ( slot_name name, - lsn pg_lsn ) - - - Creates a new physical replication slot named - slot_name. The name cannot be - pg_conflict_detection as it is reserved for the - conflict detection slot. The optional second parameter, - when true, specifies that the LSN for this - replication slot be reserved immediately; otherwise - the LSN is reserved on first connection from a streaming - replication client. Streaming changes from a physical slot is only - possible with the streaming-replication protocol — - see . The optional third - parameter, temporary, when set to true, specifies that - the slot should not be permanently stored to disk and is only meant - for use by the current session. Temporary slots are also - released upon any error. This function corresponds - to the replication protocol command CREATE_REPLICATION_SLOT - ... PHYSICAL. - - - - - - - pg_drop_replication_slot - - pg_drop_replication_slot ( slot_name name ) - void - - - Drops the physical or logical replication slot - named slot_name. Same as replication protocol - command DROP_REPLICATION_SLOT. - - - - - - - pg_create_logical_replication_slot - - pg_create_logical_replication_slot ( slot_name name, plugin name , temporary boolean, twophase boolean, failover boolean ) - record - ( slot_name name, - lsn pg_lsn ) - - - Creates a new logical (decoding) replication slot named - slot_name using the output plugin - plugin. The name cannot be - pg_conflict_detection as it is reserved for - the conflict detection slot. The optional third - parameter, temporary, when set to true, specifies that - the slot should not be permanently stored to disk and is only meant - for use by the current session. Temporary slots are also - released upon any error. The optional fourth parameter, - twophase, when set to true, specifies - that the decoding of prepared transactions is enabled for this - slot. The optional fifth parameter, - failover, when set to true, - specifies that this slot is enabled to be synced to the - standbys so that logical replication can be resumed after - failover. A call to this function has the same effect as - the replication protocol command - CREATE_REPLICATION_SLOT ... LOGICAL. - - - - - - - pg_copy_physical_replication_slot - - pg_copy_physical_replication_slot ( src_slot_name name, dst_slot_name name , temporary boolean ) - record - ( slot_name name, - lsn pg_lsn ) - - - Copies an existing physical replication slot named src_slot_name - to a physical replication slot named dst_slot_name. - The new slot name cannot be pg_conflict_detection, - as it is reserved for the conflict detection. 
- The copied physical slot starts to reserve WAL from the same LSN as the - source slot. - temporary is optional. If temporary - is omitted, the same value as the source slot is used. Copy of an - invalidated slot is not allowed. - - - - - - - pg_copy_logical_replication_slot - - pg_copy_logical_replication_slot ( src_slot_name name, dst_slot_name name , temporary boolean , plugin name ) - record - ( slot_name name, - lsn pg_lsn ) - - - Copies an existing logical replication slot - named src_slot_name to a logical replication - slot named dst_slot_name, optionally changing - the output plugin and persistence. The new slot name cannot be - pg_conflict_detection as it is reserved for - the conflict detection. The copied logical slot starts from the same - LSN as the source logical slot. Both - temporary and plugin are - optional; if they are omitted, the values of the source slot are used. - The failover option of the source logical slot - is not copied and is set to false by default. This - is to avoid the risk of being unable to continue logical replication - after failover to standby where the slot is being synchronized. Copy of - an invalidated slot is not allowed. - - - - - - - pg_logical_slot_get_changes - - pg_logical_slot_get_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) - setof record - ( lsn pg_lsn, - xid xid, - data text ) - - - Returns changes in the slot slot_name, starting - from the point from which changes have been consumed last. If - upto_lsn - and upto_nchanges are NULL, - logical decoding will continue until end of WAL. If - upto_lsn is non-NULL, decoding will include only - those transactions which commit prior to the specified LSN. If - upto_nchanges is non-NULL, decoding will - stop when the number of rows produced by decoding exceeds - the specified value. Note, however, that the actual number of - rows returned may be larger, since this limit is only checked after - adding the rows produced when decoding each new transaction commit. - If the specified slot is a logical failover slot then the function will - not return until all physical slots specified in - synchronized_standby_slots - have confirmed WAL receipt. - - - - - - - pg_logical_slot_peek_changes - - pg_logical_slot_peek_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) - setof record - ( lsn pg_lsn, - xid xid, - data text ) - - - Behaves just like - the pg_logical_slot_get_changes() function, - except that changes are not consumed; that is, they will be returned - again on future calls. - - - - - - - pg_logical_slot_get_binary_changes - - pg_logical_slot_get_binary_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) - setof record - ( lsn pg_lsn, - xid xid, - data bytea ) - - - Behaves just like - the pg_logical_slot_get_changes() function, - except that changes are returned as bytea. - - - - - - - pg_logical_slot_peek_binary_changes - - pg_logical_slot_peek_binary_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) - setof record - ( lsn pg_lsn, - xid xid, - data bytea ) - - - Behaves just like - the pg_logical_slot_peek_changes() function, - except that changes are returned as bytea. - - - - - - - pg_replication_slot_advance - - pg_replication_slot_advance ( slot_name name, upto_lsn pg_lsn ) - record - ( slot_name name, - end_lsn pg_lsn ) - - - Advances the current confirmed position of a replication slot named - slot_name. 
The slot will not be moved backwards, - and it will not be moved beyond the current insert location. Returns - the name of the slot and the actual position that it was advanced to. - The updated slot position information is written out at the next - checkpoint if any advancing is done. So in the event of a crash, the - slot may return to an earlier position. If the specified slot is a - logical failover slot then the function will not return until all - physical slots specified in - synchronized_standby_slots - have confirmed WAL receipt. - - - - - - - pg_replication_origin_create - - pg_replication_origin_create ( node_name text ) - oid - - - Creates a replication origin with the given external - name, and returns the internal ID assigned to it. - The name must be no longer than 512 bytes. - - - - - - - pg_replication_origin_drop - - pg_replication_origin_drop ( node_name text ) - void - - - Deletes a previously-created replication origin, including any - associated replay progress. - - - - - - - pg_replication_origin_oid - - pg_replication_origin_oid ( node_name text ) - oid - - - Looks up a replication origin by name and returns the internal ID. If - no such replication origin is found, NULL is - returned. - - - - - - - pg_replication_origin_session_setup - - pg_replication_origin_session_setup ( node_name text ) - void - - - Marks the current session as replaying from the given - origin, allowing replay progress to be tracked. - Can only be used if no origin is currently selected. - Use pg_replication_origin_session_reset to undo. - - - - - - - pg_replication_origin_session_reset - - pg_replication_origin_session_reset () - void - - - Cancels the effects - of pg_replication_origin_session_setup(). - - - - - - - pg_replication_origin_session_is_setup - - pg_replication_origin_session_is_setup () - boolean - - - Returns true if a replication origin has been selected in the - current session. - - - - - - - pg_replication_origin_session_progress - - pg_replication_origin_session_progress ( flush boolean ) - pg_lsn - - - Returns the replay location for the replication origin selected in - the current session. The parameter flush - determines whether the corresponding local transaction will be - guaranteed to have been flushed to disk or not. - - - - - - - pg_replication_origin_xact_setup - - pg_replication_origin_xact_setup ( origin_lsn pg_lsn, origin_timestamp timestamp with time zone ) - void - - - Marks the current transaction as replaying a transaction that has - committed at the given LSN and timestamp. Can - only be called when a replication origin has been selected - using pg_replication_origin_session_setup. - - - - - - - pg_replication_origin_xact_reset - - pg_replication_origin_xact_reset () - void - - - Cancels the effects of - pg_replication_origin_xact_setup(). - - - - - - - pg_replication_origin_advance - - pg_replication_origin_advance ( node_name text, lsn pg_lsn ) - void - - - Sets replication progress for the given node to the given - location. This is primarily useful for setting up the initial - location, or setting a new location after configuration changes and - similar. Be aware that careless use of this function can lead to - inconsistently replicated data. - - - - - - - pg_replication_origin_progress - - pg_replication_origin_progress ( node_name text, flush boolean ) - pg_lsn - - - Returns the replay location for the given replication origin. 
The - parameter flush determines whether the - corresponding local transaction will be guaranteed to have been - flushed to disk or not. - - - - - - - pg_logical_emit_message - - pg_logical_emit_message ( transactional boolean, prefix text, content text , flush boolean DEFAULT false ) - pg_lsn - - - pg_logical_emit_message ( transactional boolean, prefix text, content bytea , flush boolean DEFAULT false ) - pg_lsn - - - Emits a logical decoding message. This can be used to pass generic - messages to logical decoding plugins through - WAL. The transactional parameter specifies if - the message should be part of the current transaction, or if it should - be written immediately and decoded as soon as the logical decoder - reads the record. The prefix parameter is a - textual prefix that can be used by logical decoding plugins to easily - recognize messages that are interesting for them. - The content parameter is the content of the - message, given either in text or binary form. - The flush parameter (default set to - false) controls if the message is immediately - flushed to WAL or not. flush has no effect - with transactional, as the message's WAL - record is flushed along with its transaction. - - - - - - - pg_sync_replication_slots - - pg_sync_replication_slots () - void - - - Synchronize the logical failover replication slots from the primary - server to the standby server. This function can only be executed on the - standby server. Temporary synced slots, if any, cannot be used for - logical decoding and must be dropped after promotion. See - for details. - Note that this function is primarily intended for testing and - debugging purposes and should be used with caution. Additionally, - this function cannot be executed if - - sync_replication_slots is enabled and the slotsync - worker is already running to perform the synchronization of slots. - - - - - If, after executing the function, - - hot_standby_feedback is disabled on - the standby or the physical slot configured in - - primary_slot_name is - removed, then it is possible that the necessary rows of the - synchronized slot will be removed by the VACUUM process on the primary - server, resulting in the synchronized slot becoming invalidated. - - - - - - - -
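For example, a logical slot can be created, drained, and dropped using the test_decoding output plugin shipped in contrib; the slot name demo_slot is arbitrary, and wal_level must be set to logical:

postgres=# SELECT slot_name, lsn
             FROM pg_create_logical_replication_slot('demo_slot', 'test_decoding');
postgres=# SELECT lsn, xid, data
             FROM pg_logical_slot_get_changes('demo_slot', NULL, NULL);
postgres=# SELECT pg_drop_replication_slot('demo_slot');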
- -
- - - Database Object Management Functions - - - The functions shown in calculate - the disk space usage of database objects, or assist in presentation - or understanding of usage results. bigint results - are measured in bytes. If an OID that does - not represent an existing object is passed to one of these - functions, NULL is returned. - - - - Database Object Size Functions - - - - - Function - - - Description - - - - - - - - - pg_column_size - - pg_column_size ( "any" ) - integer - - - Shows the number of bytes used to store any individual data value. If - applied directly to a table column value, this reflects any - compression that was done. - - - - - - - pg_column_compression - - pg_column_compression ( "any" ) - text - - - Shows the compression algorithm that was used to compress - an individual variable-length value. Returns NULL - if the value is not compressed. - - - - - - - pg_column_toast_chunk_id - - pg_column_toast_chunk_id ( "any" ) - oid - - - Shows the chunk_id of an on-disk - TOASTed value. Returns NULL - if the value is un-TOASTed or not on-disk. See - for more information about - TOAST. - - - - - - - pg_database_size - - pg_database_size ( name ) - bigint - - - pg_database_size ( oid ) - bigint - - - Computes the total disk space used by the database with the specified - name or OID. To use this function, you must - have CONNECT privilege on the specified database - (which is granted by default) or have privileges of - the pg_read_all_stats role. - - - - - - - pg_indexes_size - - pg_indexes_size ( regclass ) - bigint - - - Computes the total disk space used by indexes attached to the - specified table. - - - - - - - pg_relation_size - - pg_relation_size ( relation regclass , fork text ) - bigint - - - Computes the disk space used by one fork of the - specified relation. (Note that for most purposes it is more - convenient to use the higher-level - functions pg_total_relation_size - or pg_table_size, which sum the sizes of all - forks.) With one argument, this returns the size of the main data - fork of the relation. The second argument can be provided to specify - which fork to examine: - - - - main returns the size of the main - data fork of the relation. - - - - - fsm returns the size of the Free Space Map - (see ) associated with the relation. - - - - - vm returns the size of the Visibility Map - (see ) associated with the relation. - - - - - init returns the size of the initialization - fork, if any, associated with the relation. - - - - - - - - - - pg_size_bytes - - pg_size_bytes ( text ) - bigint - - - Converts a size in human-readable format (as returned - by pg_size_pretty) into bytes. Valid units are - bytes, B, kB, - MB, GB, TB, - and PB. - - - - - - - pg_size_pretty - - pg_size_pretty ( bigint ) - text - - - pg_size_pretty ( numeric ) - text - - - Converts a size in bytes into a more easily human-readable format with - size units (bytes, kB, MB, GB, TB, or PB as appropriate). Note that the - units are powers of 2 rather than powers of 10, so 1kB is 1024 bytes, - 1MB is 1024² = 1048576 bytes, and so on. - - - - - - - pg_table_size - - pg_table_size ( regclass ) - bigint - - - Computes the disk space used by the specified table, excluding indexes - (but including its TOAST table if any, free space map, and visibility - map). - - - - - - - pg_tablespace_size - - pg_tablespace_size ( name ) - bigint - - - pg_tablespace_size ( oid ) - bigint - - - Computes the total disk space used in the tablespace with the - specified name or OID.
To use this function, you must - have CREATE privilege on the specified tablespace - or have privileges of the pg_read_all_stats role, - unless it is the default tablespace for the current database. - - - - - - - pg_total_relation_size - - pg_total_relation_size ( regclass ) - bigint - - - Computes the total disk space used by the specified table, including - all indexes and TOAST data. The result is - equivalent to pg_table_size - + pg_indexes_size. - - - - -
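For example (sizes will vary; pg_class is used here only because it exists in every database):

postgres=# SELECT pg_size_pretty(pg_database_size(current_database()));
postgres=# SELECT pg_size_pretty(pg_total_relation_size('pg_class'));
postgres=# SELECT pg_size_bytes('1 GB');
 pg_size_bytes
---------------
 1073741824
(1 row)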
- - - The functions above that operate on tables or indexes accept a - regclass argument, which is simply the OID of the table or index - in the pg_class system catalog. You do not have to look up - the OID by hand, however, since the regclass data type's input - converter will do the work for you. See - for details. - - - - The functions shown in assist - in identifying the specific disk files associated with database objects. - - - - Database Object Location Functions - - - - - Function - - - Description - - - - - - - - - pg_relation_filenode - - pg_relation_filenode ( relation regclass ) - oid - - - Returns the filenode number currently assigned to the - specified relation. The filenode is the base component of the file - name(s) used for the relation (see - for more information). - For most relations the result is the same as - pg_class.relfilenode, - but for certain system catalogs relfilenode - is zero and this function must be used to get the correct value. The - function returns NULL if passed a relation that does not have storage, - such as a view. - - - - - - - pg_relation_filepath - - pg_relation_filepath ( relation regclass ) - text - - - Returns the entire file path name (relative to the database cluster's - data directory, PGDATA) of the relation. - - - - - - - pg_filenode_relation - - pg_filenode_relation ( tablespace oid, filenode oid ) - regclass - - - Returns a relation's OID given the tablespace OID and filenode it is - stored under. This is essentially the inverse mapping of - pg_relation_filepath. For a relation in the - database's default tablespace, the tablespace can be specified as zero. - Returns NULL if no relation in the current database - is associated with the given values. - - - - -
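For example, to find the file backing a table and then map that file back to the relation; zero is passed as the tablespace because pg_class lives in the database's default tablespace:

postgres=# SELECT pg_relation_filepath('pg_class');
postgres=# SELECT pg_filenode_relation(0, pg_relation_filenode('pg_class'));
 pg_filenode_relation
----------------------
 pg_class
(1 row)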
- - - lists functions used to manage - collations. - - - - Collation Management Functions - - - - - Function - - - Description - - - - - - - - - pg_collation_actual_version - - pg_collation_actual_version ( oid ) - text - - - Returns the actual version of the collation object as it is currently - installed in the operating system. If this is different from the - value in - pg_collation.collversion, - then objects depending on the collation might need to be rebuilt. See - also . - - - - - - - pg_database_collation_actual_version - - pg_database_collation_actual_version ( oid ) - text - - - Returns the actual version of the database's collation as it is currently - installed in the operating system. If this is different from the - value in - pg_database.datcollversion, - then objects depending on the collation might need to be rebuilt. See - also . - - - - - - - pg_import_system_collations - - pg_import_system_collations ( schema regnamespace ) - integer - - - Adds collations to the system - catalog pg_collation based on all the locales - it finds in the operating system. This is - what initdb uses; see - for more details. If additional - locales are installed into the operating system later on, this - function can be run again to add collations for the new locales. - Locales that match existing entries - in pg_collation will be skipped. (But - collation objects based on locales that are no longer present in the - operating system are not removed by this function.) - The schema parameter would typically - be pg_catalog, but that is not a requirement; the - collations could be installed into some other schema as well. The - function returns the number of new collation objects it created. - Use of this function is restricted to superusers. - - - - -
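For example, to list collations whose recorded version no longer matches what the operating system currently provides, and which therefore may need to be rebuilt:

postgres=# SELECT collname, collversion,
                  pg_collation_actual_version(oid) AS actual_version
             FROM pg_collation
             WHERE collversion IS NOT NULL
               AND collversion IS DISTINCT FROM pg_collation_actual_version(oid);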
- - - lists functions used to - manipulate statistics. - These functions cannot be executed during recovery. - - - Changes made by these statistics manipulation functions are likely to be - overwritten by autovacuum (or manual - VACUUM or ANALYZE) and should be - considered temporary. - - - - - - Database Object Statistics Manipulation Functions - - - - - Function - - - Description - - - - - - - - - pg_restore_relation_stats - - pg_restore_relation_stats ( - VARIADIC kwargs "any" ) - boolean - - - Updates table-level statistics. Ordinarily, these statistics are - collected automatically or updated as a part of or , so it's not - necessary to call this function. However, it is useful after a - restore to enable the optimizer to choose better plans if - ANALYZE has not been run yet. - - - The tracked statistics may change from version to version, so - arguments are passed as pairs of argname - and argvalue in the form: - -SELECT pg_restore_relation_stats( - 'arg1name', 'arg1value'::arg1type, - 'arg2name', 'arg2value'::arg2type, - 'arg3name', 'arg3value'::arg3type); - - - - For example, to set the relpages and - reltuples values for the table - mytable: - -SELECT pg_restore_relation_stats( - 'schemaname', 'myschema', - 'relname', 'mytable', - 'relpages', 173::integer, - 'reltuples', 10000::real); - - - - The arguments schemaname and - relname are required, and specify the table. Other - arguments are the names and values of statistics corresponding to - certain columns in pg_class. - The currently-supported relation statistics are - relpages with a value of type - integer, reltuples with a value of - type real, relallvisible with a value - of type integer, and relallfrozen - with a value of type integer. - - - Additionally, this function accepts argument name - version of type integer, which - specifies the server version from which the statistics originated. - This is anticipated to be helpful in porting statistics from older - versions of PostgreSQL. - - - Minor errors are reported as a WARNING and - ignored, and remaining statistics will still be restored. If all - specified statistics are successfully restored, returns - true, otherwise false. - - - The caller must have the MAINTAIN privilege on the - table or be the owner of the database. - - - - - - - - - pg_clear_relation_stats - - pg_clear_relation_stats ( schemaname text, relname text ) - void - - - Clears table-level statistics for the given relation, as though the - table was newly created. - - - The caller must have the MAINTAIN privilege on the - table or be the owner of the database. - - - - - - - - pg_restore_attribute_stats - - pg_restore_attribute_stats ( - VARIADIC kwargs "any" ) - boolean - - - Creates or updates column-level statistics. Ordinarily, these - statistics are collected automatically or updated as a part of or , so it's not - necessary to call this function. However, it is useful after a - restore to enable the optimizer to choose better plans if - ANALYZE has not been run yet. 
- - - The tracked statistics may change from version to version, so - arguments are passed as pairs of argname - and argvalue in the form: - -SELECT pg_restore_attribute_stats( - 'arg1name', 'arg1value'::arg1type, - 'arg2name', 'arg2value'::arg2type, - 'arg3name', 'arg3value'::arg3type); - - - - For example, to set the avg_width and - null_frac values for the attribute - col1 of the table - mytable: - -SELECT pg_restore_attribute_stats( - 'schemaname', 'myschema', - 'relname', 'mytable', - 'attname', 'col1', - 'inherited', false, - 'avg_width', 125::integer, - 'null_frac', 0.5::real); - - - - The required arguments are schemaname and - relname with a value of type text - which specify the table; either attname with a - value of type text or attnum with a - value of type smallint, which specifies the column; and - inherited, which specifies whether the statistics - include values from child tables. Other arguments are the names and - values of statistics corresponding to columns in pg_stats. - - - Additionally, this function accepts argument name - version of type integer, which - specifies the server version from which the statistics originated. - This is anticipated to be helpful in porting statistics from older - versions of PostgreSQL. - - - Minor errors are reported as a WARNING and - ignored, and remaining statistics will still be restored. If all - specified statistics are successfully restored, returns - true, otherwise false. - - - The caller must have the MAINTAIN privilege on the - table or be the owner of the database. - - - - - - - - - pg_clear_attribute_stats - - pg_clear_attribute_stats ( - schemaname text, - relname text, - attname text, - inherited boolean ) - void - - - Clears column-level statistics for the given relation and - attribute, as though the table was newly created. - - - The caller must have the MAINTAIN privilege on - the table or be the owner of the database. - - - - - -
- - - lists functions that provide - information about the structure of partitioned tables. - - - - Partitioning Information Functions - - - - - Function - - - Description - - - - - - - - - pg_partition_tree - - pg_partition_tree ( regclass ) - setof record - ( relid regclass, - parentrelid regclass, - isleaf boolean, - level integer ) - - - Lists the tables or indexes in the partition tree of the - given partitioned table or partitioned index, with one row for each - partition. Information provided includes the OID of the partition, - the OID of its immediate parent, a boolean value telling if the - partition is a leaf, and an integer telling its level in the hierarchy. - The level value is 0 for the input table or index, 1 for its - immediate child partitions, 2 for their partitions, and so on. - Returns no rows if the relation does not exist or is not a partition - or partitioned table. - - - - - - - pg_partition_ancestors - - pg_partition_ancestors ( regclass ) - setof regclass - - - Lists the ancestor relations of the given partition, - including the relation itself. Returns no rows if the relation - does not exist or is not a partition or partitioned table. - - - - - - - pg_partition_root - - pg_partition_root ( regclass ) - regclass - - - Returns the top-most parent of the partition tree to which the given - relation belongs. Returns NULL if the relation - does not exist or is not a partition or partitioned table. - - - - -
- - - For example, to check the total size of the data contained in a - partitioned table measurement, one could use the - following query: - -SELECT pg_size_pretty(sum(pg_relation_size(relid))) AS total_size - FROM pg_partition_tree('measurement'); - - - -
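The other partitioning information functions can be exercised the same way; a sketch reusing the measurement table, where measurement_y2006m02 stands in for one of its leaf partitions:

-- list every relation in the partition tree, with parent, leaf flag, and depth
SELECT relid, parentrelid, isleaf, level
  FROM pg_partition_tree('measurement');

-- walk upward from a leaf partition through its ancestors
SELECT * FROM pg_partition_ancestors('measurement_y2006m02');

-- find the top-most parent directly
SELECT pg_partition_root('measurement_y2006m02');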
- - - Index Maintenance Functions - - - shows the functions - available for index maintenance tasks. (Note that these maintenance - tasks are normally done automatically by autovacuum; use of these - functions is only required in special cases.) - These functions cannot be executed during recovery. - Use of these functions is restricted to superusers and the owner - of the given index. - - - - Index Maintenance Functions - - - - - Function - - - Description - - - - - - - - - brin_summarize_new_values - - brin_summarize_new_values ( index regclass ) - integer - - - Scans the specified BRIN index to find page ranges in the base table - that are not currently summarized by the index; for any such range it - creates a new summary index tuple by scanning those table pages. - Returns the number of new page range summaries that were inserted - into the index. - - - - - - - brin_summarize_range - - brin_summarize_range ( index regclass, blockNumber bigint ) - integer - - - Summarizes the page range covering the given block, if not already - summarized. This is - like brin_summarize_new_values except that it - only processes the page range that covers the given table block number. - - - - - - - brin_desummarize_range - - brin_desummarize_range ( index regclass, blockNumber bigint ) - void - - - Removes the BRIN index tuple that summarizes the page range covering - the given table block, if there is one. - - - - - - - gin_clean_pending_list - - gin_clean_pending_list ( index regclass ) - bigint - - - Cleans up the pending list of the specified GIN index - by moving entries in it, in bulk, to the main GIN data structure. - Returns the number of pages removed from the pending list. - If the argument is a GIN index built with - the fastupdate option disabled, no cleanup happens - and the result is zero, because the index doesn't have a pending list. - See and - for details about the pending list and fastupdate - option. - - - - -
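As an illustration, a short maintenance session; my_brin_idx and my_gin_idx are placeholder names for a BRIN and a GIN index in your database:

-- summarize any page ranges of the base table not yet covered by the BRIN index
SELECT brin_summarize_new_values('my_brin_idx');

-- summarize, or undo the summarization of, only the range containing block 0
SELECT brin_summarize_range('my_brin_idx', 0);
SELECT brin_desummarize_range('my_brin_idx', 0);

-- move the GIN pending list into the main index structure
SELECT gin_clean_pending_list('my_gin_idx');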
- -
- - - Generic File Access Functions - - - The functions shown in provide native access to - files on the machine hosting the server. Only files within the - database cluster directory and the log_directory can be - accessed, unless the user is a superuser or is granted the role - pg_read_server_files. Use a relative path for files in - the cluster directory, and a path matching the log_directory - configuration setting for log files. - - - - Note that granting users the EXECUTE privilege on - pg_read_file(), or related functions, allows them the - ability to read any file on the server that the database server process can - read; these functions bypass all in-database privilege checks. This means - that, for example, a user with such access is able to read the contents of - the pg_authid table where authentication - information is stored, as well as read any table data in the database. - Therefore, granting access to these functions should be carefully - considered. - - - - When granting privilege on these functions, note that the table entries - showing optional parameters are mostly implemented as several physical - functions with different parameter lists. Privilege must be granted - separately on each such function, if it is to be - used. psql's \df command - can be useful to check what the actual function signatures are. - - - - Some of these functions take an optional missing_ok - parameter, which specifies the behavior when the file or directory does - not exist. If true, the function - returns NULL or an empty result set, as appropriate. - If false, an error is raised. (Failure conditions - other than file not found are reported as errors in any - case.) The default is false. - - - - Generic File Access Functions - - - - - Function - - - Description - - - - - - - - - pg_ls_dir - - pg_ls_dir ( dirname text , missing_ok boolean, include_dot_dirs boolean ) - setof text - - - Returns the names of all files (and directories and other special - files) in the specified - directory. The include_dot_dirs parameter - indicates whether . and .. are to be - included in the result set; the default is to exclude them. Including - them can be useful when missing_ok - is true, to distinguish an empty directory from a - non-existent directory. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_ls_logdir - - pg_ls_logdir () - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's log directory. Filenames beginning with - a dot, directories, and other special files are excluded. - - - This function is restricted to superusers and roles with privileges of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - pg_ls_waldir - - pg_ls_waldir () - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's write-ahead log (WAL) directory. - Filenames beginning with a dot, directories, and other special files - are excluded. - - - This function is restricted to superusers and roles with privileges of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. 
- - - - - - - pg_ls_logicalmapdir - - pg_ls_logicalmapdir () - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's pg_logical/mappings - directory. Filenames beginning with a dot, directories, and other - special files are excluded. - - - This function is restricted to superusers and members of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - pg_ls_logicalsnapdir - - pg_ls_logicalsnapdir () - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's pg_logical/snapshots - directory. Filenames beginning with a dot, directories, and other - special files are excluded. - - - This function is restricted to superusers and members of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - pg_ls_replslotdir - - pg_ls_replslotdir ( slot_name text ) - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's pg_replslot/slot_name - directory, where slot_name is the name of the - replication slot provided as input of the function. Filenames beginning - with a dot, directories, and other special files are excluded. - - - This function is restricted to superusers and members of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - pg_ls_summariesdir - - pg_ls_summariesdir () - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's WAL summaries directory - (pg_wal/summaries). Filenames beginning - with a dot, directories, and other special files are excluded. - - - This function is restricted to superusers and members of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - pg_ls_archive_statusdir - - pg_ls_archive_statusdir () - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the server's WAL archive status directory - (pg_wal/archive_status). Filenames beginning - with a dot, directories, and other special files are excluded. - - - This function is restricted to superusers and members of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. - - - - - - - - pg_ls_tmpdir - - pg_ls_tmpdir ( tablespace oid ) - setof record - ( name text, - size bigint, - modification timestamp with time zone ) - - - Returns the name, size, and last modification time (mtime) of each - ordinary file in the temporary file directory for the - specified tablespace. - If tablespace is not provided, - the pg_default tablespace is examined. Filenames - beginning with a dot, directories, and other special files are - excluded. - - - This function is restricted to superusers and members of - the pg_monitor role by default, but other users can - be granted EXECUTE to run the function. 
- - - - - - - pg_read_file - - pg_read_file ( filename text , offset bigint, length bigint , missing_ok boolean ) - text - - - Returns all or part of a text file, starting at the - given byte offset, returning at - most length bytes (less if the end of file is - reached first). If offset is negative, it is - relative to the end of the file. If offset - and length are omitted, the entire file is - returned. The bytes read from the file are interpreted as a string in - the database's encoding; an error is thrown if they are not valid in - that encoding. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - - - pg_read_binary_file - - pg_read_binary_file ( filename text , offset bigint, length bigint , missing_ok boolean ) - bytea - - - Returns all or part of a file. This function is identical to - pg_read_file except that it can read arbitrary - binary data, returning the result as bytea - not text; accordingly, no encoding checks are performed. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - In combination with the convert_from function, - this function can be used to read a text file in a specified encoding - and convert to the database's encoding: - -SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); - - - - - - - - pg_stat_file - - pg_stat_file ( filename text , missing_ok boolean ) - record - ( size bigint, - access timestamp with time zone, - modification timestamp with time zone, - change timestamp with time zone, - creation timestamp with time zone, - isdir boolean ) - - - Returns a record containing the file's size, last access time stamp, - last modification time stamp, last file status change time stamp (Unix - platforms only), file creation time stamp (Windows only), and a flag - indicating if it is a directory. - - - This function is restricted to superusers by default, but other users - can be granted EXECUTE to run the function. - - - - - -
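A few hedged examples of these functions; they assume a default layout in which postgresql.conf lives in the data directory, and that the caller has the necessary privileges:

-- list the contents of the WAL directory, relative to the data directory
SELECT * FROM pg_ls_dir('pg_wal');

-- read the first kilobyte of the configuration file
SELECT pg_read_file('postgresql.conf', 0, 1024);

-- size and timestamps of a file, without reading its contents
SELECT * FROM pg_stat_file('postgresql.conf');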
- -
- - - Advisory Lock Functions - - - The functions shown in - manage advisory locks. For details about proper use of these functions, - see . - - - - All these functions are intended to be used to lock application-defined - resources, which can be identified either by a single 64-bit key value or - two 32-bit key values (note that these two key spaces do not overlap). - If another session already holds a conflicting lock on the same resource - identifier, the functions will either wait until the resource becomes - available, or return a false result, as appropriate for - the function. - Locks can be either shared or exclusive: a shared lock does not conflict - with other shared locks on the same resource, only with exclusive locks. - Locks can be taken at session level (so that they are held until released - or the session ends) or at transaction level (so that they are held until - the current transaction ends; there is no provision for manual release). - Multiple session-level lock requests stack, so that if the same resource - identifier is locked three times there must then be three unlock requests - to release the resource in advance of session end. - - - - Advisory Lock Functions - - - - - Function - - - Description - - - - - - - - - pg_advisory_lock - - pg_advisory_lock ( key bigint ) - void - - - pg_advisory_lock ( key1 integer, key2 integer ) - void - - - Obtains an exclusive session-level advisory lock, waiting if necessary. - - - - - - - pg_advisory_lock_shared - - pg_advisory_lock_shared ( key bigint ) - void - - - pg_advisory_lock_shared ( key1 integer, key2 integer ) - void - - - Obtains a shared session-level advisory lock, waiting if necessary. - - - - - - - pg_advisory_unlock - - pg_advisory_unlock ( key bigint ) - boolean - - - pg_advisory_unlock ( key1 integer, key2 integer ) - boolean - - - Releases a previously-acquired exclusive session-level advisory lock. - Returns true if the lock is successfully released. - If the lock was not held, false is returned, and in - addition, an SQL warning will be reported by the server. - - - - - - - pg_advisory_unlock_all - - pg_advisory_unlock_all () - void - - - Releases all session-level advisory locks held by the current session. - (This function is implicitly invoked at session end, even if the - client disconnects ungracefully.) - - - - - - - pg_advisory_unlock_shared - - pg_advisory_unlock_shared ( key bigint ) - boolean - - - pg_advisory_unlock_shared ( key1 integer, key2 integer ) - boolean - - - Releases a previously-acquired shared session-level advisory lock. - Returns true if the lock is successfully released. - If the lock was not held, false is returned, and in - addition, an SQL warning will be reported by the server. - - - - - - - pg_advisory_xact_lock - - pg_advisory_xact_lock ( key bigint ) - void - - - pg_advisory_xact_lock ( key1 integer, key2 integer ) - void - - - Obtains an exclusive transaction-level advisory lock, waiting if - necessary. - - - - - - - pg_advisory_xact_lock_shared - - pg_advisory_xact_lock_shared ( key bigint ) - void - - - pg_advisory_xact_lock_shared ( key1 integer, key2 integer ) - void - - - Obtains a shared transaction-level advisory lock, waiting if - necessary. - - - - - - - pg_try_advisory_lock - - pg_try_advisory_lock ( key bigint ) - boolean - - - pg_try_advisory_lock ( key1 integer, key2 integer ) - boolean - - - Obtains an exclusive session-level advisory lock if available. 
- This will either obtain the lock immediately and - return true, or return false - without waiting if the lock cannot be acquired immediately. - - - - - - - pg_try_advisory_lock_shared - - pg_try_advisory_lock_shared ( key bigint ) - boolean - - - pg_try_advisory_lock_shared ( key1 integer, key2 integer ) - boolean - - - Obtains a shared session-level advisory lock if available. - This will either obtain the lock immediately and - return true, or return false - without waiting if the lock cannot be acquired immediately. - - - - - - - pg_try_advisory_xact_lock - - pg_try_advisory_xact_lock ( key bigint ) - boolean - - - pg_try_advisory_xact_lock ( key1 integer, key2 integer ) - boolean - - - Obtains an exclusive transaction-level advisory lock if available. - This will either obtain the lock immediately and - return true, or return false - without waiting if the lock cannot be acquired immediately. - - - - - - - pg_try_advisory_xact_lock_shared - - pg_try_advisory_xact_lock_shared ( key bigint ) - boolean - - - pg_try_advisory_xact_lock_shared ( key1 integer, key2 integer ) - boolean - - - Obtains a shared transaction-level advisory lock if available. - This will either obtain the lock immediately and - return true, or return false - without waiting if the lock cannot be acquired immediately. - - - - -
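To make the blocking and non-blocking variants concrete, a two-session sketch using an arbitrary application-defined key (42 is just an example identifier):

-- session 1: take an exclusive session-level lock on resource 42
SELECT pg_advisory_lock(42);

-- session 2: try the same lock without blocking
SELECT pg_try_advisory_lock(42);   -- returns false while session 1 holds the lock

-- session 1: release it (one unlock per successful lock request)
SELECT pg_advisory_unlock(42);     -- returns true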
- -
- -
- - - Trigger Functions - - - While many uses of triggers involve user-written trigger functions, - PostgreSQL provides a few built-in trigger - functions that can be used directly in user-defined triggers. These - are summarized in . - (Additional built-in trigger functions exist, which implement foreign - key constraints and deferred index constraints. Those are not documented - here since users need not use them directly.) - - - - For more information about creating triggers, see - . - - - - Built-In Trigger Functions - - - - - Function - - - Description - - - Example Usage - - - - - - - - - suppress_redundant_updates_trigger - - suppress_redundant_updates_trigger ( ) - trigger - - - Suppresses do-nothing update operations. See below for details. - - - CREATE TRIGGER ... suppress_redundant_updates_trigger() - - - - - - - tsvector_update_trigger - - tsvector_update_trigger ( ) - trigger - - - Automatically updates a tsvector column from associated - plain-text document column(s). The text search configuration to use - is specified by name as a trigger argument. See - for details. - - - CREATE TRIGGER ... tsvector_update_trigger(tsvcol, 'pg_catalog.swedish', title, body) - - - - - - - tsvector_update_trigger_column - - tsvector_update_trigger_column ( ) - trigger - - - Automatically updates a tsvector column from associated - plain-text document column(s). The text search configuration to use - is taken from a regconfig column of the table. See - for details. - - - CREATE TRIGGER ... tsvector_update_trigger_column(tsvcol, tsconfigcol, title, body) - - - - -
- - - The suppress_redundant_updates_trigger function, - when applied as a row-level BEFORE UPDATE trigger, - will prevent any update that does not actually change the data in the - row from taking place. This overrides the normal behavior which always - performs a physical row update - regardless of whether or not the data has changed. (This normal behavior - makes updates run faster, since no checking is required, and is also - useful in certain cases.) - - - - Ideally, you should avoid running updates that don't actually - change the data in the record. Redundant updates can cost considerable - unnecessary time, especially if there are lots of indexes to alter, - and space in dead rows that will eventually have to be vacuumed. - However, detecting such situations in client code is not - always easy, or even possible, and writing expressions to detect - them can be error-prone. An alternative is to use - suppress_redundant_updates_trigger, which will skip - updates that don't change the data. You should use this with care, - however. The trigger takes a small but non-trivial time for each record, - so if most of the records affected by updates do actually change, - use of this trigger will make updates run slower on average. - - - - The suppress_redundant_updates_trigger function can be - added to a table like this: - -CREATE TRIGGER z_min_update -BEFORE UPDATE ON tablename -FOR EACH ROW EXECUTE FUNCTION suppress_redundant_updates_trigger(); - - In most cases, you need to fire this trigger last for each row, so that - it does not override other triggers that might wish to alter the row. - Bearing in mind that triggers fire in name order, you would therefore - choose a trigger name that comes after the name of any other trigger - you might have on the table. (Hence the z prefix in the - example.) - -
- - - Event Trigger Functions - - - PostgreSQL provides these helper functions - to retrieve information from event triggers. - - - - For more information about event triggers, - see . - - - - Capturing Changes at Command End - - - pg_event_trigger_ddl_commands - - - -pg_event_trigger_ddl_commands () setof record - - - - pg_event_trigger_ddl_commands returns a list of - DDL commands executed by each user action, - when invoked in a function attached to a - ddl_command_end event trigger. If called in any other - context, an error is raised. - pg_event_trigger_ddl_commands returns one row for each - base command executed; some commands that are a single SQL sentence - may return more than one row. This function returns the following - columns: - - - - - - Name - Type - Description - - - - - - classid - oid - OID of catalog the object belongs in - - - objid - oid - OID of the object itself - - - objsubid - integer - Sub-object ID (e.g., attribute number for a column) - - - command_tag - text - Command tag - - - object_type - text - Type of the object - - - schema_name - text - - Name of the schema the object belongs in, if any; otherwise NULL. - No quoting is applied. - - - - object_identity - text - - Text rendering of the object identity, schema-qualified. Each - identifier included in the identity is quoted if necessary. - - - - in_extension - boolean - True if the command is part of an extension script - - - command - pg_ddl_command - - A complete representation of the command, in internal format. - This cannot be output directly, but it can be passed to other - functions to obtain different pieces of information about the - command. - - - - - - - - - - Processing Objects Dropped by a DDL Command - - - pg_event_trigger_dropped_objects - - - -pg_event_trigger_dropped_objects () setof record - - - - pg_event_trigger_dropped_objects returns a list of all objects - dropped by the command in whose sql_drop event it is called. - If called in any other context, an error is raised. - This function returns the following columns: - - - - - - Name - Type - Description - - - - - - classid - oid - OID of catalog the object belonged in - - - objid - oid - OID of the object itself - - - objsubid - integer - Sub-object ID (e.g., attribute number for a column) - - - original - boolean - True if this was one of the root object(s) of the deletion - - - normal - boolean - - True if there was a normal dependency relationship - in the dependency graph leading to this object - - - - is_temporary - boolean - - True if this was a temporary object - - - - object_type - text - Type of the object - - - schema_name - text - - Name of the schema the object belonged in, if any; otherwise NULL. - No quoting is applied. - - - - object_name - text - - Name of the object, if the combination of schema and name can be - used as a unique identifier for the object; otherwise NULL. - No quoting is applied, and name is never schema-qualified. - - - - object_identity - text - - Text rendering of the object identity, schema-qualified. Each - identifier included in the identity is quoted if necessary. - - - - address_names - text[] - - An array that, together with object_type and - address_args, can be used by - the pg_get_object_address function to - recreate the object address in a remote server containing an - identically named object of the same kind. 
- - - - address_args - text[] - - Complement for address_names - - - - - - - - - The pg_event_trigger_dropped_objects function can be used - in an event trigger like this: - -CREATE FUNCTION test_event_trigger_for_drops() - RETURNS event_trigger LANGUAGE plpgsql AS $$ -DECLARE - obj record; -BEGIN - FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects() - LOOP - RAISE NOTICE '% dropped object: % %.% %', - tg_tag, - obj.object_type, - obj.schema_name, - obj.object_name, - obj.object_identity; - END LOOP; -END; -$$; -CREATE EVENT TRIGGER test_event_trigger_for_drops - ON sql_drop - EXECUTE FUNCTION test_event_trigger_for_drops(); - - - - - - Handling a Table Rewrite Event - - - The functions shown in - - provide information about a table for which a - table_rewrite event has just been called. - If called in any other context, an error is raised. - - - - Table Rewrite Information Functions - - - - - Function - - - Description - - - - - - - - - pg_event_trigger_table_rewrite_oid - - pg_event_trigger_table_rewrite_oid () - oid - - - Returns the OID of the table about to be rewritten. - - - - - - - pg_event_trigger_table_rewrite_reason - - pg_event_trigger_table_rewrite_reason () - integer - - - Returns a code explaining the reason(s) for rewriting. The value is - a bitmap built from the following values: 1 - (the table has changed its persistence), 2 - (default value of a column has changed), 4 - (a column has a new data type) and 8 - (the table access method has changed). - - - - -
- - - These functions can be used in an event trigger like this: - -CREATE FUNCTION test_event_trigger_table_rewrite_oid() - RETURNS event_trigger - LANGUAGE plpgsql AS -$$ -BEGIN - RAISE NOTICE 'rewriting table % for reason %', - pg_event_trigger_table_rewrite_oid()::regclass, - pg_event_trigger_table_rewrite_reason(); -END; -$$; - -CREATE EVENT TRIGGER test_table_rewrite_oid - ON table_rewrite - EXECUTE FUNCTION test_event_trigger_table_rewrite_oid(); - - -
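pg_event_trigger_ddl_commands, described earlier in this section, can be used in an analogous way; a hypothetical sketch in which the function and trigger names are illustrative:

CREATE FUNCTION log_ddl_commands()
  RETURNS event_trigger LANGUAGE plpgsql AS $$
DECLARE
  obj record;
BEGIN
  FOR obj IN SELECT * FROM pg_event_trigger_ddl_commands()
  LOOP
    RAISE NOTICE 'caught % on % %',
      obj.command_tag, obj.object_type, obj.object_identity;
  END LOOP;
END;
$$;

CREATE EVENT TRIGGER log_ddl
  ON ddl_command_end
  EXECUTE FUNCTION log_ddl_commands();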
-
- - - Statistics Information Functions - - - function - statistics - - - - PostgreSQL provides a function to inspect complex - statistics defined using the CREATE STATISTICS command. - - - - Inspecting MCV Lists - - - pg_mcv_list_items - - - -pg_mcv_list_items ( pg_mcv_list ) setof record - - - - pg_mcv_list_items returns a set of records describing - all items stored in a multi-column MCV list. It - returns the following columns: - - - - - - Name - Type - Description - - - - - - index - integer - index of the item in the MCV list - - - values - text[] - values stored in the MCV item - - - nulls - boolean[] - flags identifying NULL values - - - frequency - double precision - frequency of this MCV item - - - base_frequency - double precision - base frequency of this MCV item - - - - - - - - The pg_mcv_list_items function can be used like this: - - -SELECT m.* FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid), - pg_mcv_list_items(stxdmcv) m WHERE stxname = 'stts'; - - - Values of the pg_mcv_list type can be obtained only from the - pg_statistic_ext_data.stxdmcv - column. - - - - - -
diff --git a/doc/src/sgml/func/allfiles.sgml b/doc/src/sgml/func/allfiles.sgml new file mode 100644 index 0000000000000..ce11ef1d5d8ed --- /dev/null +++ b/doc/src/sgml/func/allfiles.sgml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml new file mode 100644 index 0000000000000..446fdfe56f4f9 --- /dev/null +++ b/doc/src/sgml/func/func-admin.sgml @@ -0,0 +1,2962 @@ + + System Administration Functions + + + The functions described in this section are used to control and + monitor a PostgreSQL installation. + + + + Configuration Settings Functions + + + SET + + + + SHOW + + + + configuration + of the server + functions + + + + shows the functions + available to query and alter run-time configuration parameters. + + + + Configuration Settings Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + current_setting + + current_setting ( setting_name text , missing_ok boolean ) + text + + + Returns the current value of the + setting setting_name. If there is no such + setting, current_setting throws an error + unless missing_ok is supplied and + is true (in which case NULL is returned). + This function corresponds to + the SQL command . + + + current_setting('datestyle') + ISO, MDY + + + + + + + set_config + + set_config ( + setting_name text, + new_value text, + is_local boolean ) + text + + + Sets the parameter setting_name + to new_value, and returns that value. + If is_local is true, the new + value will only apply during the current transaction. If you want the + new value to apply for the rest of the current session, + use false instead. This function corresponds to + the SQL command . + + + set_config accepts the NULL value for + new_value, but as settings cannot be null, it + is interpreted as a request to reset the setting to its default value. + + + set_config('log_statement_stats', 'off', false) + off + + + + +
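A short transaction-scoped sketch combining the two functions; work_mem is merely a convenient setting to demonstrate with:

BEGIN;
SELECT set_config('work_mem', '64MB', true);  -- is_local = true: lasts until COMMIT
SELECT current_setting('work_mem');           -- 64MB
COMMIT;
SELECT current_setting('work_mem');           -- back to the previous session value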
+ +
+ + + Server Signaling Functions + + + signal + backend processes + + + + The functions shown in send control signals to + other server processes. Use of these functions is restricted to + superusers by default but access may be granted to others using + GRANT, with noted exceptions. + + + + Each of these functions returns true if + the signal was successfully sent and false + if sending the signal failed. + + + + Server Signaling Functions + + + + + Function + + + Description + + + + + + + + + pg_cancel_backend + + pg_cancel_backend ( pid integer ) + boolean + + + Cancels the current query of the session whose backend process has the + specified process ID. This is also allowed if the + calling role is a member of the role whose backend is being canceled or + the calling role has privileges of pg_signal_backend, + however only superusers can cancel superuser backends. + As an exception, roles with privileges of + pg_signal_autovacuum_worker are permitted to + cancel autovacuum worker processes, which are otherwise considered + superuser backends. + + + + + + + pg_log_backend_memory_contexts + + pg_log_backend_memory_contexts ( pid integer ) + boolean + + + Requests to log the memory contexts of the backend with the + specified process ID. This function can send the request to + backends and auxiliary processes except logger. These memory contexts + will be logged at + LOG message level. They will appear in + the server log based on the log configuration set + (see for more information), + but will not be sent to the client regardless of + . + + + + + + + pg_reload_conf + + pg_reload_conf () + boolean + + + Causes all processes of the PostgreSQL + server to reload their configuration files. (This is initiated by + sending a SIGHUP signal to the postmaster + process, which in turn sends SIGHUP to each + of its children.) You can use the + pg_file_settings, + pg_hba_file_rules and + pg_ident_file_mappings views + to check the configuration files for possible errors, before reloading. + + + + + + + pg_rotate_logfile + + pg_rotate_logfile () + boolean + + + Signals the log-file manager to switch to a new output file + immediately. This works only when the built-in log collector is + running, since otherwise there is no log-file manager subprocess. + + + + + + + pg_terminate_backend + + pg_terminate_backend ( pid integer, timeout bigint DEFAULT 0 ) + boolean + + + Terminates the session whose backend process has the + specified process ID. This is also allowed if the calling role + is a member of the role whose backend is being terminated or the + calling role has privileges of pg_signal_backend, + however only superusers can terminate superuser backends. + As an exception, roles with privileges of + pg_signal_autovacuum_worker are permitted to + terminate autovacuum worker processes, which are otherwise considered + superuser backends. + + + If timeout is not specified or zero, this + function returns true whether the process actually + terminates or not, indicating only that the sending of the signal was + successful. If the timeout is specified (in + milliseconds) and greater than zero, the function waits until the + process is actually terminated or until the given time has passed. If + the process is terminated, the function + returns true. On timeout, a warning is emitted and + false is returned. + + + + +
+ + + pg_cancel_backend and pg_terminate_backend + send signals (SIGINT or SIGTERM + respectively) to backend processes identified by process ID. + The process ID of an active backend can be found from + the pid column of the + pg_stat_activity view, or by listing the + postgres processes on the server (using + ps on Unix or the Task + Manager on Windows). + The role of an active backend can be found from the + usename column of the + pg_stat_activity view. + + + + pg_log_backend_memory_contexts can be used + to log the memory contexts of a backend process. For example: + +postgres=# SELECT pg_log_backend_memory_contexts(pg_backend_pid()); + pg_log_backend_memory_contexts +-------------------------------- + t +(1 row) + +One message for each memory context will be logged. For example: + +LOG: logging memory contexts of PID 10377 +STATEMENT: SELECT pg_log_backend_memory_contexts(pg_backend_pid()); +LOG: level: 1; TopMemoryContext: 80800 total in 6 blocks; 14432 free (5 chunks); 66368 used +LOG: level: 2; pgstat TabStatusArray lookup hash table: 8192 total in 1 blocks; 1408 free (0 chunks); 6784 used +LOG: level: 2; TopTransactionContext: 8192 total in 1 blocks; 7720 free (1 chunks); 472 used +LOG: level: 2; RowDescriptionContext: 8192 total in 1 blocks; 6880 free (0 chunks); 1312 used +LOG: level: 2; MessageContext: 16384 total in 2 blocks; 5152 free (0 chunks); 11232 used +LOG: level: 2; Operator class cache: 8192 total in 1 blocks; 512 free (0 chunks); 7680 used +LOG: level: 2; smgr relation table: 16384 total in 2 blocks; 4544 free (3 chunks); 11840 used +LOG: level: 2; TransactionAbortContext: 32768 total in 1 blocks; 32504 free (0 chunks); 264 used +... +LOG: level: 2; ErrorContext: 8192 total in 1 blocks; 7928 free (3 chunks); 264 used +LOG: Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560 used + + If there are more than 100 child contexts under the same parent, the first + 100 child contexts are logged, along with a summary of the remaining contexts. + Note that frequent calls to this function could incur significant overhead, + because it may generate a large number of log messages. + + +
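Combining these functions with pg_stat_activity yields a common administrative idiom; a sketch in which the five-minute threshold is arbitrary:

-- cancel every query that has been active for more than five minutes,
-- leaving the current session alone
SELECT pid, pg_cancel_backend(pid)
  FROM pg_stat_activity
 WHERE state = 'active'
   AND now() - query_start > interval '5 minutes'
   AND pid <> pg_backend_pid();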
+ + + Backup Control Functions + + + backup + + + + The functions shown in assist in making on-line backups. + These functions cannot be executed during recovery (except + pg_backup_start, + pg_backup_stop, + and pg_wal_lsn_diff). + + + + For details about proper usage of these functions, see + . + + + + Backup Control Functions + + + + + Function + + + Description + + + + + + + + + pg_create_restore_point + + pg_create_restore_point ( name text ) + pg_lsn + + + Creates a named marker record in the write-ahead log that can later be + used as a recovery target, and returns the corresponding write-ahead + log location. The given name can then be used with + to specify the point up to + which recovery will proceed. Avoid creating multiple restore points + with the same name, since recovery will stop at the first one whose + name matches the recovery target. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_current_wal_flush_lsn + + pg_current_wal_flush_lsn () + pg_lsn + + + Returns the current write-ahead log flush location (see notes below). + + + + + + + pg_current_wal_insert_lsn + + pg_current_wal_insert_lsn () + pg_lsn + + + Returns the current write-ahead log insert location (see notes below). + + + + + + + pg_current_wal_lsn + + pg_current_wal_lsn () + pg_lsn + + + Returns the current write-ahead log write location (see notes below). + + + + + + + pg_backup_start + + pg_backup_start ( + label text + , fast boolean + ) + pg_lsn + + + Prepares the server to begin an on-line backup. The only required + parameter is an arbitrary user-defined label for the backup. + (Typically this would be the name under which the backup dump file + will be stored.) + If the optional second parameter is given as true, + it specifies executing pg_backup_start as quickly + as possible. This forces a fast checkpoint which will cause a + spike in I/O operations, slowing any concurrently executing queries. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_backup_stop + + pg_backup_stop ( + wait_for_archive boolean + ) + record + ( lsn pg_lsn, + labelfile text, + spcmapfile text ) + + + Finishes performing an on-line backup. The desired contents of the + backup label file and the tablespace map file are returned as part of + the result of the function and must be written to files in the + backup area. These files must not be written to the live data directory + (doing so will cause PostgreSQL to fail to restart in the event of a + crash). + + + There is an optional parameter of type boolean. + If false, the function will return immediately after the backup is + completed, without waiting for WAL to be archived. This behavior is + only useful with backup software that independently monitors WAL + archiving. Otherwise, WAL required to make the backup consistent might + be missing and make the backup useless. By default or when this + parameter is true, pg_backup_stop will wait for + WAL to be archived when archiving is enabled. (On a standby, this + means that it will wait only when archive_mode = + always. If write activity on the primary is low, + it may be useful to run pg_switch_wal on the + primary in order to trigger an immediate segment switch.) + + + When executed on a primary, this function also creates a backup + history file in the write-ahead log archive area. 
The history file + includes the label given to pg_backup_start, the + starting and ending write-ahead log locations for the backup, and the + starting and ending times of the backup. After recording the ending + location, the current write-ahead log insertion point is automatically + advanced to the next write-ahead log file, so that the ending + write-ahead log file can be archived immediately to complete the + backup. + + + The result of the function is a single record. + The lsn column holds the backup's ending + write-ahead log location (which again can be ignored). The second + column returns the contents of the backup label file, and the third + column returns the contents of the tablespace map file. These must be + stored as part of the backup and are required as part of the restore + process. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_switch_wal + + pg_switch_wal () + pg_lsn + + + Forces the server to switch to a new write-ahead log file, which + allows the current file to be archived (assuming you are using + continuous archiving). The result is the ending write-ahead log + location plus 1 within the just-completed write-ahead log file. If + there has been no write-ahead log activity since the last write-ahead + log switch, pg_switch_wal does nothing and + returns the start location of the write-ahead log file currently in + use. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_walfile_name + + pg_walfile_name ( lsn pg_lsn ) + text + + + Converts a write-ahead log location to the name of the WAL file + holding that location. + + + + + + + pg_walfile_name_offset + + pg_walfile_name_offset ( lsn pg_lsn ) + record + ( file_name text, + file_offset integer ) + + + Converts a write-ahead log location to a WAL file name and byte offset + within that file. + + + + + + + pg_split_walfile_name + + pg_split_walfile_name ( file_name text ) + record + ( segment_number numeric, + timeline_id bigint ) + + + Extracts the sequence number and timeline ID from a WAL file + name. + + + + + + + pg_wal_lsn_diff + + pg_wal_lsn_diff ( lsn1 pg_lsn, lsn2 pg_lsn ) + numeric + + + Calculates the difference in bytes (lsn1 - lsn2) between two write-ahead log + locations. This can be used + with pg_stat_replication or some of the + functions shown in to + get the replication lag. + + + + +
+
+  pg_current_wal_lsn displays the current write-ahead log write location
+  in the same format used by the above functions. Similarly,
+  pg_current_wal_insert_lsn displays the current write-ahead log
+  insertion location and pg_current_wal_flush_lsn displays the current
+  write-ahead log flush location. The insertion location is the logical
+  end of the write-ahead log at any instant, while the write location is
+  the end of what has actually been written out from the server's
+  internal buffers, and the flush location is the last location known to
+  be written to durable storage. The write location is the end of what
+  can be examined from outside the server, and is usually what you want
+  if you are interested in archiving partially-complete write-ahead log
+  files. The insertion and flush locations are made available primarily
+  for server debugging purposes. These are all read-only operations and
+  do not require superuser permissions.
+
+  You can use pg_walfile_name_offset to extract the corresponding
+  write-ahead log file name and byte offset from a pg_lsn value.
+  For example:
+
+postgres=# SELECT * FROM pg_walfile_name_offset((pg_backup_stop()).lsn);
+        file_name         | file_offset
+--------------------------+-------------
+ 00000001000000000000000D |     4039624
+(1 row)
+
+  Similarly, pg_walfile_name extracts just the write-ahead log file name.
+
+  pg_split_walfile_name is useful for computing an LSN from a WAL file
+  name and byte offset, for example:
+
+postgres=# \set file_name '000000010000000100C000AB'
+postgres=# \set offset 256
+postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset AS lsn
+  FROM pg_split_walfile_name(:'file_name') pd,
+       pg_show_all_settings() ps
+  WHERE ps.name = 'wal_segment_size';
+      lsn
+---------------
+ C001/AB000100
+(1 row)
+
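Putting the backup functions together, a minimal end-to-end sketch; the labels are arbitrary and the actual file copy happens outside the database:

SELECT pg_backup_start('nightly_base_backup', false);

-- ... copy the data directory with an external tool ...

SELECT lsn, labelfile, spcmapfile FROM pg_backup_stop(true);

-- optionally, mark a known-good point to recover to later
SELECT pg_create_restore_point('before_app_upgrade');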
+ + + Recovery Control Functions + + + The functions shown in provide information + about the current status of a standby server. + These functions may be executed both during recovery and in normal running. + + + + Recovery Information Functions + + + + + Function + + + Description + + + + + + + + + pg_is_in_recovery + + pg_is_in_recovery () + boolean + + + Returns true if recovery is still in progress. + + + + + + + pg_last_wal_receive_lsn + + pg_last_wal_receive_lsn () + pg_lsn + + + Returns the last write-ahead log location that has been received and + synced to disk by streaming replication. While streaming replication + is in progress this will increase monotonically. If recovery has + completed then this will remain static at the location of the last WAL + record received and synced to disk during recovery. If streaming + replication is disabled, or if it has not yet started, the function + returns NULL. + + + + + + + pg_last_wal_replay_lsn + + pg_last_wal_replay_lsn () + pg_lsn + + + Returns the last write-ahead log location that has been replayed + during recovery. If recovery is still in progress this will increase + monotonically. If recovery has completed then this will remain + static at the location of the last WAL record applied during recovery. + When the server has been started normally without recovery, the + function returns NULL. + + + + + + + pg_last_xact_replay_timestamp + + pg_last_xact_replay_timestamp () + timestamp with time zone + + + Returns the time stamp of the last transaction replayed during + recovery. This is the time at which the commit or abort WAL record + for that transaction was generated on the primary. If no transactions + have been replayed during recovery, the function + returns NULL. Otherwise, if recovery is still in + progress this will increase monotonically. If recovery has completed + then this will remain static at the time of the last transaction + applied during recovery. When the server has been started normally + without recovery, the function returns NULL. + + + + + + + pg_get_wal_resource_managers + + pg_get_wal_resource_managers () + setof record + ( rm_id integer, + rm_name text, + rm_builtin boolean ) + + + Returns the currently-loaded WAL resource managers in the system. The + column rm_builtin indicates whether it's a + built-in resource manager, or a custom resource manager loaded by an + extension. + + + + +
+ + + The functions shown in control the progress of recovery. + These functions may be executed only during recovery. + + + + Recovery Control Functions + + + + + Function + + + Description + + + + + + + + + pg_is_wal_replay_paused + + pg_is_wal_replay_paused () + boolean + + + Returns true if recovery pause is requested. + + + + + + + pg_get_wal_replay_pause_state + + pg_get_wal_replay_pause_state () + text + + + Returns recovery pause state. The return values are + not paused if pause is not requested, + pause requested if pause is requested but recovery is + not yet paused, and paused if the recovery is + actually paused. + + + + + + + pg_promote + + pg_promote ( wait boolean DEFAULT true, wait_seconds integer DEFAULT 60 ) + boolean + + + Promotes a standby server to primary status. + With wait set to true (the + default), the function waits until promotion is completed + or wait_seconds seconds have passed, and + returns true if promotion is successful + and false otherwise. + If wait is set to false, the + function returns true immediately after sending a + SIGUSR1 signal to the postmaster to trigger + promotion. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_wal_replay_pause + + pg_wal_replay_pause () + void + + + Request to pause recovery. A request doesn't mean that recovery stops + right away. If you want a guarantee that recovery is actually paused, + you need to check for the recovery pause state returned by + pg_get_wal_replay_pause_state(). Note that + pg_is_wal_replay_paused() returns whether a request + is made. While recovery is paused, no further database changes are applied. + If hot standby is active, all new queries will see the same consistent + snapshot of the database, and no further query conflicts will be generated + until recovery is resumed. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_wal_replay_resume + + pg_wal_replay_resume () + void + + + Restarts recovery if it was paused. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + +
+
+  pg_wal_replay_pause and pg_wal_replay_resume cannot be executed while
+  a promotion is ongoing. If a promotion is triggered while recovery is
+  paused, the paused state ends and promotion continues.
+
+  If streaming replication is disabled, the paused state may continue
+  indefinitely without problems. If streaming replication is in progress,
+  WAL records will continue to be received and will eventually fill the
+  available disk space; how quickly depends on the duration of the pause
+  and the rate of WAL generation.
+
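On a standby, a quick health check might combine the information and control functions as follows; a sketch, with results depending entirely on the installation:

SELECT pg_is_in_recovery();                        -- t on a standby

-- bytes received from the primary but not yet replayed
SELECT pg_wal_lsn_diff(pg_last_wal_receive_lsn(),
                       pg_last_wal_replay_lsn()) AS replay_lag_bytes;

SELECT pg_wal_replay_pause();
SELECT pg_get_wal_replay_pause_state();            -- 'pause requested' or 'paused'
SELECT pg_wal_replay_resume();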
+ + + Snapshot Synchronization Functions + + + PostgreSQL allows database sessions to synchronize their + snapshots. A snapshot determines which data is visible to the + transaction that is using the snapshot. Synchronized snapshots are + necessary when two or more sessions need to see identical content in the + database. If two sessions just start their transactions independently, + there is always a possibility that some third transaction commits + between the executions of the two START TRANSACTION commands, + so that one session sees the effects of that transaction and the other + does not. + + + + To solve this problem, PostgreSQL allows a transaction to + export the snapshot it is using. As long as the exporting + transaction remains open, other transactions can import its + snapshot, and thereby be guaranteed that they see exactly the same view + of the database that the first transaction sees. But note that any + database changes made by any one of these transactions remain invisible + to the other transactions, as is usual for changes made by uncommitted + transactions. So the transactions are synchronized with respect to + pre-existing data, but act normally for changes they make themselves. + + + + Snapshots are exported with the pg_export_snapshot function, + shown in , and + imported with the command. + + + + Snapshot Synchronization Functions + + + + + Function + + + Description + + + + + + + + + pg_export_snapshot + + pg_export_snapshot () + text + + + Saves the transaction's current snapshot and returns + a text string identifying the snapshot. This string must + be passed (outside the database) to clients that want to import the + snapshot. The snapshot is available for import only until the end of + the transaction that exported it. + + + A transaction can export more than one snapshot, if needed. Note that + doing so is only useful in READ COMMITTED + transactions, since in REPEATABLE READ and higher + isolation levels, transactions use the same snapshot throughout their + lifetime. Once a transaction has exported any snapshots, it cannot be + prepared with . + + + + + + pg_log_standby_snapshot + + pg_log_standby_snapshot () + pg_lsn + + + Take a snapshot of running transactions and write it to WAL, without + having to wait for bgwriter or checkpointer to log one. This is useful + for logical decoding on standby, as logical slot creation has to wait + until such a record is replayed on the standby. + + + + +
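A two-session sketch of the export/import handshake; the snapshot identifier shown is illustrative, and the exporting transaction must remain open while the import takes place:

-- session 1
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT pg_export_snapshot();   -- e.g., returns 00000004-0000003C-1

-- session 2, before running any other query in its transaction
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET TRANSACTION SNAPSHOT '00000004-0000003C-1';
-- both sessions now see exactly the same committed data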
+ +
+ + + Replication Management Functions + + + The functions shown + in are for + controlling and interacting with replication features. + See , + , and + + for information about the underlying features. + Use of functions for replication origin is only allowed to the + superuser by default, but may be allowed to other users by using the + GRANT command. + Use of functions for replication slots is restricted to superusers + and users having REPLICATION privilege. + + + + Many of these functions have equivalent commands in the replication + protocol; see . + + + + The functions described in + , + , and + + are also relevant for replication. + + + + Replication Management Functions + + + + + Function + + + Description + + + + + + + + + pg_create_physical_replication_slot + + pg_create_physical_replication_slot ( slot_name name , immediately_reserve boolean, temporary boolean ) + record + ( slot_name name, + lsn pg_lsn ) + + + Creates a new physical replication slot named + slot_name. The name cannot be + pg_conflict_detection as it is reserved for the + conflict detection slot. The optional second parameter, + when true, specifies that the LSN for this + replication slot be reserved immediately; otherwise + the LSN is reserved on first connection from a streaming + replication client. Streaming changes from a physical slot is only + possible with the streaming-replication protocol — + see . The optional third + parameter, temporary, when set to true, specifies that + the slot should not be permanently stored to disk and is only meant + for use by the current session. Temporary slots are also + released upon any error. This function corresponds + to the replication protocol command CREATE_REPLICATION_SLOT + ... PHYSICAL. + + + + + + + pg_drop_replication_slot + + pg_drop_replication_slot ( slot_name name ) + void + + + Drops the physical or logical replication slot + named slot_name. Same as replication protocol + command DROP_REPLICATION_SLOT. + + + + + + + pg_create_logical_replication_slot + + pg_create_logical_replication_slot ( slot_name name, plugin name , temporary boolean, twophase boolean, failover boolean ) + record + ( slot_name name, + lsn pg_lsn ) + + + Creates a new logical (decoding) replication slot named + slot_name using the output plugin + plugin. The name cannot be + pg_conflict_detection as it is reserved for + the conflict detection slot. The optional third + parameter, temporary, when set to true, specifies that + the slot should not be permanently stored to disk and is only meant + for use by the current session. Temporary slots are also + released upon any error. The optional fourth parameter, + twophase, when set to true, specifies + that the decoding of prepared transactions is enabled for this + slot. The optional fifth parameter, + failover, when set to true, + specifies that this slot is enabled to be synced to the + standbys so that logical replication can be resumed after + failover. A call to this function has the same effect as + the replication protocol command + CREATE_REPLICATION_SLOT ... LOGICAL. + + + + + + + pg_copy_physical_replication_slot + + pg_copy_physical_replication_slot ( src_slot_name name, dst_slot_name name , temporary boolean ) + record + ( slot_name name, + lsn pg_lsn ) + + + Copies an existing physical replication slot named src_slot_name + to a physical replication slot named dst_slot_name. + The new slot name cannot be pg_conflict_detection, + as it is reserved for the conflict detection. 
+ The copied physical slot starts to reserve WAL from the same LSN as the + source slot. + temporary is optional. If temporary + is omitted, the same value as the source slot is used. Copy of an + invalidated slot is not allowed. + + + + + + + pg_copy_logical_replication_slot + + pg_copy_logical_replication_slot ( src_slot_name name, dst_slot_name name , temporary boolean , plugin name ) + record + ( slot_name name, + lsn pg_lsn ) + + + Copies an existing logical replication slot + named src_slot_name to a logical replication + slot named dst_slot_name, optionally changing + the output plugin and persistence. The new slot name cannot be + pg_conflict_detection as it is reserved for + the conflict detection. The copied logical slot starts from the same + LSN as the source logical slot. Both + temporary and plugin are + optional; if they are omitted, the values of the source slot are used. + The failover option of the source logical slot + is not copied and is set to false by default. This + is to avoid the risk of being unable to continue logical replication + after failover to standby where the slot is being synchronized. Copy of + an invalidated slot is not allowed. + + + + + + + pg_logical_slot_get_changes + + pg_logical_slot_get_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) + setof record + ( lsn pg_lsn, + xid xid, + data text ) + + + Returns changes in the slot slot_name, starting + from the point from which changes have been consumed last. If + upto_lsn + and upto_nchanges are NULL, + logical decoding will continue until end of WAL. If + upto_lsn is non-NULL, decoding will include only + those transactions which commit prior to the specified LSN. If + upto_nchanges is non-NULL, decoding will + stop when the number of rows produced by decoding exceeds + the specified value. Note, however, that the actual number of + rows returned may be larger, since this limit is only checked after + adding the rows produced when decoding each new transaction commit. + If the specified slot is a logical failover slot then the function will + not return until all physical slots specified in + synchronized_standby_slots + have confirmed WAL receipt. + + + + + + + pg_logical_slot_peek_changes + + pg_logical_slot_peek_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) + setof record + ( lsn pg_lsn, + xid xid, + data text ) + + + Behaves just like + the pg_logical_slot_get_changes() function, + except that changes are not consumed; that is, they will be returned + again on future calls. + + + + + + + pg_logical_slot_get_binary_changes + + pg_logical_slot_get_binary_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) + setof record + ( lsn pg_lsn, + xid xid, + data bytea ) + + + Behaves just like + the pg_logical_slot_get_changes() function, + except that changes are returned as bytea. + + + + + + + pg_logical_slot_peek_binary_changes + + pg_logical_slot_peek_binary_changes ( slot_name name, upto_lsn pg_lsn, upto_nchanges integer, VARIADIC options text[] ) + setof record + ( lsn pg_lsn, + xid xid, + data bytea ) + + + Behaves just like + the pg_logical_slot_peek_changes() function, + except that changes are returned as bytea. + + + + + + + pg_replication_slot_advance + + pg_replication_slot_advance ( slot_name name, upto_lsn pg_lsn ) + record + ( slot_name name, + end_lsn pg_lsn ) + + + Advances the current confirmed position of a replication slot named + slot_name. 
The slot will not be moved backwards, + and it will not be moved beyond the current insert location. Returns + the name of the slot and the actual position that it was advanced to. + The updated slot position information is written out at the next + checkpoint if any advancing is done. So in the event of a crash, the + slot may return to an earlier position. If the specified slot is a + logical failover slot then the function will not return until all + physical slots specified in + synchronized_standby_slots + have confirmed WAL receipt. + + + + + + + pg_replication_origin_create + + pg_replication_origin_create ( node_name text ) + oid + + + Creates a replication origin with the given external + name, and returns the internal ID assigned to it. + The name must be no longer than 512 bytes. + + + + + + + pg_replication_origin_drop + + pg_replication_origin_drop ( node_name text ) + void + + + Deletes a previously-created replication origin, including any + associated replay progress. + + + + + + + pg_replication_origin_oid + + pg_replication_origin_oid ( node_name text ) + oid + + + Looks up a replication origin by name and returns the internal ID. If + no such replication origin is found, NULL is + returned. + + + + + + + pg_replication_origin_session_setup + + pg_replication_origin_session_setup ( node_name text ) + void + + + Marks the current session as replaying from the given + origin, allowing replay progress to be tracked. + Can only be used if no origin is currently selected. + Use pg_replication_origin_session_reset to undo. + + + + + + + pg_replication_origin_session_reset + + pg_replication_origin_session_reset () + void + + + Cancels the effects + of pg_replication_origin_session_setup(). + + + + + + + pg_replication_origin_session_is_setup + + pg_replication_origin_session_is_setup () + boolean + + + Returns true if a replication origin has been selected in the + current session. + + + + + + + pg_replication_origin_session_progress + + pg_replication_origin_session_progress ( flush boolean ) + pg_lsn + + + Returns the replay location for the replication origin selected in + the current session. The parameter flush + determines whether the corresponding local transaction will be + guaranteed to have been flushed to disk or not. + + + + + + + pg_replication_origin_xact_setup + + pg_replication_origin_xact_setup ( origin_lsn pg_lsn, origin_timestamp timestamp with time zone ) + void + + + Marks the current transaction as replaying a transaction that has + committed at the given LSN and timestamp. Can + only be called when a replication origin has been selected + using pg_replication_origin_session_setup. + + + + + + + pg_replication_origin_xact_reset + + pg_replication_origin_xact_reset () + void + + + Cancels the effects of + pg_replication_origin_xact_setup(). + + + + + + + pg_replication_origin_advance + + pg_replication_origin_advance ( node_name text, lsn pg_lsn ) + void + + + Sets replication progress for the given node to the given + location. This is primarily useful for setting up the initial + location, or setting a new location after configuration changes and + similar. Be aware that careless use of this function can lead to + inconsistently replicated data. + + + + + + + pg_replication_origin_progress + + pg_replication_origin_progress ( node_name text, flush boolean ) + pg_lsn + + + Returns the replay location for the given replication origin. 
The + parameter flush determines whether the + corresponding local transaction will be guaranteed to have been + flushed to disk or not. + + + + + + + pg_logical_emit_message + + pg_logical_emit_message ( transactional boolean, prefix text, content text , flush boolean DEFAULT false ) + pg_lsn + + + pg_logical_emit_message ( transactional boolean, prefix text, content bytea , flush boolean DEFAULT false ) + pg_lsn + + + Emits a logical decoding message. This can be used to pass generic + messages to logical decoding plugins through + WAL. The transactional parameter specifies if + the message should be part of the current transaction, or if it should + be written immediately and decoded as soon as the logical decoder + reads the record. The prefix parameter is a + textual prefix that can be used by logical decoding plugins to easily + recognize messages that are interesting for them. + The content parameter is the content of the + message, given either in text or binary form. + The flush parameter (default set to + false) controls if the message is immediately + flushed to WAL or not. flush has no effect + with transactional, as the message's WAL + record is flushed along with its transaction. + + + + + + + pg_sync_replication_slots + + pg_sync_replication_slots () + void + + + Synchronize the logical failover replication slots from the primary + server to the standby server. This function can only be executed on the + standby server. Temporary synced slots, if any, cannot be used for + logical decoding and must be dropped after promotion. See + for details. + Note that this function is primarily intended for testing and + debugging purposes and should be used with caution. Additionally, + this function cannot be executed if + + sync_replication_slots is enabled and the slotsync + worker is already running to perform the synchronization of slots. + + + + + If, after executing the function, + + hot_standby_feedback is disabled on + the standby or the physical slot configured in + + primary_slot_name is + removed, then it is possible that the necessary rows of the + synchronized slot will be removed by the VACUUM process on the primary + server, resulting in the synchronized slot becoming invalidated. + + + + + + + +
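+
+ For example, assuming the test_decoding output plugin is
+ available and a logical slot named test_slot has already been
+ created with it, a generic message can be written to WAL and then
+ inspected without consuming it:
+
+SELECT pg_logical_emit_message(true, 'testprefix', 'hello');
+SELECT lsn, xid, data
+  FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL);
+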
+ +
+
+
+ Database Object Management Functions
+
+ The functions shown in calculate
+ the disk space usage of database objects, or assist in presentation
+ or understanding of usage results. bigint results
+ are measured in bytes. If an OID that does
+ not represent an existing object is passed to one of these
+ functions, NULL is returned.
+
+
+ Database Object Size Functions
+
+
+
+ Function
+
+
+ Description
+
+
+
+
+
+
+ pg_column_size
+
+ pg_column_size ( "any" )
+ integer
+
+
+ Shows the number of bytes used to store any individual data value. If
+ applied directly to a table column value, this reflects any
+ compression that was done.
+
+
+
+
+
+ pg_column_compression
+
+ pg_column_compression ( "any" )
+ text
+
+
+ Shows the compression algorithm that was used to compress
+ an individual variable-length value. Returns NULL
+ if the value is not compressed.
+
+
+
+
+
+ pg_column_toast_chunk_id
+
+ pg_column_toast_chunk_id ( "any" )
+ oid
+
+
+ Shows the chunk_id of an on-disk
+ TOASTed value. Returns NULL
+ if the value is un-TOASTed or not on-disk. See
+ for more information about
+ TOAST.
+
+
+
+
+
+ pg_database_size
+
+ pg_database_size ( name )
+ bigint
+
+
+ pg_database_size ( oid )
+ bigint
+
+
+ Computes the total disk space used by the database with the specified
+ name or OID. To use this function, you must
+ have CONNECT privilege on the specified database
+ (which is granted by default) or have privileges of
+ the pg_read_all_stats role.
+
+
+
+
+
+ pg_indexes_size
+
+ pg_indexes_size ( regclass )
+ bigint
+
+
+ Computes the total disk space used by indexes attached to the
+ specified table.
+
+
+
+
+
+ pg_relation_size
+
+ pg_relation_size ( relation regclass , fork text )
+ bigint
+
+
+ Computes the disk space used by one fork of the
+ specified relation. (Note that for most purposes it is more
+ convenient to use the higher-level
+ functions pg_total_relation_size
+ or pg_table_size, which sum the sizes of all
+ forks.) With one argument, this returns the size of the main data
+ fork of the relation. The second argument can be provided to specify
+ which fork to examine:
+
+
+
+ main returns the size of the main
+ data fork of the relation.
+
+
+
+
+ fsm returns the size of the Free Space Map
+ (see ) associated with the relation.
+
+
+
+
+ vm returns the size of the Visibility Map
+ (see ) associated with the relation.
+
+
+
+
+ init returns the size of the initialization
+ fork, if any, associated with the relation.
+
+
+
+
+
+
+
+
+
+ pg_size_bytes
+
+ pg_size_bytes ( text )
+ bigint
+
+
+ Converts a size in human-readable format (as returned
+ by pg_size_pretty) into bytes. Valid units are
+ bytes, B, kB,
+ MB, GB, TB,
+ and PB.
+
+
+
+
+
+ pg_size_pretty
+
+ pg_size_pretty ( bigint )
+ text
+
+
+ pg_size_pretty ( numeric )
+ text
+
+
+ Converts a size in bytes into a more easily human-readable format with
+ size units (bytes, kB, MB, GB, TB, or PB as appropriate). Note that the
+ units are powers of 2 rather than powers of 10, so 1kB is 1024 bytes,
+ 1MB is 1024^2 = 1048576 bytes, and so on.
+
+
+
+
+
+ pg_table_size
+
+ pg_table_size ( regclass )
+ bigint
+
+
+ Computes the disk space used by the specified table, excluding indexes
+ (but including its TOAST table if any, free space map, and visibility
+ map).
+
+
+
+
+
+ pg_tablespace_size
+
+ pg_tablespace_size ( name )
+ bigint
+
+
+ pg_tablespace_size ( oid )
+ bigint
+
+
+ Computes the total disk space used in the tablespace with the
+ specified name or OID.
To use this function, you must + have CREATE privilege on the specified tablespace + or have privileges of the pg_read_all_stats role, + unless it is the default tablespace for the current database. + + + + + + + pg_total_relation_size + + pg_total_relation_size ( regclass ) + bigint + + + Computes the total disk space used by the specified table, including + all indexes and TOAST data. The result is + equivalent to pg_table_size + + pg_indexes_size. + + + + +
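+
+ For example, to report the complete on-disk footprint of a table in
+ human-readable form (the table name mytable is assumed to
+ exist), and the size of the current database:
+
+SELECT pg_size_pretty(pg_total_relation_size('mytable'));
+SELECT pg_size_pretty(pg_database_size(current_database()));
+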
+ + + The functions above that operate on tables or indexes accept a + regclass argument, which is simply the OID of the table or index + in the pg_class system catalog. You do not have to look up + the OID by hand, however, since the regclass data type's input + converter will do the work for you. See + for details. + + + + The functions shown in assist + in identifying the specific disk files associated with database objects. + + + + Database Object Location Functions + + + + + Function + + + Description + + + + + + + + + pg_relation_filenode + + pg_relation_filenode ( relation regclass ) + oid + + + Returns the filenode number currently assigned to the + specified relation. The filenode is the base component of the file + name(s) used for the relation (see + for more information). + For most relations the result is the same as + pg_class.relfilenode, + but for certain system catalogs relfilenode + is zero and this function must be used to get the correct value. The + function returns NULL if passed a relation that does not have storage, + such as a view. + + + + + + + pg_relation_filepath + + pg_relation_filepath ( relation regclass ) + text + + + Returns the entire file path name (relative to the database cluster's + data directory, PGDATA) of the relation. + + + + + + + pg_filenode_relation + + pg_filenode_relation ( tablespace oid, filenode oid ) + regclass + + + Returns a relation's OID given the tablespace OID and filenode it is + stored under. This is essentially the inverse mapping of + pg_relation_filepath. For a relation in the + database's default tablespace, the tablespace can be specified as zero. + Returns NULL if no relation in the current database + is associated with the given values. + + + + +
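+
+ For example, assuming a table named mytable that resides in
+ the database's default tablespace, the mapping between a relation and
+ its on-disk file can be exercised in both directions:
+
+SELECT pg_relation_filepath('mytable');
+SELECT pg_filenode_relation(0, pg_relation_filenode('mytable'));
+
+ The second query returns mytable itself, since zero stands
+ for the default tablespace here.
+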
+ + + lists functions used to manage + collations. + + + + Collation Management Functions + + + + + Function + + + Description + + + + + + + + + pg_collation_actual_version + + pg_collation_actual_version ( oid ) + text + + + Returns the actual version of the collation object as it is currently + installed in the operating system. If this is different from the + value in + pg_collation.collversion, + then objects depending on the collation might need to be rebuilt. See + also . + + + + + + + pg_database_collation_actual_version + + pg_database_collation_actual_version ( oid ) + text + + + Returns the actual version of the database's collation as it is currently + installed in the operating system. If this is different from the + value in + pg_database.datcollversion, + then objects depending on the collation might need to be rebuilt. See + also . + + + + + + + pg_import_system_collations + + pg_import_system_collations ( schema regnamespace ) + integer + + + Adds collations to the system + catalog pg_collation based on all the locales + it finds in the operating system. This is + what initdb uses; see + for more details. If additional + locales are installed into the operating system later on, this + function can be run again to add collations for the new locales. + Locales that match existing entries + in pg_collation will be skipped. (But + collation objects based on locales that are no longer present in the + operating system are not removed by this function.) + The schema parameter would typically + be pg_catalog, but that is not a requirement; the + collations could be installed into some other schema as well. The + function returns the number of new collation objects it created. + Use of this function is restricted to superusers. + + + + +
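+
+ For example, one quick (if approximate) way to list collations whose
+ operating-system definition has changed since they were last
+ refreshed, and which may therefore require rebuilding dependent
+ objects, is:
+
+SELECT collname FROM pg_collation
+  WHERE pg_collation_actual_version(oid) <> collversion;
+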
+ + + lists functions used to + manipulate statistics. + These functions cannot be executed during recovery. + + + Changes made by these statistics manipulation functions are likely to be + overwritten by autovacuum (or manual + VACUUM or ANALYZE) and should be + considered temporary. + + + + + + Database Object Statistics Manipulation Functions + + + + + Function + + + Description + + + + + + + + + pg_restore_relation_stats + + pg_restore_relation_stats ( + VARIADIC kwargs "any" ) + boolean + + + Updates table-level statistics. Ordinarily, these statistics are + collected automatically or updated as a part of or , so it's not + necessary to call this function. However, it is useful after a + restore to enable the optimizer to choose better plans if + ANALYZE has not been run yet. + + + The tracked statistics may change from version to version, so + arguments are passed as pairs of argname + and argvalue in the form: + +SELECT pg_restore_relation_stats( + 'arg1name', 'arg1value'::arg1type, + 'arg2name', 'arg2value'::arg2type, + 'arg3name', 'arg3value'::arg3type); + + + + For example, to set the relpages and + reltuples values for the table + mytable: + +SELECT pg_restore_relation_stats( + 'schemaname', 'myschema', + 'relname', 'mytable', + 'relpages', 173::integer, + 'reltuples', 10000::real); + + + + The arguments schemaname and + relname are required, and specify the table. Other + arguments are the names and values of statistics corresponding to + certain columns in pg_class. + The currently-supported relation statistics are + relpages with a value of type + integer, reltuples with a value of + type real, relallvisible with a value + of type integer, and relallfrozen + with a value of type integer. + + + Additionally, this function accepts argument name + version of type integer, which + specifies the server version from which the statistics originated. + This is anticipated to be helpful in porting statistics from older + versions of PostgreSQL. + + + Minor errors are reported as a WARNING and + ignored, and remaining statistics will still be restored. If all + specified statistics are successfully restored, returns + true, otherwise false. + + + The caller must have the MAINTAIN privilege on the + table or be the owner of the database. + + + + + + + + + pg_clear_relation_stats + + pg_clear_relation_stats ( schemaname text, relname text ) + void + + + Clears table-level statistics for the given relation, as though the + table was newly created. + + + The caller must have the MAINTAIN privilege on the + table or be the owner of the database. + + + + + + + + pg_restore_attribute_stats + + pg_restore_attribute_stats ( + VARIADIC kwargs "any" ) + boolean + + + Creates or updates column-level statistics. Ordinarily, these + statistics are collected automatically or updated as a part of or , so it's not + necessary to call this function. However, it is useful after a + restore to enable the optimizer to choose better plans if + ANALYZE has not been run yet. 
+ + + The tracked statistics may change from version to version, so + arguments are passed as pairs of argname + and argvalue in the form: + +SELECT pg_restore_attribute_stats( + 'arg1name', 'arg1value'::arg1type, + 'arg2name', 'arg2value'::arg2type, + 'arg3name', 'arg3value'::arg3type); + + + + For example, to set the avg_width and + null_frac values for the attribute + col1 of the table + mytable: + +SELECT pg_restore_attribute_stats( + 'schemaname', 'myschema', + 'relname', 'mytable', + 'attname', 'col1', + 'inherited', false, + 'avg_width', 125::integer, + 'null_frac', 0.5::real); + + + + The required arguments are schemaname and + relname with a value of type text + which specify the table; either attname with a + value of type text or attnum with a + value of type smallint, which specifies the column; and + inherited, which specifies whether the statistics + include values from child tables. Other arguments are the names and + values of statistics corresponding to columns in pg_stats. + + + Additionally, this function accepts argument name + version of type integer, which + specifies the server version from which the statistics originated. + This is anticipated to be helpful in porting statistics from older + versions of PostgreSQL. + + + Minor errors are reported as a WARNING and + ignored, and remaining statistics will still be restored. If all + specified statistics are successfully restored, returns + true, otherwise false. + + + The caller must have the MAINTAIN privilege on the + table or be the owner of the database. + + + + + + + + + pg_clear_attribute_stats + + pg_clear_attribute_stats ( + schemaname text, + relname text, + attname text, + inherited boolean ) + void + + + Clears column-level statistics for the given relation and + attribute, as though the table was newly created. + + + The caller must have the MAINTAIN privilege on + the table or be the owner of the database. + + + + + +
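+
+ For example, reusing the hypothetical myschema.mytable from
+ the descriptions above, any restored statistics can be discarded
+ again with:
+
+SELECT pg_clear_relation_stats('myschema', 'mytable');
+SELECT pg_clear_attribute_stats('myschema', 'mytable', 'col1', false);
+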
+ + + lists functions that provide + information about the structure of partitioned tables. + + + + Partitioning Information Functions + + + + + Function + + + Description + + + + + + + + + pg_partition_tree + + pg_partition_tree ( regclass ) + setof record + ( relid regclass, + parentrelid regclass, + isleaf boolean, + level integer ) + + + Lists the tables or indexes in the partition tree of the + given partitioned table or partitioned index, with one row for each + partition. Information provided includes the OID of the partition, + the OID of its immediate parent, a boolean value telling if the + partition is a leaf, and an integer telling its level in the hierarchy. + The level value is 0 for the input table or index, 1 for its + immediate child partitions, 2 for their partitions, and so on. + Returns no rows if the relation does not exist or is not a partition + or partitioned table. + + + + + + + pg_partition_ancestors + + pg_partition_ancestors ( regclass ) + setof regclass + + + Lists the ancestor relations of the given partition, + including the relation itself. Returns no rows if the relation + does not exist or is not a partition or partitioned table. + + + + + + + pg_partition_root + + pg_partition_root ( regclass ) + regclass + + + Returns the top-most parent of the partition tree to which the given + relation belongs. Returns NULL if the relation + does not exist or is not a partition or partitioned table. + + + + +
+ + + For example, to check the total size of the data contained in a + partitioned table measurement, one could use the + following query: + +SELECT pg_size_pretty(sum(pg_relation_size(relid))) AS total_size + FROM pg_partition_tree('measurement'); + + + +
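+
+ Similarly, assuming measurement has a leaf partition named
+ measurement_y2006m02, its place in the hierarchy can be
+ examined with:
+
+SELECT pg_partition_root('measurement_y2006m02');
+SELECT * FROM pg_partition_ancestors('measurement_y2006m02');
+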
+ + + Index Maintenance Functions + + + shows the functions + available for index maintenance tasks. (Note that these maintenance + tasks are normally done automatically by autovacuum; use of these + functions is only required in special cases.) + These functions cannot be executed during recovery. + Use of these functions is restricted to superusers and the owner + of the given index. + + + + Index Maintenance Functions + + + + + Function + + + Description + + + + + + + + + brin_summarize_new_values + + brin_summarize_new_values ( index regclass ) + integer + + + Scans the specified BRIN index to find page ranges in the base table + that are not currently summarized by the index; for any such range it + creates a new summary index tuple by scanning those table pages. + Returns the number of new page range summaries that were inserted + into the index. + + + + + + + brin_summarize_range + + brin_summarize_range ( index regclass, blockNumber bigint ) + integer + + + Summarizes the page range covering the given block, if not already + summarized. This is + like brin_summarize_new_values except that it + only processes the page range that covers the given table block number. + + + + + + + brin_desummarize_range + + brin_desummarize_range ( index regclass, blockNumber bigint ) + void + + + Removes the BRIN index tuple that summarizes the page range covering + the given table block, if there is one. + + + + + + + gin_clean_pending_list + + gin_clean_pending_list ( index regclass ) + bigint + + + Cleans up the pending list of the specified GIN index + by moving entries in it, in bulk, to the main GIN data structure. + Returns the number of pages removed from the pending list. + If the argument is a GIN index built with + the fastupdate option disabled, no cleanup happens + and the result is zero, because the index doesn't have a pending list. + See and + for details about the pending list and fastupdate + option. + + + + +
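+
+ For example, assuming a BRIN index named brin_idx and a GIN
+ index named gin_idx exist, any not-yet-summarized page ranges
+ can be summarized and the pending list flushed with:
+
+SELECT brin_summarize_new_values('brin_idx');
+SELECT gin_clean_pending_list('gin_idx');
+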
+ +
+ + + Generic File Access Functions + + + The functions shown in provide native access to + files on the machine hosting the server. Only files within the + database cluster directory and the log_directory can be + accessed, unless the user is a superuser or is granted the role + pg_read_server_files. Use a relative path for files in + the cluster directory, and a path matching the log_directory + configuration setting for log files. + + + + Note that granting users the EXECUTE privilege on + pg_read_file(), or related functions, allows them the + ability to read any file on the server that the database server process can + read; these functions bypass all in-database privilege checks. This means + that, for example, a user with such access is able to read the contents of + the pg_authid table where authentication + information is stored, as well as read any table data in the database. + Therefore, granting access to these functions should be carefully + considered. + + + + When granting privilege on these functions, note that the table entries + showing optional parameters are mostly implemented as several physical + functions with different parameter lists. Privilege must be granted + separately on each such function, if it is to be + used. psql's \df command + can be useful to check what the actual function signatures are. + + + + Some of these functions take an optional missing_ok + parameter, which specifies the behavior when the file or directory does + not exist. If true, the function + returns NULL or an empty result set, as appropriate. + If false, an error is raised. (Failure conditions + other than file not found are reported as errors in any + case.) The default is false. + + + + Generic File Access Functions + + + + + Function + + + Description + + + + + + + + + pg_ls_dir + + pg_ls_dir ( dirname text , missing_ok boolean, include_dot_dirs boolean ) + setof text + + + Returns the names of all files (and directories and other special + files) in the specified + directory. The include_dot_dirs parameter + indicates whether . and .. are to be + included in the result set; the default is to exclude them. Including + them can be useful when missing_ok + is true, to distinguish an empty directory from a + non-existent directory. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_ls_logdir + + pg_ls_logdir () + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's log directory. Filenames beginning with + a dot, directories, and other special files are excluded. + + + This function is restricted to superusers and roles with privileges of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + pg_ls_waldir + + pg_ls_waldir () + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's write-ahead log (WAL) directory. + Filenames beginning with a dot, directories, and other special files + are excluded. + + + This function is restricted to superusers and roles with privileges of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. 
+ + + + + + + pg_ls_logicalmapdir + + pg_ls_logicalmapdir () + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's pg_logical/mappings + directory. Filenames beginning with a dot, directories, and other + special files are excluded. + + + This function is restricted to superusers and members of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + pg_ls_logicalsnapdir + + pg_ls_logicalsnapdir () + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's pg_logical/snapshots + directory. Filenames beginning with a dot, directories, and other + special files are excluded. + + + This function is restricted to superusers and members of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + pg_ls_replslotdir + + pg_ls_replslotdir ( slot_name text ) + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's pg_replslot/slot_name + directory, where slot_name is the name of the + replication slot provided as input of the function. Filenames beginning + with a dot, directories, and other special files are excluded. + + + This function is restricted to superusers and members of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + pg_ls_summariesdir + + pg_ls_summariesdir () + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's WAL summaries directory + (pg_wal/summaries). Filenames beginning + with a dot, directories, and other special files are excluded. + + + This function is restricted to superusers and members of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + pg_ls_archive_statusdir + + pg_ls_archive_statusdir () + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the server's WAL archive status directory + (pg_wal/archive_status). Filenames beginning + with a dot, directories, and other special files are excluded. + + + This function is restricted to superusers and members of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + + pg_ls_tmpdir + + pg_ls_tmpdir ( tablespace oid ) + setof record + ( name text, + size bigint, + modification timestamp with time zone ) + + + Returns the name, size, and last modification time (mtime) of each + ordinary file in the temporary file directory for the + specified tablespace. + If tablespace is not provided, + the pg_default tablespace is examined. Filenames + beginning with a dot, directories, and other special files are + excluded. + + + This function is restricted to superusers and members of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. 
+ + + + + + + pg_read_file + + pg_read_file ( filename text , offset bigint, length bigint , missing_ok boolean ) + text + + + Returns all or part of a text file, starting at the + given byte offset, returning at + most length bytes (less if the end of file is + reached first). If offset is negative, it is + relative to the end of the file. If offset + and length are omitted, the entire file is + returned. The bytes read from the file are interpreted as a string in + the database's encoding; an error is thrown if they are not valid in + that encoding. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + + + pg_read_binary_file + + pg_read_binary_file ( filename text , offset bigint, length bigint , missing_ok boolean ) + bytea + + + Returns all or part of a file. This function is identical to + pg_read_file except that it can read arbitrary + binary data, returning the result as bytea + not text; accordingly, no encoding checks are performed. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + In combination with the convert_from function, + this function can be used to read a text file in a specified encoding + and convert to the database's encoding: + +SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); + + + + + + + + pg_stat_file + + pg_stat_file ( filename text , missing_ok boolean ) + record + ( size bigint, + access timestamp with time zone, + modification timestamp with time zone, + change timestamp with time zone, + creation timestamp with time zone, + isdir boolean ) + + + Returns a record containing the file's size, last access time stamp, + last modification time stamp, last file status change time stamp (Unix + platforms only), file creation time stamp (Windows only), and a flag + indicating if it is a directory. + + + This function is restricted to superusers by default, but other users + can be granted EXECUTE to run the function. + + + + + +
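+
+ For example, assuming sufficient privileges, the server's lock file
+ can be examined and the most recently modified WAL segment found as
+ follows:
+
+SELECT size, modification FROM pg_stat_file('postmaster.pid');
+SELECT name FROM pg_ls_waldir() ORDER BY modification DESC LIMIT 1;
+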
+ +
+ + + Advisory Lock Functions + + + The functions shown in + manage advisory locks. For details about proper use of these functions, + see . + + + + All these functions are intended to be used to lock application-defined + resources, which can be identified either by a single 64-bit key value or + two 32-bit key values (note that these two key spaces do not overlap). + If another session already holds a conflicting lock on the same resource + identifier, the functions will either wait until the resource becomes + available, or return a false result, as appropriate for + the function. + Locks can be either shared or exclusive: a shared lock does not conflict + with other shared locks on the same resource, only with exclusive locks. + Locks can be taken at session level (so that they are held until released + or the session ends) or at transaction level (so that they are held until + the current transaction ends; there is no provision for manual release). + Multiple session-level lock requests stack, so that if the same resource + identifier is locked three times there must then be three unlock requests + to release the resource in advance of session end. + + + + Advisory Lock Functions + + + + + Function + + + Description + + + + + + + + + pg_advisory_lock + + pg_advisory_lock ( key bigint ) + void + + + pg_advisory_lock ( key1 integer, key2 integer ) + void + + + Obtains an exclusive session-level advisory lock, waiting if necessary. + + + + + + + pg_advisory_lock_shared + + pg_advisory_lock_shared ( key bigint ) + void + + + pg_advisory_lock_shared ( key1 integer, key2 integer ) + void + + + Obtains a shared session-level advisory lock, waiting if necessary. + + + + + + + pg_advisory_unlock + + pg_advisory_unlock ( key bigint ) + boolean + + + pg_advisory_unlock ( key1 integer, key2 integer ) + boolean + + + Releases a previously-acquired exclusive session-level advisory lock. + Returns true if the lock is successfully released. + If the lock was not held, false is returned, and in + addition, an SQL warning will be reported by the server. + + + + + + + pg_advisory_unlock_all + + pg_advisory_unlock_all () + void + + + Releases all session-level advisory locks held by the current session. + (This function is implicitly invoked at session end, even if the + client disconnects ungracefully.) + + + + + + + pg_advisory_unlock_shared + + pg_advisory_unlock_shared ( key bigint ) + boolean + + + pg_advisory_unlock_shared ( key1 integer, key2 integer ) + boolean + + + Releases a previously-acquired shared session-level advisory lock. + Returns true if the lock is successfully released. + If the lock was not held, false is returned, and in + addition, an SQL warning will be reported by the server. + + + + + + + pg_advisory_xact_lock + + pg_advisory_xact_lock ( key bigint ) + void + + + pg_advisory_xact_lock ( key1 integer, key2 integer ) + void + + + Obtains an exclusive transaction-level advisory lock, waiting if + necessary. + + + + + + + pg_advisory_xact_lock_shared + + pg_advisory_xact_lock_shared ( key bigint ) + void + + + pg_advisory_xact_lock_shared ( key1 integer, key2 integer ) + void + + + Obtains a shared transaction-level advisory lock, waiting if + necessary. + + + + + + + pg_try_advisory_lock + + pg_try_advisory_lock ( key bigint ) + boolean + + + pg_try_advisory_lock ( key1 integer, key2 integer ) + boolean + + + Obtains an exclusive session-level advisory lock if available. 
+ This will either obtain the lock immediately and + return true, or return false + without waiting if the lock cannot be acquired immediately. + + + + + + + pg_try_advisory_lock_shared + + pg_try_advisory_lock_shared ( key bigint ) + boolean + + + pg_try_advisory_lock_shared ( key1 integer, key2 integer ) + boolean + + + Obtains a shared session-level advisory lock if available. + This will either obtain the lock immediately and + return true, or return false + without waiting if the lock cannot be acquired immediately. + + + + + + + pg_try_advisory_xact_lock + + pg_try_advisory_xact_lock ( key bigint ) + boolean + + + pg_try_advisory_xact_lock ( key1 integer, key2 integer ) + boolean + + + Obtains an exclusive transaction-level advisory lock if available. + This will either obtain the lock immediately and + return true, or return false + without waiting if the lock cannot be acquired immediately. + + + + + + + pg_try_advisory_xact_lock_shared + + pg_try_advisory_xact_lock_shared ( key bigint ) + boolean + + + pg_try_advisory_xact_lock_shared ( key1 integer, key2 integer ) + boolean + + + Obtains a shared transaction-level advisory lock if available. + This will either obtain the lock immediately and + return true, or return false + without waiting if the lock cannot be acquired immediately. + + + + +
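+
+ For example, a typical non-blocking usage pattern looks like this,
+ where the key value 12345 is an arbitrary application-defined
+ resource identifier:
+
+SELECT pg_try_advisory_lock(12345);   -- true if the lock was acquired
+-- ... work with the protected application resource ...
+SELECT pg_advisory_unlock(12345);
+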
+ +
+ +
diff --git a/doc/src/sgml/func/func-aggregate.sgml b/doc/src/sgml/func/func-aggregate.sgml new file mode 100644 index 0000000000000..f50b692516b62 --- /dev/null +++ b/doc/src/sgml/func/func-aggregate.sgml @@ -0,0 +1,1418 @@ + + Aggregate Functions + + + aggregate function + built-in + + + + Aggregate functions compute a single result + from a set of input values. The built-in general-purpose aggregate + functions are listed in + while statistical aggregates are in . + The built-in within-group ordered-set aggregate functions + are listed in + while the built-in within-group hypothetical-set ones are in . Grouping operations, + which are closely related to aggregate functions, are listed in + . + The special syntax considerations for aggregate + functions are explained in . + Consult for additional introductory + information. + + + + Aggregate functions that support Partial Mode + are eligible to participate in various optimizations, such as parallel + aggregation. + + + + While all aggregates below accept an optional + ORDER BY clause (as outlined in ), the clause has only been added to + aggregates whose output is affected by ordering. + + + + General-Purpose Aggregate Functions + + + + + + + Function + + + Description + + Partial Mode + + + + + + + + any_value + + any_value ( anyelement ) + same as input type + + + Returns an arbitrary value from the non-null input values. + + Yes + + + + + + array_agg + + array_agg ( anynonarray ORDER BY input_sort_columns ) + anyarray + + + Collects all the input values, including nulls, into an array. + + Yes + + + + + array_agg ( anyarray ORDER BY input_sort_columns ) + anyarray + + + Concatenates all the input arrays into an array of one higher + dimension. (The inputs must all have the same dimensionality, and + cannot be empty or null.) + + Yes + + + + + + average + + + avg + + avg ( smallint ) + numeric + + + avg ( integer ) + numeric + + + avg ( bigint ) + numeric + + + avg ( numeric ) + numeric + + + avg ( real ) + double precision + + + avg ( double precision ) + double precision + + + avg ( interval ) + interval + + + Computes the average (arithmetic mean) of all the non-null input + values. + + Yes + + + + + + bit_and + + bit_and ( smallint ) + smallint + + + bit_and ( integer ) + integer + + + bit_and ( bigint ) + bigint + + + bit_and ( bit ) + bit + + + Computes the bitwise AND of all non-null input values. + + Yes + + + + + + bit_or + + bit_or ( smallint ) + smallint + + + bit_or ( integer ) + integer + + + bit_or ( bigint ) + bigint + + + bit_or ( bit ) + bit + + + Computes the bitwise OR of all non-null input values. + + Yes + + + + + + bit_xor + + bit_xor ( smallint ) + smallint + + + bit_xor ( integer ) + integer + + + bit_xor ( bigint ) + bigint + + + bit_xor ( bit ) + bit + + + Computes the bitwise exclusive OR of all non-null input values. + Can be useful as a checksum for an unordered set of values. + + Yes + + + + + + bool_and + + bool_and ( boolean ) + boolean + + + Returns true if all non-null input values are true, otherwise false. + + Yes + + + + + + bool_or + + bool_or ( boolean ) + boolean + + + Returns true if any non-null input value is true, otherwise false. + + Yes + + + + + + count + + count ( * ) + bigint + + + Computes the number of input rows. + + Yes + + + + + count ( "any" ) + bigint + + + Computes the number of input rows in which the input value is not + null. + + Yes + + + + + + every + + every ( boolean ) + boolean + + + This is the SQL standard's equivalent to bool_and. 
+ + Yes + + + + + + json_agg + + json_agg ( anyelement ORDER BY input_sort_columns ) + json + + + + jsonb_agg + + jsonb_agg ( anyelement ORDER BY input_sort_columns ) + jsonb + + + Collects all the input values, including nulls, into a JSON array. + Values are converted to JSON as per to_json + or to_jsonb. + + No + + + + + + json_agg_strict + + json_agg_strict ( anyelement ) + json + + + + jsonb_agg_strict + + jsonb_agg_strict ( anyelement ) + jsonb + + + Collects all the input values, skipping nulls, into a JSON array. + Values are converted to JSON as per to_json + or to_jsonb. + + No + + + + + json_arrayagg + json_arrayagg ( + value_expression + ORDER BY sort_expression + { NULL | ABSENT } ON NULL + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Behaves in the same way as json_array + but as an aggregate function so it only takes one + value_expression parameter. + If ABSENT ON NULL is specified, any NULL + values are omitted. + If ORDER BY is specified, the elements will + appear in the array in that order rather than in the input order. + + + SELECT json_arrayagg(v) FROM (VALUES(2),(1)) t(v) + [2, 1] + + No + + + + + json_objectagg + json_objectagg ( + { key_expression { VALUE | ':' } value_expression } + { NULL | ABSENT } ON NULL + { WITH | WITHOUT } UNIQUE KEYS + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Behaves like json_object, but as an + aggregate function, so it only takes one + key_expression and one + value_expression parameter. + + + SELECT json_objectagg(k:v) FROM (VALUES ('a'::text,current_date),('b',current_date + 1)) AS t(k,v) + { "a" : "2022-05-10", "b" : "2022-05-11" } + + No + + + + + + json_object_agg + + json_object_agg ( key + "any", value + "any" + ORDER BY input_sort_columns ) + json + + + + jsonb_object_agg + + jsonb_object_agg ( key + "any", value + "any" + ORDER BY input_sort_columns ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + Values can be null, but keys cannot. + + No + + + + + + json_object_agg_strict + + json_object_agg_strict ( + key "any", + value "any" ) + json + + + + jsonb_object_agg_strict + + jsonb_object_agg_strict ( + key "any", + value "any" ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + The key can not be null. If the + value is null then the entry is skipped, + + No + + + + + + json_object_agg_unique + + json_object_agg_unique ( + key "any", + value "any" ) + json + + + + jsonb_object_agg_unique + + jsonb_object_agg_unique ( + key "any", + value "any" ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + Values can be null, but keys cannot. + If there is a duplicate key an error is thrown. + + No + + + + + + json_object_agg_unique_strict + + json_object_agg_unique_strict ( + key "any", + value "any" ) + json + + + + jsonb_object_agg_unique_strict + + jsonb_object_agg_unique_strict ( + key "any", + value "any" ) + jsonb + + + Collects all the key/value pairs into a JSON object. Key arguments + are coerced to text; value arguments are converted as per + to_json or to_jsonb. + The key can not be null. If the + value is null then the entry is skipped. + If there is a duplicate key an error is thrown. 
+ + No + + + + + + max + + max ( see text ) + same as input type + + + Computes the maximum of the non-null input + values. Available for any numeric, string, date/time, or enum type, + as well as bytea, inet, interval, + money, oid, pg_lsn, + tid, xid8, + and also arrays and composite types containing sortable data types. + + Yes + + + + + + min + + min ( see text ) + same as input type + + + Computes the minimum of the non-null input + values. Available for any numeric, string, date/time, or enum type, + as well as bytea, inet, interval, + money, oid, pg_lsn, + tid, xid8, + and also arrays and composite types containing sortable data types. + + Yes + + + + + + range_agg + + range_agg ( value + anyrange ) + anymultirange + + + range_agg ( value + anymultirange ) + anymultirange + + + Computes the union of the non-null input values. + + No + + + + + + range_intersect_agg + + range_intersect_agg ( value + anyrange ) + anyrange + + + range_intersect_agg ( value + anymultirange ) + anymultirange + + + Computes the intersection of the non-null input values. + + No + + + + + + string_agg + + string_agg ( value + text, delimiter text ) + text + + + string_agg ( value + bytea, delimiter bytea + ORDER BY input_sort_columns ) + bytea + + + Concatenates the non-null input values into a string. Each value + after the first is preceded by the + corresponding delimiter (if it's not null). + + Yes + + + + + + sum + + sum ( smallint ) + bigint + + + sum ( integer ) + bigint + + + sum ( bigint ) + numeric + + + sum ( numeric ) + numeric + + + sum ( real ) + real + + + sum ( double precision ) + double precision + + + sum ( interval ) + interval + + + sum ( money ) + money + + + Computes the sum of the non-null input values. + + Yes + + + + + + xmlagg + + xmlagg ( xml ORDER BY input_sort_columns ) + xml + + + Concatenates the non-null XML input values (see + ). + + No + + + +
+
+
+ It should be noted that except for count,
+ these functions return a null value when no rows are selected. In
+ particular, sum of no rows returns null, not
+ zero as one might expect, and array_agg
+ returns null rather than an empty array when there are no input
+ rows. The coalesce function can be used to
+ substitute zero or an empty array for null when necessary.
+
+
+
+ The aggregate functions array_agg,
+ json_agg, jsonb_agg,
+ json_agg_strict, jsonb_agg_strict,
+ json_object_agg, jsonb_object_agg,
+ json_object_agg_strict, jsonb_object_agg_strict,
+ json_object_agg_unique, jsonb_object_agg_unique,
+ json_object_agg_unique_strict,
+ jsonb_object_agg_unique_strict,
+ string_agg,
+ and xmlagg, as well as similar user-defined
+ aggregate functions, produce meaningfully different result values
+ depending on the order of the input values. This ordering is
+ unspecified by default, but can be controlled by writing an
+ ORDER BY clause within the aggregate call, as shown in
+ .
+ Alternatively, supplying the input values from a sorted subquery
+ will usually work. For example:
+
+SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab;
+
+ Beware that this approach can fail if the outer query level contains
+ additional processing, such as a join, because that might cause the
+ subquery's output to be reordered before the aggregate is computed.
+
+
+
+ ANY
+
+
+ SOME
+
+
+ The boolean aggregates bool_and and
+ bool_or correspond to the standard SQL aggregates
+ every and any or
+ some.
+ PostgreSQL
+ supports every, but not any
+ or some, because there is an ambiguity built into
+ the standard syntax:
+
+SELECT b1 = ANY((SELECT b2 FROM t2 ...)) FROM t1 ...;
+
+ Here ANY can be considered either as introducing
+ a subquery, or as being an aggregate function, if the subquery
+ returns one row with a Boolean value.
+ Thus the standard name cannot be given to these aggregates.
+
+
+
+
+ Users accustomed to working with other SQL database management
+ systems might be disappointed by the performance of the
+ count aggregate when it is applied to the
+ entire table. A query like:
+
+SELECT count(*) FROM sometable;
+
+ will require effort proportional to the size of the table:
+ PostgreSQL will need to scan either the
+ entire table or the entirety of an index that includes all rows in
+ the table.
+
+
+
+ shows
+ aggregate functions typically used in statistical analysis.
+ (These are separated out merely to avoid cluttering the listing
+ of more-commonly-used aggregates.) Functions shown as
+ accepting numeric_type are available for all
+ the types smallint, integer,
+ bigint, numeric, real,
+ and double precision.
+ Where the description mentions
+ N, it means the
+ number of input rows for which all the input expressions are non-null.
+ In all cases, null is returned if the computation is meaningless,
+ for example when N is zero.
+
+
+
+ statistics
+
+
+ linear regression
+
+
+
+ Aggregate Functions for Statistics
+
+
+
+
+
+
+ Function
+
+
+ Description
+
+ Partial Mode
+
+
+
+
+
+
+
+ correlation
+
+
+ corr
+
+ corr ( Y double precision, X double precision )
+ double precision
+
+
+ Computes the correlation coefficient.
+
+ Yes
+
+
+
+
+
+ covariance
+ population
+
+
+ covar_pop
+
+ covar_pop ( Y double precision, X double precision )
+ double precision
+
+
+ Computes the population covariance.
+
+ Yes
+
+
+
+
+
+ covariance
+ sample
+
+
+ covar_samp
+
+ covar_samp ( Y double precision, X double precision )
+ double precision
+
+
+ Computes the sample covariance.
+ + Yes + + + + + + regr_avgx + + regr_avgx ( Y double precision, X double precision ) + double precision + + + Computes the average of the independent variable, + sum(X)/N. + + Yes + + + + + + regr_avgy + + regr_avgy ( Y double precision, X double precision ) + double precision + + + Computes the average of the dependent variable, + sum(Y)/N. + + Yes + + + + + + regr_count + + regr_count ( Y double precision, X double precision ) + bigint + + + Computes the number of rows in which both inputs are non-null. + + Yes + + + + + + regression intercept + + + regr_intercept + + regr_intercept ( Y double precision, X double precision ) + double precision + + + Computes the y-intercept of the least-squares-fit linear equation + determined by the + (X, Y) pairs. + + Yes + + + + + + regr_r2 + + regr_r2 ( Y double precision, X double precision ) + double precision + + + Computes the square of the correlation coefficient. + + Yes + + + + + + regression slope + + + regr_slope + + regr_slope ( Y double precision, X double precision ) + double precision + + + Computes the slope of the least-squares-fit linear equation determined + by the (X, Y) + pairs. + + Yes + + + + + + regr_sxx + + regr_sxx ( Y double precision, X double precision ) + double precision + + + Computes the sum of squares of the independent + variable, + sum(X^2) - sum(X)^2/N. + + Yes + + + + + + regr_sxy + + regr_sxy ( Y double precision, X double precision ) + double precision + + + Computes the sum of products of independent times + dependent variables, + sum(X*Y) - sum(X) * sum(Y)/N. + + Yes + + + + + + regr_syy + + regr_syy ( Y double precision, X double precision ) + double precision + + + Computes the sum of squares of the dependent + variable, + sum(Y^2) - sum(Y)^2/N. + + Yes + + + + + + standard deviation + + + stddev + + stddev ( numeric_type ) + double precision + for real or double precision, + otherwise numeric + + + This is a historical alias for stddev_samp. + + Yes + + + + + + standard deviation + population + + + stddev_pop + + stddev_pop ( numeric_type ) + double precision + for real or double precision, + otherwise numeric + + + Computes the population standard deviation of the input values. + + Yes + + + + + + standard deviation + sample + + + stddev_samp + + stddev_samp ( numeric_type ) + double precision + for real or double precision, + otherwise numeric + + + Computes the sample standard deviation of the input values. + + Yes + + + + + + variance + + variance ( numeric_type ) + double precision + for real or double precision, + otherwise numeric + + + This is a historical alias for var_samp. + + Yes + + + + + + variance + population + + + var_pop + + var_pop ( numeric_type ) + double precision + for real or double precision, + otherwise numeric + + + Computes the population variance of the input values (square of the + population standard deviation). + + Yes + + + + + + variance + sample + + + var_samp + + var_samp ( numeric_type ) + double precision + for real or double precision, + otherwise numeric + + + Computes the sample variance of the input values (square of the sample + standard deviation). + + Yes + + + +
+ + + shows some + aggregate functions that use the ordered-set aggregate + syntax. These functions are sometimes referred to as inverse + distribution functions. Their aggregated input is introduced by + ORDER BY, and they may also take a direct + argument that is not aggregated, but is computed only once. + All these functions ignore null values in their aggregated input. + For those that take a fraction parameter, the + fraction value must be between 0 and 1; an error is thrown if not. + However, a null fraction value simply produces a + null result. + + + + ordered-set aggregate + built-in + + + inverse distribution + + + + Ordered-Set Aggregate Functions + + + + + + + Function + + + Description + + Partial Mode + + + + + + + + mode + statistical + + mode () WITHIN GROUP ( ORDER BY anyelement ) + anyelement + + + Computes the mode, the most frequent + value of the aggregated argument (arbitrarily choosing the first one + if there are multiple equally-frequent values). The aggregated + argument must be of a sortable type. + + No + + + + + + percentile + continuous + + percentile_cont ( fraction double precision ) WITHIN GROUP ( ORDER BY double precision ) + double precision + + + percentile_cont ( fraction double precision ) WITHIN GROUP ( ORDER BY interval ) + interval + + + Computes the continuous percentile, a value + corresponding to the specified fraction + within the ordered set of aggregated argument values. This will + interpolate between adjacent input items if needed. + + No + + + + + percentile_cont ( fractions double precision[] ) WITHIN GROUP ( ORDER BY double precision ) + double precision[] + + + percentile_cont ( fractions double precision[] ) WITHIN GROUP ( ORDER BY interval ) + interval[] + + + Computes multiple continuous percentiles. The result is an array of + the same dimensions as the fractions + parameter, with each non-null element replaced by the (possibly + interpolated) value corresponding to that percentile. + + No + + + + + + percentile + discrete + + percentile_disc ( fraction double precision ) WITHIN GROUP ( ORDER BY anyelement ) + anyelement + + + Computes the discrete percentile, the first + value within the ordered set of aggregated argument values whose + position in the ordering equals or exceeds the + specified fraction. The aggregated + argument must be of a sortable type. + + No + + + + + percentile_disc ( fractions double precision[] ) WITHIN GROUP ( ORDER BY anyelement ) + anyarray + + + Computes multiple discrete percentiles. The result is an array of the + same dimensions as the fractions parameter, + with each non-null element replaced by the input value corresponding + to that percentile. + The aggregated argument must be of a sortable type. + + No + + + +
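+
+ For example, assuming a table households with a numeric
+ column income, the median household income is computed
+ with:
+
+SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY income)
+  FROM households;
+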
+ + + hypothetical-set aggregate + built-in + + + + Each of the hypothetical-set aggregates listed in + is associated with a + window function of the same name defined in + . In each case, the aggregate's result + is the value that the associated window function would have + returned for the hypothetical row constructed from + args, if such a row had been added to the sorted + group of rows represented by the sorted_args. + For each of these functions, the list of direct arguments + given in args must match the number and types of + the aggregated arguments given in sorted_args. + Unlike most built-in aggregates, these aggregates are not strict, that is + they do not drop input rows containing nulls. Null values sort according + to the rule specified in the ORDER BY clause. + + + + Hypothetical-Set Aggregate Functions + + + + + + + Function + + + Description + + Partial Mode + + + + + + + + rank + hypothetical + + rank ( args ) WITHIN GROUP ( ORDER BY sorted_args ) + bigint + + + Computes the rank of the hypothetical row, with gaps; that is, the row + number of the first row in its peer group. + + No + + + + + + dense_rank + hypothetical + + dense_rank ( args ) WITHIN GROUP ( ORDER BY sorted_args ) + bigint + + + Computes the rank of the hypothetical row, without gaps; this function + effectively counts peer groups. + + No + + + + + + percent_rank + hypothetical + + percent_rank ( args ) WITHIN GROUP ( ORDER BY sorted_args ) + double precision + + + Computes the relative rank of the hypothetical row, that is + (rank - 1) / (total rows - 1). + The value thus ranges from 0 to 1 inclusive. + + No + + + + + + cume_dist + hypothetical + + cume_dist ( args ) WITHIN GROUP ( ORDER BY sorted_args ) + double precision + + + Computes the cumulative distribution, that is (number of rows + preceding or peers with hypothetical row) / (total rows). The value + thus ranges from 1/N to 1. + + No + + + +
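+
+ For example, assuming a table exam_results with a numeric
+ column score, the rank that a hypothetical score of 70 would
+ receive among the existing rows is computed with:
+
+SELECT rank(70) WITHIN GROUP (ORDER BY score) FROM exam_results;
+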
+ + + Grouping Operations + + + + + Function + + + Description + + + + + + + + + GROUPING + + GROUPING ( group_by_expression(s) ) + integer + + + Returns a bit mask indicating which GROUP BY + expressions are not included in the current grouping set. + Bits are assigned with the rightmost argument corresponding to the + least-significant bit; each bit is 0 if the corresponding expression + is included in the grouping criteria of the grouping set generating + the current result row, and 1 if it is not included. + + + + +
+ + + The grouping operations shown in + are used in conjunction with + grouping sets (see ) to distinguish + result rows. The arguments to the GROUPING function + are not actually evaluated, but they must exactly match expressions given + in the GROUP BY clause of the associated query level. + For example: + +=> SELECT * FROM items_sold; + make | model | sales +-------+-------+------- + Foo | GT | 10 + Foo | Tour | 20 + Bar | City | 15 + Bar | Sport | 5 +(4 rows) + +=> SELECT make, model, GROUPING(make,model), sum(sales) FROM items_sold GROUP BY ROLLUP(make,model); + make | model | grouping | sum +-------+-------+----------+----- + Foo | GT | 0 | 10 + Foo | Tour | 0 | 20 + Bar | City | 0 | 15 + Bar | Sport | 0 | 5 + Foo | | 1 | 30 + Bar | | 1 | 20 + | | 3 | 50 +(7 rows) + + Here, the grouping value 0 in the + first four rows shows that those have been grouped normally, over both the + grouping columns. The value 1 indicates + that model was not grouped by in the next-to-last two + rows, and the value 3 indicates that + neither make nor model was grouped + by in the last row (which therefore is an aggregate over all the input + rows). + + +
diff --git a/doc/src/sgml/func/func-array.sgml b/doc/src/sgml/func/func-array.sgml new file mode 100644 index 0000000000000..97e4865a5f7d2 --- /dev/null +++ b/doc/src/sgml/func/func-array.sgml @@ -0,0 +1,646 @@ + + Array Functions and Operators + + + shows the specialized operators + available for array types. + In addition to those, the usual comparison operators shown in are available for + arrays. The comparison operators compare the array contents + element-by-element, using the default B-tree comparison function for + the element data type, and sort based on the first difference. + In multidimensional arrays the elements are visited in row-major order + (last subscript varies most rapidly). + If the contents of two arrays are equal but the dimensionality is + different, the first difference in the dimensionality information + determines the sort order. + + + + Array Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + anyarray @> anyarray + boolean + + + Does the first array contain the second, that is, does each element + appearing in the second array equal some element of the first array? + (Duplicates are not treated specially, + thus ARRAY[1] and ARRAY[1,1] are + each considered to contain the other.) + + + ARRAY[1,4,3] @> ARRAY[3,1,3] + t + + + + + + anyarray <@ anyarray + boolean + + + Is the first array contained by the second? + + + ARRAY[2,2,7] <@ ARRAY[1,7,4,2,6] + t + + + + + + anyarray && anyarray + boolean + + + Do the arrays overlap, that is, have any elements in common? + + + ARRAY[1,4,3] && ARRAY[2,1] + t + + + + + + anycompatiblearray || anycompatiblearray + anycompatiblearray + + + Concatenates the two arrays. Concatenating a null or empty array is a + no-op; otherwise the arrays must have the same number of dimensions + (as illustrated by the first example) or differ in number of + dimensions by one (as illustrated by the second). + If the arrays are not of identical element types, they will be coerced + to a common type (see ). + + + ARRAY[1,2,3] || ARRAY[4,5,6,7] + {1,2,3,4,5,6,7} + + + ARRAY[1,2,3] || ARRAY[[4,5,6],[7,8,9.9]] + {{1,2,3},{4,5,6},{7,8,9.9}} + + + + + + anycompatible || anycompatiblearray + anycompatiblearray + + + Concatenates an element onto the front of an array (which must be + empty or one-dimensional). + + + 3 || ARRAY[4,5,6] + {3,4,5,6} + + + + + + anycompatiblearray || anycompatible + anycompatiblearray + + + Concatenates an element onto the end of an array (which must be + empty or one-dimensional). + + + ARRAY[4,5,6] || 7 + {4,5,6,7} + + + + +
+ + + See for more details about array operator + behavior. See for more details about + which operators support indexed operations. + + + + shows the functions + available for use with array types. See + for more information and examples of the use of these functions. + + + + Array Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + array_append + + array_append ( anycompatiblearray, anycompatible ) + anycompatiblearray + + + Appends an element to the end of an array (same as + the anycompatiblearray || anycompatible + operator). + + + array_append(ARRAY[1,2], 3) + {1,2,3} + + + + + + + array_cat + + array_cat ( anycompatiblearray, anycompatiblearray ) + anycompatiblearray + + + Concatenates two arrays (same as + the anycompatiblearray || anycompatiblearray + operator). + + + array_cat(ARRAY[1,2,3], ARRAY[4,5]) + {1,2,3,4,5} + + + + + + + array_dims + + array_dims ( anyarray ) + text + + + Returns a text representation of the array's dimensions. + + + array_dims(ARRAY[[1,2,3], [4,5,6]]) + [1:2][1:3] + + + + + + + array_fill + + array_fill ( anyelement, integer[] + , integer[] ) + anyarray + + + Returns an array filled with copies of the given value, having + dimensions of the lengths specified by the second argument. + The optional third argument supplies lower-bound values for each + dimension (which default to all 1). + + + array_fill(11, ARRAY[2,3]) + {{11,11,11},{11,11,11}} + + + array_fill(7, ARRAY[3], ARRAY[2]) + [2:4]={7,7,7} + + + + + + + array_length + + array_length ( anyarray, integer ) + integer + + + Returns the length of the requested array dimension. + (Produces NULL instead of 0 for empty or missing array dimensions.) + + + array_length(array[1,2,3], 1) + 3 + + + array_length(array[]::int[], 1) + NULL + + + array_length(array['text'], 2) + NULL + + + + + + + array_lower + + array_lower ( anyarray, integer ) + integer + + + Returns the lower bound of the requested array dimension. + + + array_lower('[0:2]={1,2,3}'::integer[], 1) + 0 + + + + + + + array_ndims + + array_ndims ( anyarray ) + integer + + + Returns the number of dimensions of the array. + + + array_ndims(ARRAY[[1,2,3], [4,5,6]]) + 2 + + + + + + + array_position + + array_position ( anycompatiblearray, anycompatible , integer ) + integer + + + Returns the subscript of the first occurrence of the second argument + in the array, or NULL if it's not present. + If the third argument is given, the search begins at that subscript. + The array must be one-dimensional. + Comparisons are done using IS NOT DISTINCT FROM + semantics, so it is possible to search for NULL. + + + array_position(ARRAY['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'], 'mon') + 2 + + + + + + + array_positions + + array_positions ( anycompatiblearray, anycompatible ) + integer[] + + + Returns an array of the subscripts of all occurrences of the second + argument in the array given as first argument. + The array must be one-dimensional. + Comparisons are done using IS NOT DISTINCT FROM + semantics, so it is possible to search for NULL. + NULL is returned only if the array + is NULL; if the value is not found in the array, an + empty array is returned. + + + array_positions(ARRAY['A','A','B','A'], 'A') + {1,2,4} + + + + + + + array_prepend + + array_prepend ( anycompatible, anycompatiblearray ) + anycompatiblearray + + + Prepends an element to the beginning of an array (same as + the anycompatible || anycompatiblearray + operator). 
+ + + array_prepend(1, ARRAY[2,3]) + {1,2,3} + + + + + + + array_remove + + array_remove ( anycompatiblearray, anycompatible ) + anycompatiblearray + + + Removes all elements equal to the given value from the array. + The array must be one-dimensional. + Comparisons are done using IS NOT DISTINCT FROM + semantics, so it is possible to remove NULLs. + + + array_remove(ARRAY[1,2,3,2], 2) + {1,3} + + + + + + + array_replace + + array_replace ( anycompatiblearray, anycompatible, anycompatible ) + anycompatiblearray + + + Replaces each array element equal to the second argument with the + third argument. + + + array_replace(ARRAY[1,2,5,4], 5, 3) + {1,2,3,4} + + + + + + + array_reverse + + array_reverse ( anyarray ) + anyarray + + + Reverses the first dimension of the array. + + + array_reverse(ARRAY[[1,2],[3,4],[5,6]]) + {{5,6},{3,4},{1,2}} + + + + + + + array_sample + + array_sample ( array anyarray, n integer ) + anyarray + + + Returns an array of n items randomly selected + from array. n may not + exceed the length of array's first dimension. + If array is multi-dimensional, + an item is a slice having a given first subscript. + + + array_sample(ARRAY[1,2,3,4,5,6], 3) + {2,6,1} + + + array_sample(ARRAY[[1,2],[3,4],[5,6]], 2) + {{5,6},{1,2}} + + + + + + + array_shuffle + + array_shuffle ( anyarray ) + anyarray + + + Randomly shuffles the first dimension of the array. + + + array_shuffle(ARRAY[[1,2],[3,4],[5,6]]) + {{5,6},{1,2},{3,4}} + + + + + + + array_sort + + array_sort ( + array anyarray + , descending boolean + , nulls_first boolean + ) + anyarray + + + Sorts the first dimension of the array. + The sort order is determined by the default sort ordering of the + array's element type; however, if the element type is collatable, + the collation to use can be specified by adding + a COLLATE clause to + the array argument. + + + If descending is true then sort in + descending order, otherwise ascending order. If omitted, the + default is ascending order. + If nulls_first is true then nulls appear + before non-null values, otherwise nulls appear after non-null + values. + If omitted, nulls_first is taken to have + the same value as descending. + + + array_sort(ARRAY[[2,4],[2,1],[6,5]]) + {{2,1},{2,4},{6,5}} + + + + + + + array_to_string + + array_to_string ( array anyarray, delimiter text , null_string text ) + text + + + Converts each array element to its text representation, and + concatenates those separated by + the delimiter string. + If null_string is given and is + not NULL, then NULL array + entries are represented by that string; otherwise, they are omitted. + See also string_to_array. + + + array_to_string(ARRAY[1, 2, 3, NULL, 5], ',', '*') + 1,2,3,*,5 + + + + + + + array_upper + + array_upper ( anyarray, integer ) + integer + + + Returns the upper bound of the requested array dimension. + + + array_upper(ARRAY[1,8,3,7], 1) + 4 + + + + + + + cardinality + + cardinality ( anyarray ) + integer + + + Returns the total number of elements in the array, or 0 if the array + is empty. + + + cardinality(ARRAY[[1,2],[3,4]]) + 4 + + + + + + + trim_array + + trim_array ( array anyarray, n integer ) + anyarray + + + Trims an array by removing the last n elements. + If the array is multidimensional, only the first dimension is trimmed. + + + trim_array(ARRAY[1,2,3,4,5,6], 2) + {1,2,3,4} + + + + + + + unnest + + unnest ( anyarray ) + setof anyelement + + + Expands an array into a set of rows. + The array's elements are read out in storage order. 
+ + + unnest(ARRAY[1,2]) + + + 1 + 2 + + + + unnest(ARRAY[['foo','bar'],['baz','quux']]) + + + foo + bar + baz + quux + + + + + + + unnest ( anyarray, anyarray , ... ) + setof anyelement, anyelement [, ... ] + + + Expands multiple arrays (possibly of different data types) into a set of + rows. If the arrays are not all the same length then the shorter ones + are padded with NULLs. This form is only allowed + in a query's FROM clause; see . + + + select * from unnest(ARRAY[1,2], ARRAY['foo','bar','baz']) as x(a,b) + + + a | b +---+----- + 1 | foo + 2 | bar + | baz + + + + + +
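Since array_position, array_positions, and array_remove use IS NOT DISTINCT FROM semantics, NULL behaves like an ordinary searchable value; for instance:

SELECT array_position(ARRAY[1, NULL, 2], NULL);
Result: 2
SELECT array_remove(ARRAY[1, NULL, 2, NULL], NULL);
Result: {1,2}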
␍ + + + See also the aggregate + function array_agg for use with arrays. + +
diff --git a/doc/src/sgml/func/func-binarystring.sgml b/doc/src/sgml/func/func-binarystring.sgml new file mode 100644 index 0000000000000..78814ee0685eb --- /dev/null +++ b/doc/src/sgml/func/func-binarystring.sgml @@ -0,0 +1,854 @@ + + Binary String Functions and Operators + + + binary data + functions + + + + This section describes functions and operators for examining and + manipulating binary strings, that is values of type bytea. + Many of these are equivalent, in purpose and syntax, to the + text-string functions described in the previous section. + + + + SQL defines some string functions that use + key words, rather than commas, to separate + arguments. Details are in + . + PostgreSQL also provides versions of these functions + that use the regular function invocation syntax + (see ). + + + + <acronym>SQL</acronym> Binary String Functions and Operators + + + + + Function/Operator + + + Description + + + Example(s) + + + + + + + + + binary string + concatenation + + bytea || bytea + bytea + + + Concatenates the two binary strings. + + + '\x123456'::bytea || '\x789a00bcde'::bytea + \x123456789a00bcde + + + + + + + bit_length + + bit_length ( bytea ) + integer + + + Returns number of bits in the binary string (8 + times the octet_length). + + + bit_length('\x123456'::bytea) + 24 + + + + + + + btrim + + btrim ( bytes bytea, + bytesremoved bytea ) + bytea + + + Removes the longest string containing only bytes appearing in + bytesremoved from the start and end of + bytes. + + + btrim('\x1234567890'::bytea, '\x9012'::bytea) + \x345678 + + + + + + + ltrim + + ltrim ( bytes bytea, + bytesremoved bytea ) + bytea + + + Removes the longest string containing only bytes appearing in + bytesremoved from the start of + bytes. + + + ltrim('\x1234567890'::bytea, '\x9012'::bytea) + \x34567890 + + + + + + + octet_length + + octet_length ( bytea ) + integer + + + Returns number of bytes in the binary string. + + + octet_length('\x123456'::bytea) + 3 + + + + + + + overlay + + overlay ( bytes bytea PLACING newsubstring bytea FROM start integer FOR count integer ) + bytea + + + Replaces the substring of bytes that starts at + the start'th byte and extends + for count bytes + with newsubstring. + If count is omitted, it defaults to the length + of newsubstring. + + + overlay('\x1234567890'::bytea placing '\002\003'::bytea from 2 for 3) + \x12020390 + + + + + + + position + + position ( substring bytea IN bytes bytea ) + integer + + + Returns first starting index of the specified + substring within + bytes, or zero if it's not present. + + + position('\x5678'::bytea in '\x1234567890'::bytea) + 3 + + + + + + + rtrim + + rtrim ( bytes bytea, + bytesremoved bytea ) + bytea + + + Removes the longest string containing only bytes appearing in + bytesremoved from the end of + bytes. + + + rtrim('\x1234567890'::bytea, '\x9012'::bytea) + \x12345678 + + + + + + + substring + + substring ( bytes bytea FROM start integer FOR count integer ) + bytea + + + Extracts the substring of bytes starting at + the start'th byte if that is specified, + and stopping after count bytes if that is + specified. Provide at least one of start + and count. + + + substring('\x1234567890'::bytea from 3 for 2) + \x5678 + + + + + + + trim + + trim ( LEADING | TRAILING | BOTH + bytesremoved bytea FROM + bytes bytea ) + bytea + + + Removes the longest string containing only bytes appearing in + bytesremoved from the start, + end, or both ends (BOTH is the default) + of bytes. 
+ + + trim('\x9012'::bytea from '\x1234567890'::bytea) + \x345678 + + + + + + trim ( LEADING | TRAILING | BOTH FROM + bytes bytea, + bytesremoved bytea ) + bytea + + + This is a non-standard syntax for trim(). + + + trim(both from '\x1234567890'::bytea, '\x9012'::bytea) + \x345678 + + + + +
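For example, position and overlay compose naturally when patching bytes at a known offset; because the FOR count is omitted here, overlay replaces exactly as many bytes as newsubstring contains:

SELECT position('\x7890'::bytea in '\x1234567890'::bytea);
Result: 4
SELECT overlay('\x1234567890'::bytea placing '\xff'::bytea from 3);
Result: \x1234ff7890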
+ + + Additional binary string manipulation functions are available and + are listed in . Some + of them are used internally to implement the + SQL-standard string functions listed in . + + + + Other Binary String Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + bit_count + + + popcount + bit_count + + bit_count ( bytes bytea ) + bigint + + + Returns the number of bits set in the binary string (also known as + popcount). + + + bit_count('\x1234567890'::bytea) + 15 + + + + + + + crc32 + + crc32 ( bytea ) + bigint + + + Computes the CRC-32 value of the binary string. + + + crc32('abc'::bytea) + 891568578 + + + + + + + crc32c + + crc32c ( bytea ) + bigint + + + Computes the CRC-32C value of the binary string. + + + crc32c('abc'::bytea) + 910901175 + + + + + + + get_bit + + get_bit ( bytes bytea, + n bigint ) + integer + + + Extracts n'th bit + from binary string. + + + get_bit('\x1234567890'::bytea, 30) + 1 + + + + + + + get_byte + + get_byte ( bytes bytea, + n integer ) + integer + + + Extracts n'th byte + from binary string. + + + get_byte('\x1234567890'::bytea, 4) + 144 + + + + + + + length + + + binary string + length + + + length + of a binary string + binary strings, length + + length ( bytea ) + integer + + + Returns the number of bytes in the binary string. + + + length('\x1234567890'::bytea) + 5 + + + + + + length ( bytes bytea, + encoding name ) + integer + + + Returns the number of characters in the binary string, assuming + that it is text in the given encoding. + + + length('jose'::bytea, 'UTF8') + 4 + + + + + + + md5 + + md5 ( bytea ) + text + + + Computes the MD5 hash of + the binary string, with the result written in hexadecimal. + + + md5('Th\000omas'::bytea) + 8ab2d3c9689aaf18&zwsp;b4958c334c82d8b1 + + + + + + + reverse + + reverse ( bytea ) + bytea + + + Reverses the order of the bytes in the binary string. + + + reverse('\xabcd'::bytea) + \xcdab + + + + + + + set_bit + + set_bit ( bytes bytea, + n bigint, + newvalue integer ) + bytea + + + Sets n'th bit in + binary string to newvalue. + + + set_bit('\x1234567890'::bytea, 30, 0) + \x1234563890 + + + + + + + set_byte + + set_byte ( bytes bytea, + n integer, + newvalue integer ) + bytea + + + Sets n'th byte in + binary string to newvalue. + + + set_byte('\x1234567890'::bytea, 4, 64) + \x1234567840 + + + + + + + sha224 + + sha224 ( bytea ) + bytea + + + Computes the SHA-224 hash + of the binary string. + + + sha224('abc'::bytea) + \x23097d223405d8228642a477bda2&zwsp;55b32aadbce4bda0b3f7e36c9da7 + + + + + + + sha256 + + sha256 ( bytea ) + bytea + + + Computes the SHA-256 hash + of the binary string. + + + sha256('abc'::bytea) + \xba7816bf8f01cfea414140de5dae2223&zwsp;b00361a396177a9cb410ff61f20015ad + + + + + + + sha384 + + sha384 ( bytea ) + bytea + + + Computes the SHA-384 hash + of the binary string. + + + sha384('abc'::bytea) + \xcb00753f45a35e8bb5a03d699ac65007&zwsp;272c32ab0eded1631a8b605a43ff5bed&zwsp;8086072ba1e7cc2358baeca134c825a7 + + + + + + + sha512 + + sha512 ( bytea ) + bytea + + + Computes the SHA-512 hash + of the binary string. + + + sha512('abc'::bytea) + \xddaf35a193617abacc417349ae204131&zwsp;12e6fa4e89a97ea20a9eeee64b55d39a&zwsp;2192992a274fc1a836ba3c23a3feebbd&zwsp;454d4423643ce80e2a9ac94fa54ca49f + + + + + + + substr + + substr ( bytes bytea, start integer , count integer ) + bytea + + + Extracts the substring of bytes starting at + the start'th byte, + and extending for count bytes if that is + specified. (Same + as substring(bytes + from start + for count).) 
+ + + substr('\x1234567890'::bytea, 3, 2) + \x5678 + + + + +
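The byte- and bit-addressing conventions used by these functions (spelled out below) can be verified directly; byte numbering is zero-based, and bit 0 is the least significant bit of the first byte:

SELECT get_byte('\x1234567890'::bytea, 0);
Result: 18
SELECT set_bit('\x00'::bytea, 0, 1);
Result: \x01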
+ + + Functions get_byte and set_byte + number the first byte of a binary string as byte 0. + Functions get_bit and set_bit + number bits from the right within each byte; for example bit 0 is the least + significant bit of the first byte, and bit 15 is the most significant bit + of the second byte. + + + + For historical reasons, the function md5 + returns a hex-encoded value of type text whereas the SHA-2 + functions return type bytea. Use the functions + encode + and decode to + convert between the two. For example write encode(sha256('abc'), + 'hex') to get a hex-encoded text representation, + or decode(md5('abc'), 'hex') to get + a bytea value. + + + + + character string + converting to binary string + + + binary string + converting to character string + + Functions for converting strings between different character sets + (encodings), and for representing arbitrary binary data in textual + form, are shown in + . For these + functions, an argument or result of type text is expressed + in the database's default encoding, while arguments or results of + type bytea are in an encoding named by another argument. + + + + Text/Binary String Conversion Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + convert + + convert ( bytes bytea, + src_encoding name, + dest_encoding name ) + bytea + + + Converts a binary string representing text in + encoding src_encoding + to a binary string in encoding dest_encoding + (see for + available conversions). + + + convert('text_in_utf8', 'UTF8', 'LATIN1') + \x746578745f696e5f75746638 + + + + + + + convert_from + + convert_from ( bytes bytea, + src_encoding name ) + text + + + Converts a binary string representing text in + encoding src_encoding + to text in the database encoding + (see for + available conversions). + + + convert_from('text_in_utf8', 'UTF8') + text_in_utf8 + + + + + + + convert_to + + convert_to ( string text, + dest_encoding name ) + bytea + + + Converts a text string (in the database encoding) to a + binary string encoded in encoding dest_encoding + (see for + available conversions). + + + convert_to('some_text', 'UTF8') + \x736f6d655f74657874 + + + + + + + encode + + encode ( bytes bytea, + format text ) + text + + + Encodes binary data into a textual representation; supported + format values are: + base64, + escape, + hex. + + + encode('123\000\001', 'base64') + MTIzAAE= + + + + + + + decode + + decode ( string text, + format text ) + bytea + + + Decodes binary data from a textual representation; supported + format values are the same as + for encode. + + + decode('MTIzAAE=', 'base64') + \x3132330001 + + + + +
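For any supported format, encode and decode are inverses, so a round trip returns the original bytea value:

SELECT encode('\x3132330001'::bytea, 'base64');
Result: MTIzAAE=
SELECT decode(encode('\x3132330001'::bytea, 'base64'), 'base64');
Result: \x3132330001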
+ + + The encode and decode + functions support the following textual formats: + + + + base64 + + base64 format + + + + The base64 format is that + of RFC + 2045 Section 6.8. As per the RFC, encoded lines are + broken at 76 characters. However instead of the MIME CRLF + end-of-line marker, only a newline is used for end-of-line. + The decode function ignores carriage-return, + newline, space, and tab characters. Otherwise, an error is + raised when decode is supplied invalid + base64 data — including when trailing padding is incorrect. + + + + + + escape + + escape format + + + + The escape format converts zero bytes and + bytes with the high bit set into octal escape sequences + (\nnn), and it doubles + backslashes. Other byte values are represented literally. + The decode function will raise an error if a + backslash is not followed by either a second backslash or three + octal digits; it accepts other byte values unchanged. + + + + + + hex + + hex format + + + + The hex format represents each 4 bits of + data as one hexadecimal digit, 0 + through f, writing the higher-order digit of + each byte first. The encode function outputs + the a-f hex digits in lower + case. Because the smallest unit of data is 8 bits, there are + always an even number of characters returned + by encode. + The decode function + accepts the a-f characters in + either upper or lower case. An error is raised + when decode is given invalid hex data + — including when given an odd number of characters. + + + + + + + + In addition, it is possible to cast integral values to and from type + bytea. Casting an integer to bytea produces + 2, 4, or 8 bytes, depending on the width of the integer type. The result + is the two's complement representation of the integer, with the most + significant byte first. Some examples: + +1234::smallint::bytea \x04d2 +cast(1234 as bytea) \x000004d2 +cast(-1234 as bytea) \xfffffb2e +'\x8000'::bytea::smallint -32768 +'\x8000'::bytea::integer 32768 + + Casting a bytea to an integer will raise an error if the + length of the bytea exceeds the width of the integer type. + + + + See also the aggregate function string_agg in + and the large object functions + in . + +
diff --git a/doc/src/sgml/func/func-bitstring.sgml b/doc/src/sgml/func/func-bitstring.sgml new file mode 100644 index 0000000000000..f03dd63afcc6a --- /dev/null +++ b/doc/src/sgml/func/func-bitstring.sgml @@ -0,0 +1,358 @@ + + Bit String Functions and Operators + + + bit strings + functions + + + + This section describes functions and operators for examining and + manipulating bit strings, that is values of the types + bit and bit varying. (While only + type bit is mentioned in these tables, values of + type bit varying can be used interchangeably.) + Bit strings support the usual comparison operators shown in + , as well as the + operators shown in . + + + + Bit String Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + bit || bit + bit + + + Concatenation + + + B'10001' || B'011' + 10001011 + + + + + + bit & bit + bit + + + Bitwise AND (inputs must be of equal length) + + + B'10001' & B'01101' + 00001 + + + + + + bit | bit + bit + + + Bitwise OR (inputs must be of equal length) + + + B'10001' | B'01101' + 11101 + + + + + + bit # bit + bit + + + Bitwise exclusive OR (inputs must be of equal length) + + + B'10001' # B'01101' + 11100 + + + + + + ~ bit + bit + + + Bitwise NOT + + + ~ B'10001' + 01110 + + + + + + bit << integer + bit + + + Bitwise shift left + (string length is preserved) + + + B'10001' << 3 + 01000 + + + + + + bit >> integer + bit + + + Bitwise shift right + (string length is preserved) + + + B'10001' >> 2 + 00100 + + + + +
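For instance, because the shift operators preserve the string length, bits shifted past either end are simply discarded and vacated positions are zero-filled:

SELECT B'1000100010' >> 4;
Result: 0000100010
SELECT B'1100' # B'1010';
Result: 0110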
+ + + Some of the functions available for binary strings are also available + for bit strings, as shown in . + + + + Bit String Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + bit_count + + bit_count ( bit ) + bigint + + + Returns the number of bits set in the bit string (also known as + popcount). + + + bit_count(B'10111') + 4 + + + + + + + bit_length + + bit_length ( bit ) + integer + + + Returns number of bits in the bit string. + + + bit_length(B'10111') + 5 + + + + + + + length + + + bit string + length + + length ( bit ) + integer + + + Returns number of bits in the bit string. + + + length(B'10111') + 5 + + + + + + + octet_length + + octet_length ( bit ) + integer + + + Returns number of bytes in the bit string. + + + octet_length(B'1011111011') + 2 + + + + + + + overlay + + overlay ( bits bit PLACING newsubstring bit FROM start integer FOR count integer ) + bit + + + Replaces the substring of bits that starts at + the start'th bit and extends + for count bits + with newsubstring. + If count is omitted, it defaults to the length + of newsubstring. + + + overlay(B'01010101010101010' placing B'11111' from 2 for 3) + 0111110101010101010 + + + + + + + position + + position ( substring bit IN bits bit ) + integer + + + Returns first starting index of the specified substring + within bits, or zero if it's not present. + + + position(B'010' in B'000001101011') + 8 + + + + + + + substring + + substring ( bits bit FROM start integer FOR count integer ) + bit + + + Extracts the substring of bits starting at + the start'th bit if that is specified, + and stopping after count bits if that is + specified. Provide at least one of start + and count. + + + substring(B'110010111111' from 3 for 2) + 00 + + + + + + + get_bit + + get_bit ( bits bit, + n integer ) + integer + + + Extracts n'th bit + from bit string; the first (leftmost) bit is bit 0. + + + get_bit(B'101010101010101010', 6) + 1 + + + + + + + set_bit + + set_bit ( bits bit, + n integer, + newvalue integer ) + bit + + + Sets n'th bit in + bit string to newvalue; + the first (leftmost) bit is bit 0. + + + set_bit(B'101010101010101010', 6, 0) + 101010001010101010 + + + + +
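Note that get_bit and set_bit number bits differently for bit strings than for binary strings: here bit 0 is the leftmost bit, whereas for bytea (see the previous section) bit 0 is the least significant bit of the first byte. Compare:

SELECT get_bit(B'10000000', 0);
Result: 1
SELECT get_bit('\x80'::bytea, 0);
Result: 0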
+ + + In addition, it is possible to cast integral values to and from type + bit. + Casting an integer to bit(n) copies the rightmost + n bits. Casting an integer to a bit string width wider + than the integer itself will sign-extend on the left. + Some examples: + +44::bit(10) 0000101100 +44::bit(3) 100 +cast(-44 as bit(12)) 111111010100 +'1110'::bit(4)::integer 14 + + Note that casting to just bit means casting to + bit(1), and so will deliver only the least significant + bit of the integer. + +
diff --git a/doc/src/sgml/func/func-comparison.sgml b/doc/src/sgml/func/func-comparison.sgml new file mode 100644 index 0000000000000..c1205983f8bac --- /dev/null +++ b/doc/src/sgml/func/func-comparison.sgml @@ -0,0 +1,638 @@ + + Comparison Functions and Operators + + + comparison + operators + + + + The usual comparison operators are available, as shown in . + + + + Comparison Operators + + + + Operator + Description + + + + + + + datatype < datatype + boolean + + Less than + + + + + datatype > datatype + boolean + + Greater than + + + + + datatype <= datatype + boolean + + Less than or equal to + + + + + datatype >= datatype + boolean + + Greater than or equal to + + + + + datatype = datatype + boolean + + Equal + + + + + datatype <> datatype + boolean + + Not equal + + + + + datatype != datatype + boolean + + Not equal + + + +
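Each of these operators yields a boolean result, so simple examples look like:

SELECT 5 > 3;
Result: t
SELECT 'apple' < 'banana';  -- text comparison follows the collation order
Result: t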
+ + + + <> is the standard SQL notation for not + equal. != is an alias, which is converted + to <> at a very early stage of parsing. + Hence, it is not possible to implement != + and <> operators that do different things. + + + + + These comparison operators are available for all built-in data types + that have a natural ordering, including numeric, string, and date/time + types. In addition, arrays, composite types, and ranges can be compared + if their component data types are comparable. + + + + It is usually possible to compare values of related data + types as well; for example integer > + bigint will work. Some cases of this sort are implemented + directly by cross-type comparison operators, but if no + such operator is available, the parser will coerce the less-general type + to the more-general type and apply the latter's comparison operator. + + + + As shown above, all comparison operators are binary operators that + return values of type boolean. Thus, expressions like + 1 < 2 < 3 are not valid (because there is + no < operator to compare a Boolean value with + 3). Use the BETWEEN predicates + shown below to perform range tests. + + + + There are also some comparison predicates, as shown in . These behave much like + operators, but have special syntax mandated by the SQL standard. + + + + Comparison Predicates + + + + + Predicate + + + Description + + + Example(s) + + + + + + + + datatype BETWEEN datatype AND datatype + boolean + + + Between (inclusive of the range endpoints). + + + 2 BETWEEN 1 AND 3 + t + + + 2 BETWEEN 3 AND 1 + f + + + + + + datatype NOT BETWEEN datatype AND datatype + boolean + + + Not between (the negation of BETWEEN). + + + 2 NOT BETWEEN 1 AND 3 + f + + + + + + datatype BETWEEN SYMMETRIC datatype AND datatype + boolean + + + Between, after sorting the two endpoint values. + + + 2 BETWEEN SYMMETRIC 3 AND 1 + t + + + + + + datatype NOT BETWEEN SYMMETRIC datatype AND datatype + boolean + + + Not between, after sorting the two endpoint values. + + + 2 NOT BETWEEN SYMMETRIC 3 AND 1 + f + + + + + + datatype IS DISTINCT FROM datatype + boolean + + + Not equal, treating null as a comparable value. + + + 1 IS DISTINCT FROM NULL + t (rather than NULL) + + + NULL IS DISTINCT FROM NULL + f (rather than NULL) + + + + + + datatype IS NOT DISTINCT FROM datatype + boolean + + + Equal, treating null as a comparable value. + + + 1 IS NOT DISTINCT FROM NULL + f (rather than NULL) + + + NULL IS NOT DISTINCT FROM NULL + t (rather than NULL) + + + + + + datatype IS NULL + boolean + + + Test whether value is null. + + + 1.5 IS NULL + f + + + + + + datatype IS NOT NULL + boolean + + + Test whether value is not null. + + + 'null' IS NOT NULL + t + + + + + + datatype ISNULL + boolean + + + Test whether value is null (nonstandard syntax). + + + + + + datatype NOTNULL + boolean + + + Test whether value is not null (nonstandard syntax). + + + + + + boolean IS TRUE + boolean + + + Test whether boolean expression yields true. + + + true IS TRUE + t + + + NULL::boolean IS TRUE + f (rather than NULL) + + + + + + boolean IS NOT TRUE + boolean + + + Test whether boolean expression yields false or unknown. + + + true IS NOT TRUE + f + + + NULL::boolean IS NOT TRUE + t (rather than NULL) + + + + + + boolean IS FALSE + boolean + + + Test whether boolean expression yields false. + + + true IS FALSE + f + + + NULL::boolean IS FALSE + f (rather than NULL) + + + + + + boolean IS NOT FALSE + boolean + + + Test whether boolean expression yields true or unknown. 
+ + + true IS NOT FALSE + t + + + NULL::boolean IS NOT FALSE + t (rather than NULL) + + + + + + boolean IS UNKNOWN + boolean + + + Test whether boolean expression yields unknown. + + + true IS UNKNOWN + f + + + NULL::boolean IS UNKNOWN + t (rather than NULL) + + + + + + boolean IS NOT UNKNOWN + boolean + + + Test whether boolean expression yields true or false. + + + true IS NOT UNKNOWN + t + + + NULL::boolean IS NOT UNKNOWN + f (rather than NULL) + + + + +
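To make the NULL-handling rows above concrete: an ordinary comparison involving NULL yields NULL, while the DISTINCT FROM predicates always yield true or false (this difference is discussed further below):

SELECT 7 <> NULL;
Result: NULL
SELECT 7 IS DISTINCT FROM NULL;
Result: t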
+ + + + BETWEEN + + + BETWEEN SYMMETRIC + + The BETWEEN predicate simplifies range tests: + +a BETWEEN x AND y + + is equivalent to + +a >= x AND a <= y + + Notice that BETWEEN treats the endpoint values as included + in the range. + BETWEEN SYMMETRIC is like BETWEEN + except there is no requirement that the argument to the left of + AND be less than or equal to the argument on the right. + If it is not, those two arguments are automatically swapped, so that + a nonempty range is always implied. + + + + The various variants of BETWEEN are implemented in + terms of the ordinary comparison operators, and therefore will work for + any data type(s) that can be compared. + + + + + The use of AND in the BETWEEN + syntax creates an ambiguity with the use of AND as a + logical operator. To resolve this, only a limited set of expression + types are allowed as the second argument of a BETWEEN + clause. If you need to write a more complex sub-expression + in BETWEEN, write parentheses around the + sub-expression. + + + + + + IS DISTINCT FROM + + + IS NOT DISTINCT FROM + + Ordinary comparison operators yield null (signifying unknown), + not true or false, when either input is null. For example, + 7 = NULL yields null, as does 7 <> NULL. When + this behavior is not suitable, use the + IS NOT DISTINCT FROM predicates: + +a IS DISTINCT FROM b +a IS NOT DISTINCT FROM b + + For non-null inputs, IS DISTINCT FROM is + the same as the <> operator. However, if both + inputs are null it returns false, and if only one input is + null it returns true. Similarly, IS NOT DISTINCT + FROM is identical to = for non-null + inputs, but it returns true when both inputs are null, and false when only + one input is null. Thus, these predicates effectively act as though null + were a normal data value, rather than unknown. + + + + + IS NULL + + + IS NOT NULL + + + ISNULL + + + NOTNULL + + To check whether a value is or is not null, use the predicates: + +expression IS NULL +expression IS NOT NULL + + or the equivalent, but nonstandard, predicates: + +expression ISNULL +expression NOTNULL + + null valuecomparing + + + + Do not write + expression = NULL + because NULL is not equal to + NULL. (The null value represents an unknown value, + and it is not known whether two unknown values are equal.) + + + + + Some applications might expect that + expression = NULL + returns true if expression evaluates to + the null value. It is highly recommended that these applications + be modified to comply with the SQL standard. However, if that + cannot be done the + configuration variable is available. If it is enabled, + PostgreSQL will convert x = + NULL clauses to x IS NULL. + + + + + If the expression is row-valued, then + IS NULL is true when the row expression itself is null + or when all the row's fields are null, while + IS NOT NULL is true when the row expression itself is non-null + and all the row's fields are non-null. Because of this behavior, + IS NULL and IS NOT NULL do not always return + inverse results for row-valued expressions; in particular, a row-valued + expression that contains both null and non-null fields will return false + for both tests. 
For example: + + +SELECT ROW(1,2.5,'this is a test') = ROW(1, 3, 'not the same'); + +SELECT ROW(table.*) IS NULL FROM table; -- detect all-null rows + +SELECT ROW(table.*) IS NOT NULL FROM table; -- detect all-non-null rows + +SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in rows + + + In some cases, it may be preferable to + write row IS DISTINCT FROM NULL + or row IS NOT DISTINCT FROM NULL, + which will simply check whether the overall row value is null without any + additional tests on the row fields. + + + + + IS TRUE + + + IS NOT TRUE + + + IS FALSE + + + IS NOT FALSE + + + IS UNKNOWN + + + IS NOT UNKNOWN + + Boolean values can also be tested using the predicates + +boolean_expression IS TRUE +boolean_expression IS NOT TRUE +boolean_expression IS FALSE +boolean_expression IS NOT FALSE +boolean_expression IS UNKNOWN +boolean_expression IS NOT UNKNOWN + + These will always return true or false, never a null value, even when the + operand is null. + A null input is treated as the logical value unknown. + Notice that IS UNKNOWN and IS NOT UNKNOWN are + effectively the same as IS NULL and + IS NOT NULL, respectively, except that the input + expression must be of Boolean type. + + + + Some comparison-related functions are also available, as shown in . + + + + Comparison Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + num_nonnulls + + num_nonnulls ( VARIADIC "any" ) + integer + + + Returns the number of non-null arguments. + + + num_nonnulls(1, NULL, 2) + 2 + + + + + + num_nulls + + num_nulls ( VARIADIC "any" ) + integer + + + Returns the number of null arguments. + + + num_nulls(1, NULL, 2) + 1 + + + + +
+ +
diff --git a/doc/src/sgml/func/func-comparisons.sgml b/doc/src/sgml/func/func-comparisons.sgml new file mode 100644 index 0000000000000..6a6e0bd401920 --- /dev/null +++ b/doc/src/sgml/func/func-comparisons.sgml @@ -0,0 +1,336 @@ + + Row and Array Comparisons + + + IN + + + + NOT IN + + + + ANY + + + + ALL + + + + SOME + + + + composite type + comparison + + + + row-wise comparison + + + + comparison + composite type + + + + comparison + row constructor + + + + IS DISTINCT FROM + + + + IS NOT DISTINCT FROM + + + + This section describes several specialized constructs for making + multiple comparisons between groups of values. These forms are + syntactically related to the subquery forms of the previous section, + but do not involve subqueries. + The forms involving array subexpressions are + PostgreSQL extensions; the rest are + SQL-compliant. + All of the expression forms documented in this section return + Boolean (true/false) results. + + + + <literal>IN</literal> + + +expression IN (value , ...) + + + + The right-hand side is a parenthesized list + of expressions. The result is true if the left-hand expression's + result is equal to any of the right-hand expressions. This is a shorthand + notation for + + +expression = value1 +OR +expression = value2 +OR +... + + + + + Note that if the left-hand expression yields null, or if there are + no equal right-hand values and at least one right-hand expression yields + null, the result of the IN construct will be null, not false. + This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + + <literal>NOT IN</literal> + + +expression NOT IN (value , ...) + + + + The right-hand side is a parenthesized list + of expressions. The result is true if the left-hand expression's + result is unequal to all of the right-hand expressions. This is a shorthand + notation for + + +expression <> value1 +AND +expression <> value2 +AND +... + + + + + Note that if the left-hand expression yields null, or if there are + no equal right-hand values and at least one right-hand expression yields + null, the result of the NOT IN construct will be null, not true + as one might naively expect. + This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + + x NOT IN y is equivalent to NOT (x IN y) in all + cases. However, null values are much more likely to trip up the novice when + working with NOT IN than when working with IN. + It is best to express your condition positively if possible. + + + + + + <literal>ANY</literal>/<literal>SOME</literal> (array) + + +expression operator ANY (array expression) +expression operator SOME (array expression) + + + + The right-hand side is a parenthesized expression, which must yield an + array value. + The left-hand expression + is evaluated and compared to each element of the array using the + given operator, which must yield a Boolean + result. + The result of ANY is true if any true result is obtained. + The result is false if no true result is found (including the + case where the array has zero elements). + + + + If the array expression yields a null array, the result of + ANY will be null. If the left-hand expression yields null, + the result of ANY is ordinarily null (though a non-strict + comparison operator could possibly yield a different result). + Also, if the right-hand array contains any null elements and no true + comparison result is obtained, the result of ANY + will be null, not false (again, assuming a strict comparison operator). 
+ This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + SOME is a synonym for ANY. + + + + + <literal>ALL</literal> (array) + + +expression operator ALL (array expression) + + + + The right-hand side is a parenthesized expression, which must yield an + array value. + The left-hand expression + is evaluated and compared to each element of the array using the + given operator, which must yield a Boolean + result. + The result of ALL is true if all comparisons yield true + (including the case where the array has zero elements). + The result is false if any false result is found. + + + + If the array expression yields a null array, the result of + ALL will be null. If the left-hand expression yields null, + the result of ALL is ordinarily null (though a non-strict + comparison operator could possibly yield a different result). + Also, if the right-hand array contains any null elements and no false + comparison result is obtained, the result of ALL + will be null, not true (again, assuming a strict comparison operator). + This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + + Row Constructor Comparison + + +row_constructor operator row_constructor + + + + Each side is a row constructor, + as described in . + The two row constructors must have the same number of fields. + The given operator is applied to each pair + of corresponding fields. (Since the fields could be of different + types, this means that a different specific operator could be selected + for each pair.) + All the selected operators must be members of some B-tree operator + class, or be the negator of an = member of a B-tree + operator class, meaning that row constructor comparison is only + possible when the operator is + =, + <>, + <, + <=, + >, or + >=, + or has semantics similar to one of these. + + + + The = and <> cases work slightly differently + from the others. Two rows are considered + equal if all their corresponding members are non-null and equal; the rows + are unequal if any corresponding members are non-null and unequal; + otherwise the result of the row comparison is unknown (null). + + + + For the <, <=, > and + >= cases, the row elements are compared left-to-right, + stopping as soon as an unequal or null pair of elements is found. + If either of this pair of elements is null, the result of the + row comparison is unknown (null); otherwise comparison of this pair + of elements determines the result. For example, + ROW(1,2,NULL) < ROW(1,3,0) + yields true, not null, because the third pair of elements are not + considered. + + + +row_constructor IS DISTINCT FROM row_constructor + + + + This construct is similar to a <> row comparison, + but it does not yield null for null inputs. Instead, any null value is + considered unequal to (distinct from) any non-null value, and any two + nulls are considered equal (not distinct). Thus the result will + either be true or false, never null. + + + +row_constructor IS NOT DISTINCT FROM row_constructor + + + + This construct is similar to a = row comparison, + but it does not yield null for null inputs. Instead, any null value is + considered unequal to (distinct from) any non-null value, and any two + nulls are considered equal (not distinct). Thus the result will always + be either true or false, never null. 
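Pulling these constructs together, a few representative queries (the last repeats the left-to-right row comparison example from above):

SELECT 3 = ANY (ARRAY[1,2,3]);
Result: t
SELECT 3 < ALL (ARRAY[4,5,6]);
Result: t
SELECT ROW(1,2,NULL) < ROW(1,3,0);
Result: t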
+ + + + + + Composite Type Comparison + + +record operator record + + + + The SQL specification requires row-wise comparison to return NULL if the + result depends on comparing two NULL values or a NULL and a non-NULL. + PostgreSQL does this only when comparing the + results of two row constructors (as in + ) or comparing a row constructor + to the output of a subquery (as in ). + In other contexts where two composite-type values are compared, two + NULL field values are considered equal, and a NULL is considered larger + than a non-NULL. This is necessary in order to have consistent sorting + and indexing behavior for composite types. + + + + Each side is evaluated and they are compared row-wise. Composite type + comparisons are allowed when the operator is + =, + <>, + <, + <=, + > or + >=, + or has semantics similar to one of these. (To be specific, an operator + can be a row comparison operator if it is a member of a B-tree operator + class, or is the negator of the = member of a B-tree operator + class.) The default behavior of the above operators is the same as for + IS [ NOT ] DISTINCT FROM for row constructors (see + ). + + + + To support matching of rows which include elements without a default + B-tree operator class, the following operators are defined for composite + type comparison: + *=, + *<>, + *<, + *<=, + *>, and + *>=. + These operators compare the internal binary representation of the two + rows. Two rows might have a different binary representation even + though comparisons of the two rows with the equality operator is true. + The ordering of rows under these comparison operators is deterministic + but not otherwise meaningful. These operators are used internally + for materialized views and might be useful for other specialized + purposes such as replication and B-Tree deduplication (see ). They are not intended to be + generally useful for writing queries, though. + + + diff --git a/doc/src/sgml/func/func-conditional.sgml b/doc/src/sgml/func/func-conditional.sgml new file mode 100644 index 0000000000000..7ca53dbf1ab03 --- /dev/null +++ b/doc/src/sgml/func/func-conditional.sgml @@ -0,0 +1,283 @@ + + Conditional Expressions + + + CASE + + + + conditional expression + + + + This section describes the SQL-compliant conditional expressions + available in PostgreSQL. + + + + + If your needs go beyond the capabilities of these conditional + expressions, you might want to consider writing a server-side function + in a more expressive programming language. + + + + + + Although COALESCE, GREATEST, and + LEAST are syntactically similar to functions, they are + not ordinary functions, and thus cannot be used with explicit + VARIADIC array arguments. + + + + + <literal>CASE</literal> + + + The SQL CASE expression is a + generic conditional expression, similar to if/else statements in + other programming languages: + + +CASE WHEN condition THEN result + WHEN ... + ELSE result +END + + + CASE clauses can be used wherever + an expression is valid. Each condition is an + expression that returns a boolean result. If the condition's + result is true, the value of the CASE expression is the + result that follows the condition, and the + remainder of the CASE expression is not processed. If the + condition's result is not true, any subsequent WHEN clauses + are examined in the same manner. If no WHEN + condition yields true, the value of the + CASE expression is the result of the + ELSE clause. If the ELSE clause is + omitted and no condition is true, the result is null. 
+ + + + An example: + +SELECT * FROM test; + + a +--- + 1 + 2 + 3 + + +SELECT a, + CASE WHEN a=1 THEN 'one' + WHEN a=2 THEN 'two' + ELSE 'other' + END + FROM test; + + a | case +---+------- + 1 | one + 2 | two + 3 | other + + + + + The data types of all the result + expressions must be convertible to a single output type. + See for more details. + + + + There is a simple form of CASE expression + that is a variant of the general form above: + + +CASE expression + WHEN value THEN result + WHEN ... + ELSE result +END + + + The first + expression is computed, then compared to + each of the value expressions in the + WHEN clauses until one is found that is equal to it. If + no match is found, the result of the + ELSE clause (or a null value) is returned. This is similar + to the switch statement in C. + + + + The example above can be written using the simple + CASE syntax: + +SELECT a, + CASE a WHEN 1 THEN 'one' + WHEN 2 THEN 'two' + ELSE 'other' + END + FROM test; + + a | case +---+------- + 1 | one + 2 | two + 3 | other + + + + + A CASE expression does not evaluate any subexpressions + that are not needed to determine the result. For example, this is a + possible way of avoiding a division-by-zero failure: + +SELECT ... WHERE CASE WHEN x <> 0 THEN y/x > 1.5 ELSE false END; + + + + + + As described in , there are various + situations in which subexpressions of an expression are evaluated at + different times, so that the principle that CASE + evaluates only necessary subexpressions is not ironclad. For + example a constant 1/0 subexpression will usually result in + a division-by-zero failure at planning time, even if it's within + a CASE arm that would never be entered at run time. + + + + + + <literal>COALESCE</literal> + + + COALESCE + + + + NVL + + + + IFNULL + + + +COALESCE(value , ...) + + + + The COALESCE function returns the first of its + arguments that is not null. Null is returned only if all arguments + are null. It is often used to substitute a default value for + null values when data is retrieved for display, for example: + +SELECT COALESCE(description, short_description, '(none)') ... + + This returns description if it is not null, otherwise + short_description if it is not null, otherwise (none). + + + + The arguments must all be convertible to a common data type, which + will be the type of the result (see + for details). + + + + Like a CASE expression, COALESCE only + evaluates the arguments that are needed to determine the result; + that is, arguments to the right of the first non-null argument are + not evaluated. This SQL-standard function provides capabilities similar + to NVL and IFNULL, which are used in some other + database systems. + + + + + <literal>NULLIF</literal> + + + NULLIF + + + +NULLIF(value1, value2) + + + + The NULLIF function returns a null value if + value1 equals value2; + otherwise it returns value1. + This can be used to perform the inverse operation of the + COALESCE example given above: + +SELECT NULLIF(value, '(none)') ... + + In this example, if value is (none), + null is returned, otherwise the value of value + is returned. + + + + The two arguments must be of comparable types. + To be specific, they are compared exactly as if you had + written value1 + = value2, so there must be a + suitable = operator available. + + + + The result has the same type as the first argument — but there is + a subtlety. 
What is actually returned is the first argument of the + implied = operator, and in some cases that will have + been promoted to match the second argument's type. For + example, NULLIF(1, 2.2) yields numeric, + because there is no integer = + numeric operator, + only numeric = numeric. + + + + + + <literal>GREATEST</literal> and <literal>LEAST</literal> + + + GREATEST + + + LEAST + + + +GREATEST(value , ...) + + +LEAST(value , ...) + + + + The GREATEST and LEAST functions select the + largest or smallest value from a list of any number of expressions. + The expressions must all be convertible to a common data type, which + will be the type of the result + (see for details). + + + + NULL values in the argument list are ignored. The result will be NULL + only if all the expressions evaluate to NULL. (This is a deviation from + the SQL standard. According to the standard, the return value is NULL if + any argument is NULL. Some other databases behave this way.) + + + diff --git a/doc/src/sgml/func/func-datetime.sgml b/doc/src/sgml/func/func-datetime.sgml new file mode 100644 index 0000000000000..482fe45f42ebc --- /dev/null +++ b/doc/src/sgml/func/func-datetime.sgml @@ -0,0 +1,2200 @@ + + Date/Time Functions and Operators + + + shows the available + functions for date/time value processing, with details appearing in + the following subsections. illustrates the behaviors of + the basic arithmetic operators (+, + *, etc.). For formatting functions, refer to + . You should be familiar with + the background information on date/time data types from . + + + + In addition, the usual comparison operators shown in + are available for the + date/time types. Dates and timestamps (with or without time zone) are + all comparable, while times (with or without time zone) and intervals + can only be compared to other values of the same data type. When + comparing a timestamp without time zone to a timestamp with time zone, + the former value is assumed to be given in the time zone specified by + the configuration parameter, and is + rotated to UTC for comparison to the latter value (which is already + in UTC internally). Similarly, a date value is assumed to represent + midnight in the TimeZone zone when comparing it + to a timestamp. + + + + All the functions and operators described below that take time or timestamp + inputs actually come in two variants: one that takes time with time zone or timestamp + with time zone, and one that takes time without time zone or timestamp without time zone. + For brevity, these variants are not shown separately. Also, the + + and * operators come in commutative pairs (for + example both date + integer + and integer + date); we show + only one of each such pair. 
+ + + + Date/Time Operators + + + + + + Operator + + + Description + + + Example(s) + + + + + + + + date + integer + date + + + Add a number of days to a date + + + date '2001-09-28' + 7 + 2001-10-05 + + + + + + date + interval + timestamp + + + Add an interval to a date + + + date '2001-09-28' + interval '1 hour' + 2001-09-28 01:00:00 + + + + + + date + time + timestamp + + + Add a time-of-day to a date + + + date '2001-09-28' + time '03:00' + 2001-09-28 03:00:00 + + + + + + interval + interval + interval + + + Add intervals + + + interval '1 day' + interval '1 hour' + 1 day 01:00:00 + + + + + + timestamp + interval + timestamp + + + Add an interval to a timestamp + + + timestamp '2001-09-28 01:00' + interval '23 hours' + 2001-09-29 00:00:00 + + + + + + time + interval + time + + + Add an interval to a time + + + time '01:00' + interval '3 hours' + 04:00:00 + + + + + + - interval + interval + + + Negate an interval + + + - interval '23 hours' + -23:00:00 + + + + + + date - date + integer + + + Subtract dates, producing the number of days elapsed + + + date '2001-10-01' - date '2001-09-28' + 3 + + + + + + date - integer + date + + + Subtract a number of days from a date + + + date '2001-10-01' - 7 + 2001-09-24 + + + + + + date - interval + timestamp + + + Subtract an interval from a date + + + date '2001-09-28' - interval '1 hour' + 2001-09-27 23:00:00 + + + + + + time - time + interval + + + Subtract times + + + time '05:00' - time '03:00' + 02:00:00 + + + + + + time - interval + time + + + Subtract an interval from a time + + + time '05:00' - interval '2 hours' + 03:00:00 + + + + + + timestamp - interval + timestamp + + + Subtract an interval from a timestamp + + + timestamp '2001-09-28 23:00' - interval '23 hours' + 2001-09-28 00:00:00 + + + + + + interval - interval + interval + + + Subtract intervals + + + interval '1 day' - interval '1 hour' + 1 day -01:00:00 + + + + + + timestamp - timestamp + interval + + + Subtract timestamps (converting 24-hour intervals into days, + similarly to justify_hours()) + + + timestamp '2001-09-29 03:00' - timestamp '2001-07-27 12:00' + 63 days 15:00:00 + + + + + + interval * double precision + interval + + + Multiply an interval by a scalar + + + interval '1 second' * 900 + 00:15:00 + + + interval '1 day' * 21 + 21 days + + + interval '1 hour' * 3.5 + 03:30:00 + + + + + + interval / double precision + interval + + + Divide an interval by a scalar + + + interval '1 hour' / 1.5 + 00:40:00 + + + + +
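Because the + and * operators come in commutative pairs and date subtraction yields a plain integer number of days, results from these operators can participate directly in ordinary arithmetic; for example:

SELECT 7 + date '2001-09-28';
Result: 2001-10-05
SELECT (date '2001-10-01' - date '2001-09-28') * 2;
Result: 6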
+ + + Date/Time Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + age + + age ( timestamp, timestamp ) + interval + + + Subtract arguments, producing a symbolic result that + uses years and months, rather than just days + + + age(timestamp '2001-04-10', timestamp '1957-06-13') + 43 years 9 mons 27 days + + + + + + age ( timestamp ) + interval + + + Subtract argument from current_date (at midnight) + + + age(timestamp '1957-06-13') + 62 years 6 mons 10 days + + + + + + + clock_timestamp + + clock_timestamp ( ) + timestamp with time zone + + + Current date and time (changes during statement execution); + see + + + clock_timestamp() + 2019-12-23 14:39:53.662522-05 + + + + + + + current_date + + current_date + date + + + Current date; see + + + current_date + 2019-12-23 + + + + + + + current_time + + current_time + time with time zone + + + Current time of day; see + + + current_time + 14:39:53.662522-05 + + + + + + current_time ( integer ) + time with time zone + + + Current time of day, with limited precision; + see + + + current_time(2) + 14:39:53.66-05 + + + + + + + current_timestamp + + current_timestamp + timestamp with time zone + + + Current date and time (start of current transaction); + see + + + current_timestamp + 2019-12-23 14:39:53.662522-05 + + + + + + current_timestamp ( integer ) + timestamp with time zone + + + Current date and time (start of current transaction), with limited precision; + see + + + current_timestamp(0) + 2019-12-23 14:39:53-05 + + + + + + + date_add + + date_add ( timestamp with time zone, interval , text ) + timestamp with time zone + + + Add an interval to a timestamp with time + zone, computing times of day and daylight-savings adjustments + according to the time zone named by the third argument, or the + current setting if that is omitted. + The form with two arguments is equivalent to the timestamp with + time zone + interval operator. + + + date_add('2021-10-31 00:00:00+02'::timestamptz, '1 day'::interval, 'Europe/Warsaw') + 2021-10-31 23:00:00+00 + + + + + + date_bin ( interval, timestamp, timestamp ) + timestamp + + + Bin input into specified interval aligned with specified origin; see + + + date_bin('15 minutes', timestamp '2001-02-16 20:38:40', timestamp '2001-02-16 20:05:00') + 2001-02-16 20:35:00 + + + + + + + date_part + + date_part ( text, timestamp ) + double precision + + + Get timestamp subfield (equivalent to extract); + see + + + date_part('hour', timestamp '2001-02-16 20:38:40') + 20 + + + + + + date_part ( text, interval ) + double precision + + + Get interval subfield (equivalent to extract); + see + + + date_part('month', interval '2 years 3 months') + 3 + + + + + + + date_subtract + + date_subtract ( timestamp with time zone, interval , text ) + timestamp with time zone + + + Subtract an interval from a timestamp with time + zone, computing times of day and daylight-savings adjustments + according to the time zone named by the third argument, or the + current setting if that is omitted. + The form with two arguments is equivalent to the timestamp with + time zone - interval operator. 
+ + + date_subtract('2021-11-01 00:00:00+01'::timestamptz, '1 day'::interval, 'Europe/Warsaw') + 2021-10-30 22:00:00+00 + + + + + + + date_trunc + + date_trunc ( text, timestamp ) + timestamp + + + Truncate to specified precision; see + + + date_trunc('hour', timestamp '2001-02-16 20:38:40') + 2001-02-16 20:00:00 + + + + + + date_trunc ( text, timestamp with time zone, text ) + timestamp with time zone + + + Truncate to specified precision in the specified time zone; see + + + + date_trunc('day', timestamptz '2001-02-16 20:38:40+00', 'Australia/Sydney') + 2001-02-16 13:00:00+00 + + + + + + date_trunc ( text, interval ) + interval + + + Truncate to specified precision; see + + + + date_trunc('hour', interval '2 days 3 hours 40 minutes') + 2 days 03:00:00 + + + + + + + extract + + extract ( field from timestamp ) + numeric + + + Get timestamp subfield; see + + + extract(hour from timestamp '2001-02-16 20:38:40') + 20 + + + + + + extract ( field from interval ) + numeric + + + Get interval subfield; see + + + extract(month from interval '2 years 3 months') + 3 + + + + + + + isfinite + + isfinite ( date ) + boolean + + + Test for finite date (not +/-infinity) + + + isfinite(date '2001-02-16') + true + + + + + + isfinite ( timestamp ) + boolean + + + Test for finite timestamp (not +/-infinity) + + + isfinite(timestamp 'infinity') + false + + + + + + isfinite ( interval ) + boolean + + + Test for finite interval (not +/-infinity) + + + isfinite(interval '4 hours') + true + + + + + + + justify_days + + justify_days ( interval ) + interval + + + Adjust interval, converting 30-day time periods to months + + + justify_days(interval '1 year 65 days') + 1 year 2 mons 5 days + + + + + + + justify_hours + + justify_hours ( interval ) + interval + + + Adjust interval, converting 24-hour time periods to days + + + justify_hours(interval '50 hours 10 minutes') + 2 days 02:10:00 + + + + + + + justify_interval + + justify_interval ( interval ) + interval + + + Adjust interval using justify_days + and justify_hours, with additional sign + adjustments + + + justify_interval(interval '1 mon -1 hour') + 29 days 23:00:00 + + + + + + + localtime + + localtime + time + + + Current time of day; + see + + + localtime + 14:39:53.662522 + + + + + + localtime ( integer ) + time + + + Current time of day, with limited precision; + see + + + localtime(0) + 14:39:53 + + + + + + + localtimestamp + + localtimestamp + timestamp + + + Current date and time (start of current transaction); + see + + + localtimestamp + 2019-12-23 14:39:53.662522 + + + + + + localtimestamp ( integer ) + timestamp + + + Current date and time (start of current + transaction), with limited precision; + see + + + localtimestamp(2) + 2019-12-23 14:39:53.66 + + + + + + + make_date + + make_date ( year int, + month int, + day int ) + date + + + Create date from year, month and day fields + (negative years signify BC) + + + make_date(2013, 7, 15) + 2013-07-15 + + + + + + make_interval + + make_interval ( years int + , months int + , weeks int + , days int + , hours int + , mins int + , secs double precision + ) + interval + + + Create interval from years, months, weeks, days, hours, minutes and + seconds fields, each of which can default to zero + + + make_interval(days => 10) + 10 days + + + + + + + make_time + + make_time ( hour int, + min int, + sec double precision ) + time + + + Create time from hour, minute and seconds fields + + + make_time(8, 15, 23.5) + 08:15:23.5 + + + + + + + make_timestamp + + make_timestamp ( year int, + month int, + day int, 
+ hour int, + min int, + sec double precision ) + timestamp + + + Create timestamp from year, month, day, hour, minute and seconds fields + (negative years signify BC) + + + make_timestamp(2013, 7, 15, 8, 15, 23.5) + 2013-07-15 08:15:23.5 + + + + + + + make_timestamptz + + make_timestamptz ( year int, + month int, + day int, + hour int, + min int, + sec double precision + , timezone text ) + timestamp with time zone + + + Create timestamp with time zone from year, month, day, hour, minute + and seconds fields (negative years signify BC). + If timezone is not + specified, the current time zone is used; the examples assume the + session time zone is Europe/London + + + make_timestamptz(2013, 7, 15, 8, 15, 23.5) + 2013-07-15 08:15:23.5+01 + + + make_timestamptz(2013, 7, 15, 8, 15, 23.5, 'America/New_York') + 2013-07-15 13:15:23.5+01 + + + + + + + now + + now ( ) + timestamp with time zone + + + Current date and time (start of current transaction); + see + + + now() + 2019-12-23 14:39:53.662522-05 + + + + + + + statement_timestamp + + statement_timestamp ( ) + timestamp with time zone + + + Current date and time (start of current statement); + see + + + statement_timestamp() + 2019-12-23 14:39:53.662522-05 + + + + + + + timeofday + + timeofday ( ) + text + + + Current date and time + (like clock_timestamp, but as a text string); + see + + + timeofday() + Mon Dec 23 14:39:53.662522 2019 EST + + + + + + + transaction_timestamp + + transaction_timestamp ( ) + timestamp with time zone + + + Current date and time (start of current transaction); + see + + + transaction_timestamp() + 2019-12-23 14:39:53.662522-05 + + + + + + + to_timestamp + + to_timestamp ( double precision ) + timestamp with time zone + + + Convert Unix epoch (seconds since 1970-01-01 00:00:00+00) to + timestamp with time zone + + + to_timestamp(1284352323) + 2010-09-13 04:32:03+00 + + + + +
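The distinction between the transaction-, statement-, and wall-clock-based current time functions can be observed inside a transaction block. A sketch, assuming a measurable amount of time elapses between transaction start and the second query:

BEGIN;
SELECT now() = transaction_timestamp();
Result: t
SELECT clock_timestamp() > transaction_timestamp();
Result: t
COMMIT;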
+ + + + OVERLAPS + + In addition to these functions, the SQL OVERLAPS operator is + supported: + +(start1, end1) OVERLAPS (start2, end2) +(start1, length1) OVERLAPS (start2, length2) + + This expression yields true when two time periods (defined by their + endpoints) overlap, false when they do not overlap. The endpoints + can be specified as pairs of dates, times, or time stamps; or as + a date, time, or time stamp followed by an interval. When a pair + of values is provided, either the start or the end can be written + first; OVERLAPS automatically takes the earlier value + of the pair as the start. Each time period is considered to + represent the half-open interval start <= + time < end, unless + start and end are equal in which case it + represents that single time instant. This means for instance that two + time periods with only an endpoint in common do not overlap. + + + +SELECT (DATE '2001-02-16', DATE '2001-12-21') OVERLAPS + (DATE '2001-10-30', DATE '2002-10-30'); +Result: true +SELECT (DATE '2001-02-16', INTERVAL '100 days') OVERLAPS + (DATE '2001-10-30', DATE '2002-10-30'); +Result: false +SELECT (DATE '2001-10-29', DATE '2001-10-30') OVERLAPS + (DATE '2001-10-30', DATE '2001-10-31'); +Result: false +SELECT (DATE '2001-10-30', DATE '2001-10-30') OVERLAPS + (DATE '2001-10-30', DATE '2001-10-31'); +Result: true + + + + When adding an interval value to (or subtracting an + interval value from) a timestamp + or timestamp with time zone value, the months, days, and + microseconds fields of the interval value are handled in turn. + First, a nonzero months field advances or decrements the date of the + timestamp by the indicated number of months, keeping the day of month the + same unless it would be past the end of the new month, in which case the + last day of that month is used. (For example, March 31 plus 1 month + becomes April 30, but March 31 plus 2 months becomes May 31.) + Then the days field advances or decrements the date of the timestamp by + the indicated number of days. In both these steps the local time of day + is kept the same. Finally, if there is a nonzero microseconds field, it + is added or subtracted literally. + When doing arithmetic on a timestamp with time zone value in + a time zone that recognizes DST, this means that adding or subtracting + (say) interval '1 day' does not necessarily have the + same result as adding or subtracting interval '24 + hours'. + For example, with the session time zone set + to America/Denver: + +SELECT timestamp with time zone '2005-04-02 12:00:00-07' + interval '1 day'; +Result: 2005-04-03 12:00:00-06 +SELECT timestamp with time zone '2005-04-02 12:00:00-07' + interval '24 hours'; +Result: 2005-04-03 13:00:00-06 + + This happens because an hour was skipped due to a change in daylight saving + time at 2005-04-03 02:00:00 in time zone + America/Denver. + + + + Note there can be ambiguity in the months field returned by + age because different months have different numbers of + days. PostgreSQL's approach uses the month from the + earlier of the two dates when calculating partial months. For example, + age('2004-06-01', '2004-04-30') uses April to yield + 1 mon 1 day, while using May would yield 1 mon 2 + days because May has 31 days, while April has only 30. + + + + Subtraction of dates and timestamps can also be complex. 
One conceptually + simple way to perform subtraction is to convert each value to a number + of seconds using EXTRACT(EPOCH FROM ...), then subtract the + results; this produces the + number of seconds between the two values. This will adjust + for the number of days in each month, timezone changes, and daylight + saving time adjustments. Subtraction of date or timestamp + values with the - operator + returns the number of days (24-hours) and hours/minutes/seconds + between the values, making the same adjustments. The age + function returns years, months, days, and hours/minutes/seconds, + performing field-by-field subtraction and then adjusting for negative + field values. The following queries illustrate the differences in these + approaches. The sample results were produced with timezone + = 'US/Eastern'; there is a daylight saving time change between the + two dates used: + + + +SELECT EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') - + EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00'); +Result: 10537200.000000 +SELECT (EXTRACT(EPOCH FROM timestamptz '2013-07-01 12:00:00') - + EXTRACT(EPOCH FROM timestamptz '2013-03-01 12:00:00')) + / 60 / 60 / 24; +Result: 121.9583333333333333 +SELECT timestamptz '2013-07-01 12:00:00' - timestamptz '2013-03-01 12:00:00'; +Result: 121 days 23:00:00 +SELECT age(timestamptz '2013-07-01 12:00:00', timestamptz '2013-03-01 12:00:00'); +Result: 4 mons + + + + <function>EXTRACT</function>, <function>date_part</function> + + + date_part + + + extract + + + +EXTRACT(field FROM source) + + + + The extract function retrieves subfields + such as year or hour from date/time values. + source must be a value expression of + type timestamp, date, time, + or interval. (Timestamps and times can be with or + without time zone.) + field is an identifier or + string that selects what field to extract from the source value. + Not all fields are valid for every input data type; for example, fields + smaller than a day cannot be extracted from a date, while + fields of a day or more cannot be extracted from a time. + The extract function returns values of type + numeric. + + + + The following are valid field names: + + + + + century + + + The century; for interval values, the year field + divided by 100 + + + +SELECT EXTRACT(CENTURY FROM TIMESTAMP '2000-12-16 12:21:13'); +Result: 20 +SELECT EXTRACT(CENTURY FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 21 +SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD'); +Result: 1 +SELECT EXTRACT(CENTURY FROM DATE '0001-12-31 BC'); +Result: -1 +SELECT EXTRACT(CENTURY FROM INTERVAL '2001 years'); +Result: 20 + + + + + + day + + + The day of the month (1–31); for interval + values, the number of days + + + +SELECT EXTRACT(DAY FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 16 +SELECT EXTRACT(DAY FROM INTERVAL '40 days 1 minute'); +Result: 40 + + + + + + + decade + + + The year field divided by 10 + + + +SELECT EXTRACT(DECADE FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 200 + + + + + + dow + + + The day of the week as Sunday (0) to + Saturday (6) + + + +SELECT EXTRACT(DOW FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 5 + + + Note that extract's day of the week numbering + differs from that of the to_char(..., + 'D') function. 
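+
+   For example (2001-02-18 was a Sunday):
+
+SELECT EXTRACT(DOW FROM TIMESTAMP '2001-02-18 20:38:40');
+Result: 0
+SELECT to_char(TIMESTAMP '2001-02-18 20:38:40', 'D');
+Result: 1
+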
+ + + + + + + doy + + + The day of the year (1–365/366) + + + +SELECT EXTRACT(DOY FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 47 + + + + + + epoch + + + For timestamp with time zone values, the + number of seconds since 1970-01-01 00:00:00 UTC (negative for + timestamps before that); + for date and timestamp values, the + nominal number of seconds since 1970-01-01 00:00:00, + without regard to timezone or daylight-savings rules; + for interval values, the total number + of seconds in the interval + + + +SELECT EXTRACT(EPOCH FROM TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40.12-08'); +Result: 982384720.120000 +SELECT EXTRACT(EPOCH FROM TIMESTAMP '2001-02-16 20:38:40.12'); +Result: 982355920.120000 +SELECT EXTRACT(EPOCH FROM INTERVAL '5 days 3 hours'); +Result: 442800.000000 + + + + You can convert an epoch value back to a timestamp with time zone + with to_timestamp: + + +SELECT to_timestamp(982384720.12); +Result: 2001-02-17 04:38:40.12+00 + + + + Beware that applying to_timestamp to an epoch + extracted from a date or timestamp value + could produce a misleading result: the result will effectively + assume that the original value had been given in UTC, which might + not be the case. + + + + + + hour + + + The hour field (0–23 in timestamps, unrestricted in + intervals) + + + +SELECT EXTRACT(HOUR FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 20 + + + + + + isodow + + + The day of the week as Monday (1) to + Sunday (7) + + + +SELECT EXTRACT(ISODOW FROM TIMESTAMP '2001-02-18 20:38:40'); +Result: 7 + + + This is identical to dow except for Sunday. This + matches the ISO 8601 day of the week numbering. + + + + + + + isoyear + + + The ISO 8601 week-numbering year that the date + falls in + + + +SELECT EXTRACT(ISOYEAR FROM DATE '2006-01-01'); +Result: 2005 +SELECT EXTRACT(ISOYEAR FROM DATE '2006-01-02'); +Result: 2006 + + + + Each ISO 8601 week-numbering year begins with the + Monday of the week containing the 4th of January, so in early + January or late December the ISO year may be + different from the Gregorian year. See the week + field for more information. + + + + + + julian + + + The Julian Date corresponding to the + date or timestamp. Timestamps + that are not local midnight result in a fractional value. See + for more information. + + + +SELECT EXTRACT(JULIAN FROM DATE '2006-01-01'); +Result: 2453737 +SELECT EXTRACT(JULIAN FROM TIMESTAMP '2006-01-01 12:00'); +Result: 2453737.50000000000000000000 + + + + + + microseconds + + + The seconds field, including fractional parts, multiplied by 1 + 000 000; note that this includes full seconds + + + +SELECT EXTRACT(MICROSECONDS FROM TIME '17:12:28.5'); +Result: 28500000 + + + + + + millennium + + + The millennium; for interval values, the year field + divided by 1000 + + + +SELECT EXTRACT(MILLENNIUM FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 3 +SELECT EXTRACT(MILLENNIUM FROM INTERVAL '2001 years'); +Result: 2 + + + + Years in the 1900s are in the second millennium. + The third millennium started January 1, 2001. + + + + + + milliseconds + + + The seconds field, including fractional parts, multiplied by + 1000. Note that this includes full seconds. 
+ + + +SELECT EXTRACT(MILLISECONDS FROM TIME '17:12:28.5'); +Result: 28500.000 + + + + + + minute + + + The minutes field (0–59) + + + +SELECT EXTRACT(MINUTE FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 38 + + + + + + month + + + The number of the month within the year (1–12); + for interval values, the number of months modulo 12 + (0–11) + + + +SELECT EXTRACT(MONTH FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 2 +SELECT EXTRACT(MONTH FROM INTERVAL '2 years 3 months'); +Result: 3 +SELECT EXTRACT(MONTH FROM INTERVAL '2 years 13 months'); +Result: 1 + + + + + + quarter + + + The quarter of the year (1–4) that the date is in; + for interval values, the month field divided by 3 + plus 1 + + + +SELECT EXTRACT(QUARTER FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 1 +SELECT EXTRACT(QUARTER FROM INTERVAL '1 year 6 months'); +Result: 3 + + + + + + second + + + The seconds field, including any fractional seconds + + + +SELECT EXTRACT(SECOND FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 40.000000 +SELECT EXTRACT(SECOND FROM TIME '17:12:28.5'); +Result: 28.500000 + + + + + timezone + + + The time zone offset from UTC, measured in seconds. Positive values + correspond to time zones east of UTC, negative values to + zones west of UTC. (Technically, + PostgreSQL does not use UTC because + leap seconds are not handled.) + + + + + + timezone_hour + + + The hour component of the time zone offset + + + + + + timezone_minute + + + The minute component of the time zone offset + + + + + + week + + + The number of the ISO 8601 week-numbering week of + the year. By definition, ISO weeks start on Mondays and the first + week of a year contains January 4 of that year. In other words, the + first Thursday of a year is in week 1 of that year. + + + In the ISO week-numbering system, it is possible for early-January + dates to be part of the 52nd or 53rd week of the previous year, and for + late-December dates to be part of the first week of the next year. + For example, 2005-01-01 is part of the 53rd week of year + 2004, and 2006-01-01 is part of the 52nd week of year + 2005, while 2012-12-31 is part of the first week of 2013. + It's recommended to use the isoyear field together with + week to get consistent results. + + + + For interval values, the week field is simply the number + of integral days divided by 7. + + + +SELECT EXTRACT(WEEK FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 7 +SELECT EXTRACT(WEEK FROM INTERVAL '13 days 24 hours'); +Result: 1 + + + + + + year + + + The year field. Keep in mind there is no 0 AD, so subtracting + BC years from AD years should be done with care. + + + +SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40'); +Result: 2001 + + + + + + + + + When processing an interval value, + the extract function produces field values that + match the interpretation used by the interval output function. This + can produce surprising results if one starts with a non-normalized + interval representation, for example: + +SELECT INTERVAL '80 minutes'; +Result: 01:20:00 +SELECT EXTRACT(MINUTES FROM INTERVAL '80 minutes'); +Result: 20 + + + + + + When the input value is +/-Infinity, extract returns + +/-Infinity for monotonically-increasing fields (epoch, + julian, year, isoyear, + decade, century, and millennium + for timestamp inputs; epoch, hour, + day, year, decade, + century, and millennium for + interval inputs). + For other fields, NULL is returned. PostgreSQL + versions before 9.6 returned zero for all cases of infinite input. 
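+
+   For example (the NULL result would display as an empty string in psql):
+
+SELECT EXTRACT(EPOCH FROM TIMESTAMP 'infinity');
+Result: Infinity
+SELECT EXTRACT(HOUR FROM TIMESTAMP 'infinity');
+Result: NULL
+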
+ + + + + The extract function is primarily intended + for computational processing. For formatting date/time values for + display, see . + + + + The date_part function is modeled on the traditional + Ingres equivalent to the + SQL-standard function extract: + +date_part('field', source) + + Note that here the field parameter needs to + be a string value, not a name. The valid field names for + date_part are the same as for + extract. + For historical reasons, the date_part function + returns values of type double precision. This can result in + a loss of precision in certain uses. Using extract + is recommended instead. + + + +SELECT date_part('day', TIMESTAMP '2001-02-16 20:38:40'); +Result: 16 +SELECT date_part('hour', INTERVAL '4 hours 3 minutes'); +Result: 4 + + + + + + <function>date_trunc</function> + + + date_trunc + + + + The function date_trunc is conceptually + similar to the trunc function for numbers. + + + + +date_trunc(field, source , time_zone ) + + source is a value expression of type + timestamp, timestamp with time zone, + or interval. + (Values of type date and + time are cast automatically to timestamp or + interval, respectively.) + field selects to which precision to + truncate the input value. The return value is likewise of type + timestamp, timestamp with time zone, + or interval, + and it has all fields that are less significant than the + selected one set to zero (or one, for day and month). + + + + Valid values for field are: + + microseconds + milliseconds + second + minute + hour + day + week + month + quarter + year + decade + century + millennium + + + + + When the input value is of type timestamp with time zone, + the truncation is performed with respect to a particular time zone; + for example, truncation to day produces a value that + is midnight in that zone. By default, truncation is done with respect + to the current setting, but the + optional time_zone argument can be provided + to specify a different time zone. The time zone name can be specified + in any of the ways described in . + + + + A time zone cannot be specified when processing timestamp without + time zone or interval inputs. These are always + taken at face value. + + + + Examples (assuming the local time zone is America/New_York): + +SELECT date_trunc('hour', TIMESTAMP '2001-02-16 20:38:40'); +Result: 2001-02-16 20:00:00 +SELECT date_trunc('year', TIMESTAMP '2001-02-16 20:38:40'); +Result: 2001-01-01 00:00:00 +SELECT date_trunc('day', TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40+00'); +Result: 2001-02-16 00:00:00-05 +SELECT date_trunc('day', TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40+00', 'Australia/Sydney'); +Result: 2001-02-16 08:00:00-05 +SELECT date_trunc('hour', INTERVAL '3 days 02:47:33'); +Result: 3 days 02:00:00 + + + + + + <function>date_bin</function> + + + date_bin + + + + The function date_bin bins the input + timestamp into the specified interval (the stride) + aligned with a specified origin. + + + + +date_bin(stride, source, origin) + + source is a value expression of type + timestamp or timestamp with time zone. (Values + of type date are cast automatically to + timestamp.) stride is a value + expression of type interval. The return value is likewise + of type timestamp or timestamp with time zone, + and it marks the beginning of the bin into which the + source is placed. 
+ + + + Examples: + +SELECT date_bin('15 minutes', TIMESTAMP '2020-02-11 15:44:17', TIMESTAMP '2001-01-01'); +Result: 2020-02-11 15:30:00 +SELECT date_bin('15 minutes', TIMESTAMP '2020-02-11 15:44:17', TIMESTAMP '2001-01-01 00:02:30'); +Result: 2020-02-11 15:32:30 + + + + + In the case of full units (1 minute, 1 hour, etc.), it gives the same result as + the analogous date_trunc call, but the difference is + that date_bin can truncate to an arbitrary interval. + + + + The stride interval must be greater than zero and + cannot contain units of month or larger. + + + + + <literal>AT TIME ZONE</literal> and <literal>AT LOCAL</literal> + + + time zone + conversion + + + + AT TIME ZONE + + + + AT LOCAL + + + + The AT TIME ZONE operator converts time + stamp without time zone to/from + time stamp with time zone, and + time with time zone values to different time + zones. shows its + variants. + + + + <literal>AT TIME ZONE</literal> and <literal>AT LOCAL</literal> Variants + + + + + Operator + + + Description + + + Example(s) + + + + + + + + timestamp without time zone AT TIME ZONE zone + timestamp with time zone + + + Converts given time stamp without time zone to + time stamp with time zone, assuming the given + value is in the named time zone. + + + timestamp '2001-02-16 20:38:40' at time zone 'America/Denver' + 2001-02-17 03:38:40+00 + + + + + + timestamp without time zone AT LOCAL + timestamp with time zone + + + Converts given time stamp without time zone to + time stamp with the session's + TimeZone value as time zone. + + + timestamp '2001-02-16 20:38:40' at local + 2001-02-17 03:38:40+00 + + + + + + timestamp with time zone AT TIME ZONE zone + timestamp without time zone + + + Converts given time stamp with time zone to + time stamp without time zone, as the time would + appear in that zone. + + + timestamp with time zone '2001-02-16 20:38:40-05' at time zone 'America/Denver' + 2001-02-16 18:38:40 + + + + + + timestamp with time zone AT LOCAL + timestamp without time zone + + + Converts given time stamp with time zone to + time stamp without time zone, as the time would + appear with the session's TimeZone value as time zone. + + + timestamp with time zone '2001-02-16 20:38:40-05' at local + 2001-02-16 18:38:40 + + + + + + time with time zone AT TIME ZONE zone + time with time zone + + + Converts given time with time zone to a new time + zone. Since no date is supplied, this uses the currently active UTC + offset for the named destination zone. + + + time with time zone '05:34:17-05' at time zone 'UTC' + 10:34:17+00 + + + + + + time with time zone AT LOCAL + time with time zone + + + Converts given time with time zone to a new time + zone. Since no date is supplied, this uses the currently active UTC + offset for the session's TimeZone value. + + + Assuming the session's TimeZone is set to UTC: + + + time with time zone '05:34:17-05' at local + 10:34:17+00 + + + + +
+ + + In these expressions, the desired time zone zone can be + specified either as a text value (e.g., 'America/Los_Angeles') + or as an interval (e.g., INTERVAL '-08:00'). + In the text case, a time zone name can be specified in any of the ways + described in . + The interval case is only useful for zones that have fixed offsets from + UTC, so it is not very common in practice. + + + + The syntax AT LOCAL may be used as shorthand for + AT TIME ZONE local, where + local is the session's + TimeZone value. + + + + Examples (assuming the current setting + is America/Los_Angeles): + +SELECT TIMESTAMP '2001-02-16 20:38:40' AT TIME ZONE 'America/Denver'; +Result: 2001-02-16 19:38:40-08 +SELECT TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40-05' AT TIME ZONE 'America/Denver'; +Result: 2001-02-16 18:38:40 +SELECT TIMESTAMP '2001-02-16 20:38:40' AT TIME ZONE 'Asia/Tokyo' AT TIME ZONE 'America/Chicago'; +Result: 2001-02-16 05:38:40 +SELECT TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40-05' AT LOCAL; +Result: 2001-02-16 17:38:40 +SELECT TIMESTAMP WITH TIME ZONE '2001-02-16 20:38:40-05' AT TIME ZONE '+05'; +Result: 2001-02-16 20:38:40 +SELECT TIME WITH TIME ZONE '20:38:40-05' AT LOCAL; +Result: 17:38:40 + + The first example adds a time zone to a value that lacks it, and + displays the value using the current TimeZone + setting. The second example shifts the time stamp with time zone value + to the specified time zone, and returns the value without a time zone. + This allows storage and display of values different from the current + TimeZone setting. The third example converts + Tokyo time to Chicago time. The fourth example shifts the time stamp + with time zone value to the time zone currently specified by the + TimeZone setting and returns the value without a + time zone. The fifth example demonstrates that the sign in a POSIX-style + time zone specification has the opposite meaning of the sign in an + ISO-8601 datetime literal, as described in + and . + + + + The sixth example is a cautionary tale. Due to the fact that there is no + date associated with the input value, the conversion is made using the + current date of the session. Therefore, this static example may show a wrong + result depending on the time of the year it is viewed because + 'America/Los_Angeles' observes Daylight Savings Time. + + + + The function timezone(zone, + timestamp) is equivalent to the SQL-conforming construct + timestamp AT TIME ZONE + zone. + + + + The function timezone(zone, + time) is equivalent to the SQL-conforming construct + time AT TIME ZONE + zone. + + + + The function timezone(timestamp) + is equivalent to the SQL-conforming construct timestamp + AT LOCAL. + + + + The function timezone(time) + is equivalent to the SQL-conforming construct time + AT LOCAL. + +
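+
+   For example, with the America/Los_Angeles setting used above, the
+   function form gives the same result as the operator form:
+
+SELECT timezone('America/Denver', TIMESTAMP '2001-02-16 20:38:40');
+Result: 2001-02-16 19:38:40-08
+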
+ + + Current Date/Time + + + date + current + + + + time + current + + + + PostgreSQL provides a number of functions + that return values related to the current date and time. These + SQL-standard functions all return values based on the start time of + the current transaction: + +CURRENT_DATE +CURRENT_TIME +CURRENT_TIMESTAMP +CURRENT_TIME(precision) +CURRENT_TIMESTAMP(precision) +LOCALTIME +LOCALTIMESTAMP +LOCALTIME(precision) +LOCALTIMESTAMP(precision) + + + + + CURRENT_TIME and + CURRENT_TIMESTAMP deliver values with time zone; + LOCALTIME and + LOCALTIMESTAMP deliver values without time zone. + + + + CURRENT_TIME, + CURRENT_TIMESTAMP, + LOCALTIME, and + LOCALTIMESTAMP + can optionally take + a precision parameter, which causes the result to be rounded + to that many fractional digits in the seconds field. Without a precision parameter, + the result is given to the full available precision. + + + + Some examples: + +SELECT CURRENT_TIME; +Result: 14:39:53.662522-05 +SELECT CURRENT_DATE; +Result: 2019-12-23 +SELECT CURRENT_TIMESTAMP; +Result: 2019-12-23 14:39:53.662522-05 +SELECT CURRENT_TIMESTAMP(2); +Result: 2019-12-23 14:39:53.66-05 +SELECT LOCALTIMESTAMP; +Result: 2019-12-23 14:39:53.662522 + + + + + Since these functions return + the start time of the current transaction, their values do not + change during the transaction. This is considered a feature: + the intent is to allow a single transaction to have a consistent + notion of the current time, so that multiple + modifications within the same transaction bear the same + time stamp. + + + + + Other database systems might advance these values more + frequently. + + + + + PostgreSQL also provides functions that + return the start time of the current statement, as well as the actual + current time at the instant the function is called. The complete list + of non-SQL-standard time functions is: + +transaction_timestamp() +statement_timestamp() +clock_timestamp() +timeofday() +now() + + + + + transaction_timestamp() is equivalent to + CURRENT_TIMESTAMP, but is named to clearly reflect + what it returns. + statement_timestamp() returns the start time of the current + statement (more specifically, the time of receipt of the latest command + message from the client). + statement_timestamp() and transaction_timestamp() + return the same value during the first statement of a transaction, but might + differ during subsequent statements. + clock_timestamp() returns the actual current time, and + therefore its value changes even within a single SQL statement. + timeofday() is a historical + PostgreSQL function. Like + clock_timestamp(), it returns the actual current time, + but as a formatted text string rather than a timestamp + with time zone value. + now() is a traditional PostgreSQL + equivalent to transaction_timestamp(). + + + + All the date/time data types also accept the special literal value + now to specify the current date and time (again, + interpreted as the transaction start time). Thus, + the following three all return the same result: + +SELECT CURRENT_TIMESTAMP; +SELECT now(); +SELECT TIMESTAMP 'now'; -- but see tip below + + + + + + Do not use the third form when specifying a value to be evaluated later, + for example in a DEFAULT clause for a table column. + The system will convert now + to a timestamp as soon as the constant is parsed, so that when + the default value is needed, + the time of the table creation would be used! 
The first two + forms will not be evaluated until the default value is used, + because they are function calls. Thus they will give the desired + behavior of defaulting to the time of row insertion. + (See also .) + + + + + + Delaying Execution + + + pg_sleep + + + pg_sleep_for + + + pg_sleep_until + + + sleep + + + delay + + + + The following functions are available to delay execution of the server + process: + +pg_sleep ( double precision ) +pg_sleep_for ( interval ) +pg_sleep_until ( timestamp with time zone ) + + + pg_sleep makes the current session's process + sleep until the given number of seconds have + elapsed. Fractional-second delays can be specified. + pg_sleep_for is a convenience function to + allow the sleep time to be specified as an interval. + pg_sleep_until is a convenience function for when + a specific wake-up time is desired. + For example: + + +SELECT pg_sleep(1.5); +SELECT pg_sleep_for('5 minutes'); +SELECT pg_sleep_until('tomorrow 03:00'); + + + + + + The effective resolution of the sleep interval is platform-specific; + 0.01 seconds is a common value. The sleep delay will be at least as long + as specified. It might be longer depending on factors such as server load. + In particular, pg_sleep_until is not guaranteed to + wake up exactly at the specified time, but it will not wake up any earlier. + + + + + + Make sure that your session does not hold more locks than necessary + when calling pg_sleep or its variants. Otherwise + other sessions might have to wait for your sleeping process, slowing down + the entire system. + + + + +
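+
+   To illustrate the caution above, a minimal sketch of a pattern to
+   avoid (the accounts table is hypothetical):
+
+BEGIN;
+UPDATE accounts SET balance = balance + 100 WHERE id = 1;  -- acquires row locks
+SELECT pg_sleep(60);  -- the locks are held for the whole minute
+COMMIT;
+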
diff --git a/doc/src/sgml/func/func-enum.sgml b/doc/src/sgml/func/func-enum.sgml new file mode 100644 index 0000000000000..6227afe4057ba --- /dev/null +++ b/doc/src/sgml/func/func-enum.sgml @@ -0,0 +1,121 @@ + + Enum Support Functions + + + For enum types (described in ), + there are several functions that allow cleaner programming without + hard-coding particular values of an enum type. + These are listed in . The examples + assume an enum type created as: + + +CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple'); + + + + + + Enum Support Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + enum_first + + enum_first ( anyenum ) + anyenum + + + Returns the first value of the input enum type. + + + enum_first(null::rainbow) + red + + + + + + enum_last + + enum_last ( anyenum ) + anyenum + + + Returns the last value of the input enum type. + + + enum_last(null::rainbow) + purple + + + + + + enum_range + + enum_range ( anyenum ) + anyarray + + + Returns all values of the input enum type in an ordered array. + + + enum_range(null::rainbow) + {red,orange,yellow,&zwsp;green,blue,purple} + + + + + enum_range ( anyenum, anyenum ) + anyarray + + + Returns the range between the two given enum values, as an ordered + array. The values must be from the same enum type. If the first + parameter is null, the result will start with the first value of + the enum type. + If the second parameter is null, the result will end with the last + value of the enum type. + + + enum_range('orange'::rainbow, 'green'::rainbow) + {orange,yellow,green} + + + enum_range(NULL, 'green'::rainbow) + {red,orange,&zwsp;yellow,green} + + + enum_range('orange'::rainbow, NULL) + {orange,yellow,green,&zwsp;blue,purple} + + + + +
+ + + Notice that except for the two-argument form of enum_range, + these functions disregard the specific value passed to them; they care + only about its declared data type. Either null or a specific value of + the type can be passed, with the same result. It is more common to + apply these functions to a table column or function argument than to + a hardwired type name as used in the examples. + +
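+
+   For instance, given a hypothetical table with a rainbow column:
+
+CREATE TABLE paint (color rainbow);
+INSERT INTO paint VALUES ('blue');
+SELECT enum_range(NULL, color) FROM paint;
+Result: {red,orange,yellow,green,blue}
+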
diff --git a/doc/src/sgml/func/func-event-triggers.sgml b/doc/src/sgml/func/func-event-triggers.sgml new file mode 100644 index 0000000000000..9f3f51e9f5133 --- /dev/null +++ b/doc/src/sgml/func/func-event-triggers.sgml @@ -0,0 +1,332 @@ + + Event Trigger Functions + + + PostgreSQL provides these helper functions + to retrieve information from event triggers. + + + + For more information about event triggers, + see . + + + + Capturing Changes at Command End + + + pg_event_trigger_ddl_commands + + + +pg_event_trigger_ddl_commands () setof record + + + + pg_event_trigger_ddl_commands returns a list of + DDL commands executed by each user action, + when invoked in a function attached to a + ddl_command_end event trigger. If called in any other + context, an error is raised. + pg_event_trigger_ddl_commands returns one row for each + base command executed; some commands that are a single SQL sentence + may return more than one row. This function returns the following + columns: + + + + + + Name + Type + Description + + + + + + classid + oid + OID of catalog the object belongs in + + + objid + oid + OID of the object itself + + + objsubid + integer + Sub-object ID (e.g., attribute number for a column) + + + command_tag + text + Command tag + + + object_type + text + Type of the object + + + schema_name + text + + Name of the schema the object belongs in, if any; otherwise NULL. + No quoting is applied. + + + + object_identity + text + + Text rendering of the object identity, schema-qualified. Each + identifier included in the identity is quoted if necessary. + + + + in_extension + boolean + True if the command is part of an extension script + + + command + pg_ddl_command + + A complete representation of the command, in internal format. + This cannot be output directly, but it can be passed to other + functions to obtain different pieces of information about the + command. + + + + + + + + + + Processing Objects Dropped by a DDL Command + + + pg_event_trigger_dropped_objects + + + +pg_event_trigger_dropped_objects () setof record + + + + pg_event_trigger_dropped_objects returns a list of all objects + dropped by the command in whose sql_drop event it is called. + If called in any other context, an error is raised. + This function returns the following columns: + + + + + + Name + Type + Description + + + + + + classid + oid + OID of catalog the object belonged in + + + objid + oid + OID of the object itself + + + objsubid + integer + Sub-object ID (e.g., attribute number for a column) + + + original + boolean + True if this was one of the root object(s) of the deletion + + + normal + boolean + + True if there was a normal dependency relationship + in the dependency graph leading to this object + + + + is_temporary + boolean + + True if this was a temporary object + + + + object_type + text + Type of the object + + + schema_name + text + + Name of the schema the object belonged in, if any; otherwise NULL. + No quoting is applied. + + + + object_name + text + + Name of the object, if the combination of schema and name can be + used as a unique identifier for the object; otherwise NULL. + No quoting is applied, and name is never schema-qualified. + + + + object_identity + text + + Text rendering of the object identity, schema-qualified. Each + identifier included in the identity is quoted if necessary. 
+ + + + address_names + text[] + + An array that, together with object_type and + address_args, can be used by + the pg_get_object_address function to + recreate the object address in a remote server containing an + identically named object of the same kind. + + + + address_args + text[] + + Complement for address_names + + + + + + + + + The pg_event_trigger_dropped_objects function can be used + in an event trigger like this: + +CREATE FUNCTION test_event_trigger_for_drops() + RETURNS event_trigger LANGUAGE plpgsql AS $$ +DECLARE + obj record; +BEGIN + FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects() + LOOP + RAISE NOTICE '% dropped object: % %.% %', + tg_tag, + obj.object_type, + obj.schema_name, + obj.object_name, + obj.object_identity; + END LOOP; +END; +$$; +CREATE EVENT TRIGGER test_event_trigger_for_drops + ON sql_drop + EXECUTE FUNCTION test_event_trigger_for_drops(); + + + + + + Handling a Table Rewrite Event + + + The functions shown in + + provide information about a table for which a + table_rewrite event has just been called. + If called in any other context, an error is raised. + + + + Table Rewrite Information Functions + + + + + Function + + + Description + + + + + + + + + pg_event_trigger_table_rewrite_oid + + pg_event_trigger_table_rewrite_oid () + oid + + + Returns the OID of the table about to be rewritten. + + + + + + + pg_event_trigger_table_rewrite_reason + + pg_event_trigger_table_rewrite_reason () + integer + + + Returns a code explaining the reason(s) for rewriting. The value is + a bitmap built from the following values: 1 + (the table has changed its persistence), 2 + (default value of a column has changed), 4 + (a column has a new data type) and 8 + (the table access method has changed). + + + + +
+ + + These functions can be used in an event trigger like this: + +CREATE FUNCTION test_event_trigger_table_rewrite_oid() + RETURNS event_trigger + LANGUAGE plpgsql AS +$$ +BEGIN + RAISE NOTICE 'rewriting table % for reason %', + pg_event_trigger_table_rewrite_oid()::regclass, + pg_event_trigger_table_rewrite_reason(); +END; +$$; + +CREATE EVENT TRIGGER test_table_rewrite_oid + ON table_rewrite + EXECUTE FUNCTION test_event_trigger_table_rewrite_oid(); + + +
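+
+   Since the reason code is a bitmap, individual causes can be tested
+   with the integer & operator; a minimal sketch of such a test inside
+   a trigger function body:
+
+IF pg_event_trigger_table_rewrite_reason() & 4 <> 0 THEN
+    RAISE NOTICE 'table % is being rewritten because a column changed type',
+        pg_event_trigger_table_rewrite_oid()::regclass;
+END IF;
+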
+
diff --git a/doc/src/sgml/func/func-formatting.sgml b/doc/src/sgml/func/func-formatting.sgml new file mode 100644 index 0000000000000..806302b2f7b34 --- /dev/null +++ b/doc/src/sgml/func/func-formatting.sgml @@ -0,0 +1,1193 @@ + + Data Type Formatting Functions + + + formatting + + + + The PostgreSQL formatting functions + provide a powerful set of tools for converting various data types + (date/time, integer, floating point, numeric) to formatted strings + and for converting from formatted strings to specific data types. + lists them. + These functions all follow a common calling convention: the first + argument is the value to be formatted and the second argument is a + template that defines the output or input format. + + + + Formatting Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + to_char + + to_char ( timestamp, text ) + text + + + to_char ( timestamp with time zone, text ) + text + + + Converts time stamp to string according to the given format. + + + to_char(timestamp '2002-04-20 17:31:12.66', 'HH12:MI:SS') + 05:31:12 + + + + + + to_char ( interval, text ) + text + + + Converts interval to string according to the given format. + + + to_char(interval '15h 2m 12s', 'HH24:MI:SS') + 15:02:12 + + + + + + to_char ( numeric_type, text ) + text + + + Converts number to string according to the given format; available + for integer, bigint, numeric, + real, double precision. + + + to_char(125, '999') + 125 + + + to_char(125.8::real, '999D9') + 125.8 + + + to_char(-125.8, '999D99S') + 125.80- + + + + + + + to_date + + to_date ( text, text ) + date + + + Converts string to date according to the given format. + + + to_date('05 Dec 2000', 'DD Mon YYYY') + 2000-12-05 + + + + + + + to_number + + to_number ( text, text ) + numeric + + + Converts string to numeric according to the given format. + + + to_number('12,454.8-', '99G999D9S') + -12454.8 + + + + + + + to_timestamp + + to_timestamp ( text, text ) + timestamp with time zone + + + Converts string to time stamp according to the given format. + (See also to_timestamp(double precision) in + .) + + + to_timestamp('05 Dec 2000', 'DD Mon YYYY') + 2000-12-05 00:00:00-05 + + + + +
+ + + + to_timestamp and to_date + exist to handle input formats that cannot be converted by + simple casting. For most standard date/time formats, simply casting the + source string to the required data type works, and is much easier. + Similarly, to_number is unnecessary for standard numeric + representations. + + + + + In a to_char output template string, there are certain + patterns that are recognized and replaced with appropriately-formatted + data based on the given value. Any text that is not a template pattern is + simply copied verbatim. Similarly, in an input template string (for the + other functions), template patterns identify the values to be supplied by + the input data string. If there are characters in the template string + that are not template patterns, the corresponding characters in the input + data string are simply skipped over (whether or not they are equal to the + template string characters). + + + + shows the + template patterns available for formatting date and time values. + + + + Template Patterns for Date/Time Formatting + + + + Pattern + Description + + + + + HH + hour of day (01–12) + + + HH12 + hour of day (01–12) + + + HH24 + hour of day (00–23) + + + MI + minute (00–59) + + + SS + second (00–59) + + + MS + millisecond (000–999) + + + US + microsecond (000000–999999) + + + FF1 + tenth of second (0–9) + + + FF2 + hundredth of second (00–99) + + + FF3 + millisecond (000–999) + + + FF4 + tenth of a millisecond (0000–9999) + + + FF5 + hundredth of a millisecond (00000–99999) + + + FF6 + microsecond (000000–999999) + + + SSSS, SSSSS + seconds past midnight (0–86399) + + + AM, am, + PM or pm + meridiem indicator (without periods) + + + A.M., a.m., + P.M. or p.m. + meridiem indicator (with periods) + + + Y,YYY + year (4 or more digits) with comma + + + YYYY + year (4 or more digits) + + + YYY + last 3 digits of year + + + YY + last 2 digits of year + + + Y + last digit of year + + + IYYY + ISO 8601 week-numbering year (4 or more digits) + + + IYY + last 3 digits of ISO 8601 week-numbering year + + + IY + last 2 digits of ISO 8601 week-numbering year + + + I + last digit of ISO 8601 week-numbering year + + + BC, bc, + AD or ad + era indicator (without periods) + + + B.C., b.c., + A.D. or a.d. 
+ era indicator (with periods) + + + MONTH + full upper case month name (blank-padded to 9 chars) + + + Month + full capitalized month name (blank-padded to 9 chars) + + + month + full lower case month name (blank-padded to 9 chars) + + + MON + abbreviated upper case month name (3 chars in English, localized lengths vary) + + + Mon + abbreviated capitalized month name (3 chars in English, localized lengths vary) + + + mon + abbreviated lower case month name (3 chars in English, localized lengths vary) + + + MM + month number (01–12) + + + DAY + full upper case day name (blank-padded to 9 chars) + + + Day + full capitalized day name (blank-padded to 9 chars) + + + day + full lower case day name (blank-padded to 9 chars) + + + DY + abbreviated upper case day name (3 chars in English, localized lengths vary) + + + Dy + abbreviated capitalized day name (3 chars in English, localized lengths vary) + + + dy + abbreviated lower case day name (3 chars in English, localized lengths vary) + + + DDD + day of year (001–366) + + + IDDD + day of ISO 8601 week-numbering year (001–371; day 1 of the year is Monday of the first ISO week) + + + DD + day of month (01–31) + + + D + day of the week, Sunday (1) to Saturday (7) + + + ID + ISO 8601 day of the week, Monday (1) to Sunday (7) + + + W + week of month (1–5) (the first week starts on the first day of the month) + + + WW + week number of year (1–53) (the first week starts on the first day of the year) + + + IW + week number of ISO 8601 week-numbering year (01–53; the first Thursday of the year is in week 1) + + + CC + century (2 digits) (the twenty-first century starts on 2001-01-01) + + + J + Julian Date (integer days since November 24, 4714 BC at local + midnight; see ) + + + Q + quarter + + + RM + month in upper case Roman numerals (I–XII; I=January) + + + rm + month in lower case Roman numerals (i–xii; i=January) + + + TZ + upper case time-zone abbreviation + + + tz + lower case time-zone abbreviation + + + TZH + time-zone hours + + + TZM + time-zone minutes + + + OF + time-zone offset from UTC (HH + or HH:MM) + + + +
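+
+   For example, combining several of these patterns (Day is
+   blank-padded to nine characters):
+
+SELECT to_char(DATE '2001-02-16', 'Day, DD Mon YYYY');
+Result: Friday   , 16 Feb 2001
+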
+ + + Modifiers can be applied to any template pattern to alter its + behavior. For example, FMMonth + is the Month pattern with the + FM modifier. + shows the + modifier patterns for date/time formatting. + + + + Template Pattern Modifiers for Date/Time Formatting + + + + Modifier + Description + Example + + + + + FM prefix + fill mode (suppress leading zeroes and padding blanks) + FMMonth + + + TH suffix + upper case ordinal number suffix + DDTH, e.g., 12TH + + + th suffix + lower case ordinal number suffix + DDth, e.g., 12th + + + FX prefix + fixed format global option (see usage notes) + FX Month DD Day + + + TM prefix + translation mode (use localized day and month names based on + ) + TMMonth + + + SP suffix + spell mode (not implemented) + DDSP + + + +
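+
+   For example (the first result carries a trailing blank, because
+   February is blank-padded to nine characters):
+
+SELECT to_char(DATE '2001-02-16', 'Month');
+Result: February 
+SELECT to_char(DATE '2001-02-16', 'FMMonth');
+Result: February
+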
+ + + Usage notes for date/time formatting: + + + + + FM suppresses leading zeroes and trailing blanks + that would otherwise be added to make the output of a pattern be + fixed-width. In PostgreSQL, + FM modifies only the next specification, while in + Oracle FM affects all subsequent + specifications, and repeated FM modifiers + toggle fill mode on and off. + + + + + + TM suppresses trailing blanks whether or + not FM is specified. + + + + + + to_timestamp and to_date + ignore letter case in the input; so for + example MON, Mon, + and mon all accept the same strings. When using + the TM modifier, case-folding is done according to + the rules of the function's input collation (see + ). + + + + + + to_timestamp and to_date + skip multiple blank spaces at the beginning of the input string and + around date and time values unless the FX option is used. For example, + to_timestamp(' 2000    JUN', 'YYYY MON') and + to_timestamp('2000 - JUN', 'YYYY-MON') work, but + to_timestamp('2000    JUN', 'FXYYYY MON') returns an error + because to_timestamp expects only a single space. + FX must be specified as the first item in + the template. + + + + + + A separator (a space or non-letter/non-digit character) in the template string of + to_timestamp and to_date + matches any single separator in the input string or is skipped, + unless the FX option is used. + For example, to_timestamp('2000JUN', 'YYYY///MON') and + to_timestamp('2000/JUN', 'YYYY MON') work, but + to_timestamp('2000//JUN', 'YYYY/MON') + returns an error because the number of separators in the input string + exceeds the number of separators in the template. + + + If FX is specified, a separator in the template string + matches exactly one character in the input string. But note that the + input string character is not required to be the same as the separator from the template string. + For example, to_timestamp('2000/JUN', 'FXYYYY MON') + works, but to_timestamp('2000/JUN', 'FXYYYY  MON') + returns an error because the second space in the template string consumes + the letter J from the input string. + + + + + + A TZH template pattern can match a signed number. + Without the FX option, minus signs may be ambiguous, + and could be interpreted as a separator. + This ambiguity is resolved as follows: If the number of separators before + TZH in the template string is less than the number of + separators before the minus sign in the input string, the minus sign + is interpreted as part of TZH. + Otherwise, the minus sign is considered to be a separator between values. + For example, to_timestamp('2000 -10', 'YYYY TZH') matches + -10 to TZH, but + to_timestamp('2000 -10', 'YYYY  TZH') + matches 10 to TZH. + + + + + + Ordinary text is allowed in to_char + templates and will be output literally. You can put a substring + in double quotes to force it to be interpreted as literal text + even if it contains template patterns. For example, in + '"Hello Year "YYYY', the YYYY + will be replaced by the year data, but the single Y in Year + will not be. + In to_date, to_number, + and to_timestamp, literal text and double-quoted + strings result in skipping the number of characters contained in the + string; for example "XX" skips two input characters + (whether or not they are XX). + + + + Prior to PostgreSQL 12, it was possible to + skip arbitrary text in the input string using non-letter or non-digit + characters. For example, + to_timestamp('2000y6m1d', 'yyyy-MM-DD') used to + work. Now you can only use letter characters for this purpose. 
For example, + to_timestamp('2000y6m1d', 'yyyytMMtDDt') and + to_timestamp('2000y6m1d', 'yyyy"y"MM"m"DD"d"') + skip y, m, and + d. + + + + + + + If you want to have a double quote in the output you must + precede it with a backslash, for example '\"YYYY + Month\"'. + Backslashes are not otherwise special outside of double-quoted + strings. Within a double-quoted string, a backslash causes the + next character to be taken literally, whatever it is (but this + has no special effect unless the next character is a double quote + or another backslash). + + + + + + In to_timestamp and to_date, + if the year format specification is less than four digits, e.g., + YYY, and the supplied year is less than four digits, + the year will be adjusted to be nearest to the year 2020, e.g., + 95 becomes 1995. + + + + + + In to_timestamp and to_date, + negative years are treated as signifying BC. If you write both a + negative year and an explicit BC field, you get AD + again. An input of year zero is treated as 1 BC. + + + + + + In to_timestamp and to_date, + the YYYY conversion has a restriction when + processing years with more than 4 digits. You must + use some non-digit character or template after YYYY, + otherwise the year is always interpreted as 4 digits. For example + (with the year 20000): + to_date('200001130', 'YYYYMMDD') will be + interpreted as a 4-digit year; instead use a non-digit + separator after the year, like + to_date('20000-1130', 'YYYY-MMDD') or + to_date('20000Nov30', 'YYYYMonDD'). + + + + + + In to_timestamp and to_date, + the CC (century) field is accepted but ignored + if there is a YYY, YYYY or + Y,YYY field. If CC is used with + YY or Y then the result is + computed as that year in the specified century. If the century is + specified but the year is not, the first year of the century + is assumed. + + + + + + In to_timestamp and to_date, + weekday names or numbers (DAY, D, + and related field types) are accepted but are ignored for purposes of + computing the result. The same is true for quarter + (Q) fields. + + + + + + In to_timestamp and to_date, + an ISO 8601 week-numbering date (as distinct from a Gregorian date) + can be specified in one of two ways: + + + + Year, week number, and weekday: for + example to_date('2006-42-4', 'IYYY-IW-ID') + returns the date 2006-10-19. + If you omit the weekday it is assumed to be 1 (Monday). + + + + + Year and day of year: for example to_date('2006-291', + 'IYYY-IDDD') also returns 2006-10-19. + + + + + + Attempting to enter a date using a mixture of ISO 8601 week-numbering + fields and Gregorian date fields is nonsensical, and will cause an + error. In the context of an ISO 8601 week-numbering year, the + concept of a month or day of month has no + meaning. In the context of a Gregorian year, the ISO week has no + meaning. + + + + While to_date will reject a mixture of + Gregorian and ISO week-numbering date + fields, to_char will not, since output format + specifications like YYYY-MM-DD (IYYY-IDDD) can be + useful. But avoid writing something like IYYY-MM-DD; + that would yield surprising results near the start of the year. + (See for more + information.) + + + + + + + In to_timestamp, millisecond + (MS) or microsecond (US) + fields are used as the + seconds digits after the decimal point. For example + to_timestamp('12.3', 'SS.MS') is not 3 milliseconds, + but 300, because the conversion treats it as 12 + 0.3 seconds. + So, for the format SS.MS, the input values + 12.3, 12.30, + and 12.300 specify the + same number of milliseconds. 
To get three milliseconds, one must write + 12.003, which the conversion treats as + 12 + 0.003 = 12.003 seconds. + + + + Here is a more + complex example: + to_timestamp('15:12:02.020.001230', 'HH24:MI:SS.MS.US') + is 15 hours, 12 minutes, and 2 seconds + 20 milliseconds + + 1230 microseconds = 2.021230 seconds. + + + + + + to_char(..., 'ID')'s day of the week numbering + matches the extract(isodow from ...) function, but + to_char(..., 'D')'s does not match + extract(dow from ...)'s day numbering. + + + + + + to_char(interval) formats HH and + HH12 as shown on a 12-hour clock, for example zero hours + and 36 hours both output as 12, while HH24 + outputs the full hour value, which can exceed 23 in + an interval value. + + + + + + + + shows the + template patterns available for formatting numeric values. + + + + Template Patterns for Numeric Formatting + + + + Pattern + Description + + + + + 9 + digit position (can be dropped if insignificant) + + + 0 + digit position (will not be dropped, even if insignificant) + + + . (period) + decimal point + + + , (comma) + group (thousands) separator + + + PR + negative value in angle brackets + + + S + sign anchored to number (uses locale) + + + L + currency symbol (uses locale) + + + D + decimal point (uses locale) + + + G + group separator (uses locale) + + + MI + minus sign in specified position (if number < 0) + + + PL + plus sign in specified position (if number > 0) + + + SG + plus/minus sign in specified position + + + RN or rn + Roman numeral (values between 1 and 3999) + + + TH or th + ordinal number suffix + + + V + shift specified number of digits (see notes) + + + EEEE + exponent for scientific notation + + + +
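+
+   For example (one leading column is reserved for the sign, and the
+   value is rounded to the requested number of fractional digits):
+
+SELECT to_char(12345.678, '99,999.99');
+Result:  12,345.68
+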
+ + + Usage notes for numeric formatting: + + + + + 0 specifies a digit position that will always be printed, + even if it contains a leading/trailing zero. 9 also + specifies a digit position, but if it is a leading zero then it will + be replaced by a space, while if it is a trailing zero and fill mode + is specified then it will be deleted. (For to_number(), + these two pattern characters are equivalent.) + + + + + + If the format provides fewer fractional digits than the number being + formatted, to_char() will round the number to + the specified number of fractional digits. + + + + + + The pattern characters S, L, D, + and G represent the sign, currency symbol, decimal point, + and thousands separator characters defined by the current locale + (see + and ). The pattern characters period + and comma represent those exact characters, with the meanings of + decimal point and thousands separator, regardless of locale. + + + + + + If no explicit provision is made for a sign + in to_char()'s pattern, one column will be reserved for + the sign, and it will be anchored to (appear just left of) the + number. If S appears just left of some 9's, + it will likewise be anchored to the number. + + + + + + A sign formatted using SG, PL, or + MI is not anchored to + the number; for example, + to_char(-12, 'MI9999') produces '-  12' + but to_char(-12, 'S9999') produces '  -12'. + (The Oracle implementation does not allow the use of + MI before 9, but rather + requires that 9 precede + MI.) + + + + + + TH does not convert values less than zero + and does not convert fractional numbers. + + + + + + PL, SG, and + TH are PostgreSQL + extensions. + + + + + + In to_number, if non-data template patterns such + as L or TH are used, the + corresponding number of input characters are skipped, whether or not + they match the template pattern, unless they are data characters + (that is, digits, sign, decimal point, or comma). For + example, TH would skip two non-data characters. + + + + + + V with to_char + multiplies the input values by + 10^n, where + n is the number of digits following + V. V with + to_number divides in a similar manner. + The V can be thought of as marking the position + of an implicit decimal point in the input or output string. + to_char and to_number + do not support the use of + V combined with a decimal point + (e.g., 99.9V99 is not allowed). + + + + + + EEEE (scientific notation) cannot be used in + combination with any of the other formatting patterns or + modifiers other than digit and decimal point patterns, and must be at the end of the format string + (e.g., 9.99EEEE is a valid pattern). + + + + + + In to_number(), the RN + pattern converts Roman numerals (in standard form) to numbers. + Input is case-insensitive, so RN + and rn are equivalent. RN + cannot be used in combination with any other formatting patterns or + modifiers except FM, which is applicable only + in to_char() and is ignored + in to_number(). + + + + + + + Certain modifiers can be applied to any template pattern to alter its + behavior. For example, FM99.99 + is the 99.99 pattern with the + FM modifier. + shows the + modifier patterns for numeric formatting. + + + + Template Pattern Modifiers for Numeric Formatting + + + + Modifier + Description + Example + + + + + FM prefix + fill mode (suppress trailing zeroes and padding blanks) + FM99.99 + + + TH suffix + upper case ordinal number suffix + 999TH + + + th suffix + lower case ordinal number suffix + 999th + + + +
+ + + shows some + examples of the use of the to_char function. + + + + <function>to_char</function> Examples + + + + Expression + Result + + + + + to_char(current_timestamp, 'Day, DD  HH12:MI:SS') + 'Tuesday  , 06  05:39:18' + + + to_char(current_timestamp, 'FMDay, FMDD  HH12:MI:SS') + 'Tuesday, 6  05:39:18' + + + to_char(current_timestamp AT TIME ZONE + 'UTC', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') + '2022-12-06T05:39:18Z', + ISO 8601 extended format + + + to_char(-0.1, '99.99') + '  -.10' + + + to_char(-0.1, 'FM9.99') + '-.1' + + + to_char(-0.1, 'FM90.99') + '-0.1' + + + to_char(0.1, '0.9') + ' 0.1' + + + to_char(12, '9990999.9') + '    0012.0' + + + to_char(12, 'FM9990999.9') + '0012.' + + + to_char(485, '999') + ' 485' + + + to_char(-485, '999') + '-485' + + + to_char(485, '9 9 9') + ' 4 8 5' + + + to_char(1485, '9,999') + ' 1,485' + + + to_char(1485, '9G999') + ' 1 485' + + + to_char(148.5, '999.999') + ' 148.500' + + + to_char(148.5, 'FM999.999') + '148.5' + + + to_char(148.5, 'FM999.990') + '148.500' + + + to_char(148.5, '999D999') + ' 148,500' + + + to_char(3148.5, '9G999D999') + ' 3 148,500' + + + to_char(-485, '999S') + '485-' + + + to_char(-485, '999MI') + '485-' + + + to_char(485, '999MI') + '485 ' + + + to_char(485, 'FM999MI') + '485' + + + to_char(485, 'PL999') + '+485' + + + to_char(485, 'SG999') + '+485' + + + to_char(-485, 'SG999') + '-485' + + + to_char(-485, '9SG99') + '4-85' + + + to_char(-485, '999PR') + '<485>' + + + to_char(485, 'L999') + 'DM 485' + + + to_char(485, 'RN') + '        CDLXXXV' + + + to_char(485, 'FMRN') + 'CDLXXXV' + + + to_char(5.2, 'FMRN') + 'V' + + + to_char(482, '999th') + ' 482nd' + + + to_char(485, '"Good number:"999') + 'Good number: 485' + + + to_char(485.8, '"Pre:"999" Post:" .999') + 'Pre: 485 Post: .800' + + + to_char(12, '99V999') + ' 12000' + + + to_char(12.4, '99V999') + ' 12400' + + + to_char(12.45, '99V9') + ' 125' + + + to_char(0.0004859, '9.99EEEE') + ' 4.86e-04' + + + +
+ +
diff --git a/doc/src/sgml/func/func-geometry.sgml b/doc/src/sgml/func/func-geometry.sgml new file mode 100644 index 0000000000000..ba203af3bd289 --- /dev/null +++ b/doc/src/sgml/func/func-geometry.sgml @@ -0,0 +1,1261 @@ + + Geometric Functions and Operators + + + The geometric types point, box, + lseg, line, path, + polygon, and circle have a large set of + native support functions and operators, shown in , , and . + + + + Geometric Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + geometric_type + point + geometric_type + + + Adds the coordinates of the second point to those of each + point of the first argument, thus performing translation. + Available for point, box, path, + circle. + + + box '(1,1),(0,0)' + point '(2,0)' + (3,1),(2,0) + + + + + + path + path + path + + + Concatenates two open paths (returns NULL if either path is closed). + + + path '[(0,0),(1,1)]' + path '[(2,2),(3,3),(4,4)]' + [(0,0),(1,1),(2,2),(3,3),(4,4)] + + + + + + geometric_type - point + geometric_type + + + Subtracts the coordinates of the second point from those + of each point of the first argument, thus performing translation. + Available for point, box, path, + circle. + + + box '(1,1),(0,0)' - point '(2,0)' + (-1,1),(-2,0) + + + + + + geometric_type * point + geometric_type + + + Multiplies each point of the first argument by the second + point (treating a point as being a complex number + represented by real and imaginary parts, and performing standard + complex multiplication). If one interprets + the second point as a vector, this is equivalent to + scaling the object's size and distance from the origin by the length + of the vector, and rotating it counterclockwise around the origin by + the vector's angle from the x axis. + Available for point, box,Rotating a + box with these operators only moves its corner points: the box is + still considered to have sides parallel to the axes. Hence the box's + size is not preserved, as a true rotation would do. + path, circle. + + + path '((0,0),(1,0),(1,1))' * point '(3.0,0)' + ((0,0),(3,0),(3,3)) + + + path '((0,0),(1,0),(1,1))' * point(cosd(45), sind(45)) + ((0,0),&zwsp;(0.7071067811865475,0.7071067811865475),&zwsp;(0,1.414213562373095)) + + + + + + geometric_type / point + geometric_type + + + Divides each point of the first argument by the second + point (treating a point as being a complex number + represented by real and imaginary parts, and performing standard + complex division). If one interprets + the second point as a vector, this is equivalent to + scaling the object's size and distance from the origin down by the + length of the vector, and rotating it clockwise around the origin by + the vector's angle from the x axis. + Available for point, box, path, + circle. + + + path '((0,0),(1,0),(1,1))' / point '(2.0,0)' + ((0,0),(0.5,0),(0.5,0.5)) + + + path '((0,0),(1,0),(1,1))' / point(cosd(45), sind(45)) + ((0,0),&zwsp;(0.7071067811865476,-0.7071067811865476),&zwsp;(1.4142135623730951,0)) + + + + + + @-@ geometric_type + double precision + + + Computes the total length. + Available for lseg, path. + + + @-@ path '[(0,0),(1,0),(1,1)]' + 2 + + + + + + @@ geometric_type + point + + + Computes the center point. + Available for box, lseg, + polygon, circle. + + + @@ box '(2,2),(0,0)' + (1,1) + + + + + + # geometric_type + integer + + + Returns the number of points. + Available for path, polygon. 
+ + + # path '((1,0),(0,1),(-1,0))' + 3 + + + + + + geometric_type # geometric_type + point + + + Computes the point of intersection, or NULL if there is none. + Available for lseg, line. + + + lseg '[(0,0),(1,1)]' # lseg '[(1,0),(0,1)]' + (0.5,0.5) + + + + + + box # box + box + + + Computes the intersection of two boxes, or NULL if there is none. + + + box '(2,2),(-1,-1)' # box '(1,1),(-2,-2)' + (1,1),(-1,-1) + + + + + + geometric_type ## geometric_type + point + + + Computes the closest point to the first object on the second object. + Available for these pairs of types: + (point, box), + (point, lseg), + (point, line), + (lseg, box), + (lseg, lseg), + (line, lseg). + + + point '(0,0)' ## lseg '[(2,0),(0,2)]' + (1,1) + + + + + + geometric_type <-> geometric_type + double precision + + + Computes the distance between the objects. + Available for all seven geometric types, for all combinations + of point with another geometric type, and for + these additional pairs of types: + (box, lseg), + (lseg, line), + (polygon, circle) + (and the commutator cases). + + + circle '<(0,0),1>' <-> circle '<(5,0),1>' + 3 + + + + + + geometric_type @> geometric_type + boolean + + + Does first object contain second? + Available for these pairs of types: + (box, point), + (box, box), + (path, point), + (polygon, point), + (polygon, polygon), + (circle, point), + (circle, circle). + + + circle '<(0,0),2>' @> point '(1,1)' + t + + + + + + geometric_type <@ geometric_type + boolean + + + Is first object contained in or on second? + Available for these pairs of types: + (point, box), + (point, lseg), + (point, line), + (point, path), + (point, polygon), + (point, circle), + (box, box), + (lseg, box), + (lseg, line), + (polygon, polygon), + (circle, circle). + + + point '(1,1)' <@ circle '<(0,0),2>' + t + + + + + + geometric_type && geometric_type + boolean + + + Do these objects overlap? (One point in common makes this true.) + Available for box, polygon, + circle. + + + box '(1,1),(0,0)' && box '(2,2),(0,0)' + t + + + + + + geometric_type << geometric_type + boolean + + + Is first object strictly left of second? + Available for point, box, + polygon, circle. + + + circle '<(0,0),1>' << circle '<(5,0),1>' + t + + + + + + geometric_type >> geometric_type + boolean + + + Is first object strictly right of second? + Available for point, box, + polygon, circle. + + + circle '<(5,0),1>' >> circle '<(0,0),1>' + t + + + + + + geometric_type &< geometric_type + boolean + + + Does first object not extend to the right of second? + Available for box, polygon, + circle. + + + box '(1,1),(0,0)' &< box '(2,2),(0,0)' + t + + + + + + geometric_type &> geometric_type + boolean + + + Does first object not extend to the left of second? + Available for box, polygon, + circle. + + + box '(3,3),(0,0)' &> box '(2,2),(0,0)' + t + + + + + + geometric_type <<| geometric_type + boolean + + + Is first object strictly below second? + Available for point, box, polygon, + circle. + + + box '(3,3),(0,0)' <<| box '(5,5),(3,4)' + t + + + + + + geometric_type |>> geometric_type + boolean + + + Is first object strictly above second? + Available for point, box, polygon, + circle. + + + box '(5,5),(3,4)' |>> box '(3,3),(0,0)' + t + + + + + + geometric_type &<| geometric_type + boolean + + + Does first object not extend above second? + Available for box, polygon, + circle. + + + box '(1,1),(0,0)' &<| box '(2,2),(0,0)' + t + + + + + + geometric_type |&> geometric_type + boolean + + + Does first object not extend below second? 
+ Available for box, polygon, + circle. + + + box '(3,3),(0,0)' |&> box '(2,2),(0,0)' + t + + + + + + box <^ box + boolean + + + Is first object below second (allows edges to touch)? + + + box '((1,1),(0,0))' <^ box '((2,2),(1,1))' + t + + + + + + box >^ box + boolean + + + Is first object above second (allows edges to touch)? + + + box '((2,2),(1,1))' >^ box '((1,1),(0,0))' + t + + + + + + geometric_type ?# geometric_type + boolean + + + Do these objects intersect? + Available for these pairs of types: + (box, box), + (lseg, box), + (lseg, lseg), + (lseg, line), + (line, box), + (line, line), + (path, path). + + + lseg '[(-1,0),(1,0)]' ?# box '(2,2),(-2,-2)' + t + + + + + + ?- line + boolean + + + ?- lseg + boolean + + + Is line horizontal? + + + ?- lseg '[(-1,0),(1,0)]' + t + + + + + + point ?- point + boolean + + + Are points horizontally aligned (that is, have same y coordinate)? + + + point '(1,0)' ?- point '(0,0)' + t + + + + + + ?| line + boolean + + + ?| lseg + boolean + + + Is line vertical? + + + ?| lseg '[(-1,0),(1,0)]' + f + + + + + + point ?| point + boolean + + + Are points vertically aligned (that is, have same x coordinate)? + + + point '(0,1)' ?| point '(0,0)' + t + + + + + + line ?-| line + boolean + + + lseg ?-| lseg + boolean + + + Are lines perpendicular? + + + lseg '[(0,0),(0,1)]' ?-| lseg '[(0,0),(1,0)]' + t + + + + + + line ?|| line + boolean + + + lseg ?|| lseg + boolean + + + Are lines parallel? + + + lseg '[(-1,0),(1,0)]' ?|| lseg '[(-1,2),(1,2)]' + t + + + + + + geometric_type ~= geometric_type + boolean + + + Are these objects the same? + Available for point, box, + polygon, circle. + + + polygon '((0,0),(1,1))' ~= polygon '((1,1),(0,0))' + t + + + + +
+ + + + Note that the same as operator, ~=, + represents the usual notion of equality for the point, + box, polygon, and circle types. + Some of the geometric types also have an = operator, but + = compares for equal areas only. + The other scalar comparison operators (<= and so + on), where available for these types, likewise compare areas. + + + + + + Before PostgreSQL 14, the point + is strictly below/above comparison operators point + <<| point and point + |>> point were respectively + called <^ and >^. These + names are still available, but are deprecated and will eventually be + removed. + + + + + Geometric Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + area + + area ( geometric_type ) + double precision + + + Computes area. + Available for box, path, circle. + A path input must be closed, else NULL is returned. + Also, if the path is self-intersecting, the result may be + meaningless. + + + area(box '(2,2),(0,0)') + 4 + + + + + + + center + + center ( geometric_type ) + point + + + Computes center point. + Available for box, circle. + + + center(box '(1,2),(0,0)') + (0.5,1) + + + + + + + diagonal + + diagonal ( box ) + lseg + + + Extracts box's diagonal as a line segment + (same as lseg(box)). + + + diagonal(box '(1,2),(0,0)') + [(1,2),(0,0)] + + + + + + + diameter + + diameter ( circle ) + double precision + + + Computes diameter of circle. + + + diameter(circle '<(0,0),2>') + 4 + + + + + + + height + + height ( box ) + double precision + + + Computes vertical size of box. + + + height(box '(1,2),(0,0)') + 2 + + + + + + + isclosed + + isclosed ( path ) + boolean + + + Is path closed? + + + isclosed(path '((0,0),(1,1),(2,0))') + t + + + + + + + isopen + + isopen ( path ) + boolean + + + Is path open? + + + isopen(path '[(0,0),(1,1),(2,0)]') + t + + + + + + + length + + length ( geometric_type ) + double precision + + + Computes the total length. + Available for lseg, path. + + + length(path '((-1,0),(1,0))') + 4 + + + + + + + npoints + + npoints ( geometric_type ) + integer + + + Returns the number of points. + Available for path, polygon. + + + npoints(path '[(0,0),(1,1),(2,0)]') + 3 + + + + + + + pclose + + pclose ( path ) + path + + + Converts path to closed form. + + + pclose(path '[(0,0),(1,1),(2,0)]') + ((0,0),(1,1),(2,0)) + + + + + + + popen + + popen ( path ) + path + + + Converts path to open form. + + + popen(path '((0,0),(1,1),(2,0))') + [(0,0),(1,1),(2,0)] + + + + + + + radius + + radius ( circle ) + double precision + + + Computes radius of circle. + + + radius(circle '<(0,0),2>') + 2 + + + + + + + slope + + slope ( point, point ) + double precision + + + Computes slope of a line drawn through the two points. + + + slope(point '(0,0)', point '(2,1)') + 0.5 + + + + + + + width + + width ( box ) + double precision + + + Computes horizontal size of box. + + + width(box '(1,2),(0,0)') + 1 + + + + +
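+
+   These operators and functions combine in ordinary queries like any
+   others. A brief illustration; the table shops and its
+   columns here are hypothetical:
+
+CREATE TABLE shops (name text, loc point);
+INSERT INTO shops VALUES ('north', point '(1,2)'), ('far', point '(20,20)');
+
+-- distance ordering with <-> and containment testing with <@
+SELECT name, loc <-> point '(0,0)' AS dist
+  FROM shops
+ WHERE loc <@ circle '<(0,0),5>'
+ ORDER BY dist;
+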
+ + + Geometric Type Conversion Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + box + + box ( circle ) + box + + + Computes box inscribed within the circle. + + + box(circle '<(0,0),2>') + (1.414213562373095,1.414213562373095),&zwsp;(-1.414213562373095,-1.414213562373095) + + + + + + box ( point ) + box + + + Converts point to empty box. + + + box(point '(1,0)') + (1,0),(1,0) + + + + + + box ( point, point ) + box + + + Converts any two corner points to box. + + + box(point '(0,1)', point '(1,0)') + (1,1),(0,0) + + + + + + box ( polygon ) + box + + + Computes bounding box of polygon. + + + box(polygon '((0,0),(1,1),(2,0))') + (2,1),(0,0) + + + + + + + bound_box + + bound_box ( box, box ) + box + + + Computes bounding box of two boxes. + + + bound_box(box '(1,1),(0,0)', box '(4,4),(3,3)') + (4,4),(0,0) + + + + + + + circle + + circle ( box ) + circle + + + Computes smallest circle enclosing box. + + + circle(box '(1,1),(0,0)') + <(0.5,0.5),0.7071067811865476> + + + + + + circle ( point, double precision ) + circle + + + Constructs circle from center and radius. + + + circle(point '(0,0)', 2.0) + <(0,0),2> + + + + + + circle ( polygon ) + circle + + + Converts polygon to circle. The circle's center is the mean of the + positions of the polygon's points, and the radius is the average + distance of the polygon's points from that center. + + + circle(polygon '((0,0),(1,3),(2,0))') + <(1,1),1.6094757082487299> + + + + + + + line + + line ( point, point ) + line + + + Converts two points to the line through them. + + + line(point '(-1,0)', point '(1,0)') + {0,-1,0} + + + + + + + lseg + + lseg ( box ) + lseg + + + Extracts box's diagonal as a line segment. + + + lseg(box '(1,0),(-1,0)') + [(1,0),(-1,0)] + + + + + + lseg ( point, point ) + lseg + + + Constructs line segment from two endpoints. + + + lseg(point '(-1,0)', point '(1,0)') + [(-1,0),(1,0)] + + + + + + + path + + path ( polygon ) + path + + + Converts polygon to a closed path with the same list of points. + + + path(polygon '((0,0),(1,1),(2,0))') + ((0,0),(1,1),(2,0)) + + + + + + + point + + point ( double precision, double precision ) + point + + + Constructs point from its coordinates. + + + point(23.4, -44.5) + (23.4,-44.5) + + + + + + point ( box ) + point + + + Computes center of box. + + + point(box '(1,0),(-1,0)') + (0,0) + + + + + + point ( circle ) + point + + + Computes center of circle. + + + point(circle '<(0,0),2>') + (0,0) + + + + + + point ( lseg ) + point + + + Computes center of line segment. + + + point(lseg '[(-1,0),(1,0)]') + (0,0) + + + + + + point ( polygon ) + point + + + Computes center of polygon (the mean of the + positions of the polygon's points). + + + point(polygon '((0,0),(1,1),(2,0))') + (1,0.3333333333333333) + + + + + + + polygon + + polygon ( box ) + polygon + + + Converts box to a 4-point polygon. + + + polygon(box '(1,1),(0,0)') + ((0,0),(0,1),(1,1),(1,0)) + + + + + + polygon ( circle ) + polygon + + + Converts circle to a 12-point polygon. 
+ + + polygon(circle '<(0,0),2>') + ((-2,0),&zwsp;(-1.7320508075688774,0.9999999999999999),&zwsp;(-1.0000000000000002,1.7320508075688772),&zwsp;(-1.2246063538223773e-16,2),&zwsp;(0.9999999999999996,1.7320508075688774),&zwsp;(1.732050807568877,1.0000000000000007),&zwsp;(2,2.4492127076447545e-16),&zwsp;(1.7320508075688776,-0.9999999999999994),&zwsp;(1.0000000000000009,-1.7320508075688767),&zwsp;(3.673819061467132e-16,-2),&zwsp;(-0.9999999999999987,-1.732050807568878),&zwsp;(-1.7320508075688767,-1.0000000000000009)) + + + + + + polygon ( integer, circle ) + polygon + + + Converts circle to an n-point polygon. + + + polygon(4, circle '<(3,0),1>') + ((2,0),&zwsp;(3,1),&zwsp;(4,1.2246063538223773e-16),&zwsp;(3,-1)) + + + + + + polygon ( path ) + polygon + + + Converts closed path to a polygon with the same list of points. + + + polygon(path '((0,0),(1,1),(2,0))') + ((0,0),(1,1),(2,0)) + + + + + +
+ + + It is possible to access the two component numbers of a point + as though the point were an array with indexes 0 and 1. For example, if + t.p is a point column then + SELECT p[0] FROM t retrieves the X coordinate and + UPDATE t SET p[1] = ... changes the Y coordinate. + In the same way, a value of type box or lseg can be treated + as an array of two point values. + + +
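+
+   For instance, with a hypothetical table t as in the
+   paragraph above:
+
+CREATE TABLE t (p point);
+INSERT INTO t VALUES (point '(1,2)');
+SELECT p[0] AS x, p[1] AS y FROM t;   -- yields 1 and 2
+UPDATE t SET p[1] = 5;                -- moves the point to (1,5)
+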
diff --git a/doc/src/sgml/func/func-info.sgml b/doc/src/sgml/func/func-info.sgml new file mode 100644 index 0000000000000..b507bfaf64b19 --- /dev/null +++ b/doc/src/sgml/func/func-info.sgml @@ -0,0 +1,3790 @@ + + System Information Functions and Operators + + + The functions described in this section are used to obtain various + information about a PostgreSQL installation. + + + + Session Information Functions + + + shows several + functions that extract session and system information. + + + + In addition to the functions listed in this section, there are a number of + functions related to the statistics system that also provide system + information. See for more + information. + + + + Session Information Functions + + + + + Function + + + Description + + + + + + + + + current_catalog + + current_catalog + name + + + + current_database + + current_database () + name + + + Returns the name of the current database. (Databases are + called catalogs in the SQL standard, + so current_catalog is the standard's + spelling.) + + + + + + + current_query + + current_query () + text + + + Returns the text of the currently executing query, as submitted + by the client (which might contain more than one statement). + + + + + + + current_role + + current_role + name + + + This is equivalent to current_user. + + + + + + + current_schema + + + schema + current + + current_schema + name + + + current_schema () + name + + + Returns the name of the schema that is first in the search path (or a + null value if the search path is empty). This is the schema that will + be used for any tables or other named objects that are created without + specifying a target schema. + + + + + + + current_schemas + + + search path + current + + current_schemas ( include_implicit boolean ) + name[] + + + Returns an array of the names of all schemas presently in the + effective search path, in their priority order. (Items in the current + setting that do not correspond to + existing, searchable schemas are omitted.) If the Boolean argument + is true, then implicitly-searched system schemas + such as pg_catalog are included in the result. + + + + + + + current_user + + + user + current + + current_user + name + + + Returns the user name of the current execution context. + + + + + + + inet_client_addr + + inet_client_addr () + inet + + + Returns the IP address of the current client, + or NULL if the current connection is via a + Unix-domain socket. + + + + + + + inet_client_port + + inet_client_port () + integer + + + Returns the IP port number of the current client, + or NULL if the current connection is via a + Unix-domain socket. + + + + + + + inet_server_addr + + inet_server_addr () + inet + + + Returns the IP address on which the server accepted the current + connection, + or NULL if the current connection is via a + Unix-domain socket. + + + + + + + inet_server_port + + inet_server_port () + integer + + + Returns the IP port number on which the server accepted the current + connection, + or NULL if the current connection is via a + Unix-domain socket. + + + + + + + pg_backend_pid + + pg_backend_pid () + integer + + + Returns the process ID of the server process attached to the current + session. + + + + + + + pg_blocking_pids + + pg_blocking_pids ( integer ) + integer[] + + + Returns an array of the process ID(s) of the sessions that are + blocking the server process with the specified process ID from + acquiring a lock, or an empty array if there is no such server process + or it is not blocked. 
+ + + One server process blocks another if it either holds a lock that + conflicts with the blocked process's lock request (hard block), or is + waiting for a lock that would conflict with the blocked process's lock + request and is ahead of it in the wait queue (soft block). When using + parallel queries the result always lists client-visible process IDs + (that is, pg_backend_pid results) even if the + actual lock is held or awaited by a child worker process. As a result + of that, there may be duplicated PIDs in the result. Also note that + when a prepared transaction holds a conflicting lock, it will be + represented by a zero process ID. + + + Frequent calls to this function could have some impact on database + performance, because it needs exclusive access to the lock manager's + shared state for a short time. + + + + + + + pg_conf_load_time + + pg_conf_load_time () + timestamp with time zone + + + Returns the time when the server configuration files were last loaded. + If the current session was alive at the time, this will be the time + when the session itself re-read the configuration files (so the + reading will vary a little in different sessions). Otherwise it is + the time when the postmaster process re-read the configuration files. + + + + + + + pg_current_logfile + + + Logging + pg_current_logfile function + + + current_logfiles + and the pg_current_logfile function + + + Logging + current_logfiles file and the pg_current_logfile + function + + pg_current_logfile ( text ) + text + + + Returns the path name of the log file currently in use by the logging + collector. The path includes the + directory and the individual log file name. The result + is NULL if the logging collector is disabled. + When multiple log files exist, each in a different + format, pg_current_logfile without an argument + returns the path of the file having the first format found in the + ordered list: stderr, + csvlog, jsonlog. + NULL is returned if no log file has any of these + formats. + To request information about a specific log file format, supply + either csvlog, jsonlog or + stderr as the + value of the optional parameter. The result is NULL + if the log format requested is not configured in + . + The result reflects the contents of + the current_logfiles file. + + + This function is restricted to superusers and roles with privileges of + the pg_monitor role by default, but other users can + be granted EXECUTE to run the function. + + + + + + + pg_get_loaded_modules + + pg_get_loaded_modules () + setof record + ( module_name text, + version text, + file_name text ) + + + Returns a list of the loadable modules that are loaded into the + current server session. The module_name + and version fields are NULL unless the + module author supplied values for them using + the PG_MODULE_MAGIC_EXT macro. + The file_name field gives the file + name of the module (shared library). + + + + + + + pg_my_temp_schema + + pg_my_temp_schema () + oid + + + Returns the OID of the current session's temporary schema, or zero if + it has none (because it has not created any temporary tables). + + + + + + + pg_is_other_temp_schema + + pg_is_other_temp_schema ( oid ) + boolean + + + Returns true if the given OID is the OID of another session's + temporary schema. (This can be useful, for example, to exclude other + sessions' temporary tables from a catalog display.) 
+ + + + + + + pg_jit_available + + pg_jit_available () + boolean + + + Returns true if a JIT compiler extension is + available (see ) and the + configuration parameter is set to + on. + + + + + + + pg_numa_available + + pg_numa_available () + boolean + + + Returns true if the server has been compiled with NUMA support. + + + + + + + pg_listening_channels + + pg_listening_channels () + setof text + + + Returns the set of names of asynchronous notification channels that + the current session is listening to. + + + + + + + pg_notification_queue_usage + + pg_notification_queue_usage () + double precision + + + Returns the fraction (0–1) of the asynchronous notification + queue's maximum size that is currently occupied by notifications that + are waiting to be processed. + See and + for more information. + + + + + + + pg_postmaster_start_time + + pg_postmaster_start_time () + timestamp with time zone + + + Returns the time when the server started. + + + + + + + pg_safe_snapshot_blocking_pids + + pg_safe_snapshot_blocking_pids ( integer ) + integer[] + + + Returns an array of the process ID(s) of the sessions that are blocking + the server process with the specified process ID from acquiring a safe + snapshot, or an empty array if there is no such server process or it + is not blocked. + + + A session running a SERIALIZABLE transaction blocks + a SERIALIZABLE READ ONLY DEFERRABLE transaction + from acquiring a snapshot until the latter determines that it is safe + to avoid taking any predicate locks. See + for more information about + serializable and deferrable transactions. + + + Frequent calls to this function could have some impact on database + performance, because it needs access to the predicate lock manager's + shared state for a short time. + + + + + + + pg_trigger_depth + + pg_trigger_depth () + integer + + + Returns the current nesting level + of PostgreSQL triggers (0 if not called, + directly or indirectly, from inside a trigger). + + + + + + + session_user + + session_user + name + + + Returns the session user's name. + + + + + + + system_user + + system_user + text + + + Returns the authentication method and the identity (if any) that the + user presented during the authentication cycle before they were + assigned a database role. It is represented as + auth_method:identity or + NULL if the user has not been authenticated (for + example if Trust authentication has + been used). + + + + + + + user + + user + name + + + This is equivalent to current_user. + + + + +
+ + + + current_catalog, + current_role, + current_schema, + current_user, + session_user, + and user have special syntactic status + in SQL: they must be called without trailing + parentheses. In PostgreSQL, parentheses can optionally be used with + current_schema, but not with the others. + + + + + The session_user is normally the user who initiated + the current database connection; but superusers can change this setting + with . + The current_user is the user identifier + that is applicable for permission checking. Normally it is equal + to the session user, but it can be changed with + . + It also changes during the execution of + functions with the attribute SECURITY DEFINER. + In Unix parlance, the session user is the real user and + the current user is the effective user. + current_role and user are + synonyms for current_user. (The SQL standard draws + a distinction between current_role + and current_user, but PostgreSQL + does not, since it unifies users and roles into a single kind of entity.) + + +
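+
+   The difference is easy to observe. A sketch, assuming a role named
+   alice exists and the session user is allowed to
+   SET ROLE to it:
+
+SELECT session_user, current_user;   -- both report the connecting user
+SET ROLE alice;
+SELECT session_user, current_user;   -- session_user is unchanged,
+                                     -- current_user is now alice
+RESET ROLE;
+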
+ + + Access Privilege Inquiry Functions + + + privilege + querying + + + + lists functions that + allow querying object access privileges programmatically. + (See for more information about + privileges.) + In these functions, the user whose privileges are being inquired about + can be specified by name or by OID + (pg_authid.oid), or if + the name is given as public then the privileges of the + PUBLIC pseudo-role are checked. Also, the user + argument can be omitted entirely, in which case + the current_user is assumed. + The object that is being inquired about can be specified either by name or + by OID, too. When specifying by name, a schema name can be included if + relevant. + The access privilege of interest is specified by a text string, which must + evaluate to one of the appropriate privilege keywords for the object's type + (e.g., SELECT). Optionally, WITH GRANT + OPTION can be added to a privilege type to test whether the + privilege is held with grant option. Also, multiple privilege types can be + listed separated by commas, in which case the result will be true if any of + the listed privileges is held. (Case of the privilege string is not + significant, and extra whitespace is allowed between but not within + privilege names.) + Some examples: + +SELECT has_table_privilege('myschema.mytable', 'select'); +SELECT has_table_privilege('joe', 'mytable', 'INSERT, SELECT WITH GRANT OPTION'); + + + + + Access Privilege Inquiry Functions + + + + + Function + + + Description + + + + + + + + + has_any_column_privilege + + has_any_column_privilege ( + user name or oid, + table text or oid, + privilege text ) + boolean + + + Does user have privilege for any column of table? + This succeeds either if the privilege is held for the whole table, or + if there is a column-level grant of the privilege for at least one + column. + Allowable privilege types are + SELECT, INSERT, + UPDATE, and REFERENCES. + + + + + + + has_column_privilege + + has_column_privilege ( + user name or oid, + table text or oid, + column text or smallint, + privilege text ) + boolean + + + Does user have privilege for the specified table column? + This succeeds either if the privilege is held for the whole table, or + if there is a column-level grant of the privilege for the column. + The column can be specified by name or by attribute number + (pg_attribute.attnum). + Allowable privilege types are + SELECT, INSERT, + UPDATE, and REFERENCES. + + + + + + + has_database_privilege + + has_database_privilege ( + user name or oid, + database text or oid, + privilege text ) + boolean + + + Does user have privilege for database? + Allowable privilege types are + CREATE, + CONNECT, + TEMPORARY, and + TEMP (which is equivalent to + TEMPORARY). + + + + + + + has_foreign_data_wrapper_privilege + + has_foreign_data_wrapper_privilege ( + user name or oid, + fdw text or oid, + privilege text ) + boolean + + + Does user have privilege for foreign-data wrapper? + The only allowable privilege type is USAGE. + + + + + + + has_function_privilege + + has_function_privilege ( + user name or oid, + function text or oid, + privilege text ) + boolean + + + Does user have privilege for function? + The only allowable privilege type is EXECUTE. + + + When specifying a function by name rather than by OID, the allowed + input is the same as for the regprocedure data type (see + ). 
+ An example is: + +SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute'); + + + + + + + + has_language_privilege + + has_language_privilege ( + user name or oid, + language text or oid, + privilege text ) + boolean + + + Does user have privilege for language? + The only allowable privilege type is USAGE. + + + + + + + has_largeobject_privilege + + has_largeobject_privilege ( + user name or oid, + largeobject oid, + privilege text ) + boolean + + + Does user have privilege for large object? + Allowable privilege types are + SELECT and UPDATE. + + + + + + + has_parameter_privilege + + has_parameter_privilege ( + user name or oid, + parameter text, + privilege text ) + boolean + + + Does user have privilege for configuration parameter? + The parameter name is case-insensitive. + Allowable privilege types are SET + and ALTER SYSTEM. + + + + + + + has_schema_privilege + + has_schema_privilege ( + user name or oid, + schema text or oid, + privilege text ) + boolean + + + Does user have privilege for schema? + Allowable privilege types are + CREATE and + USAGE. + + + + + + + has_sequence_privilege + + has_sequence_privilege ( + user name or oid, + sequence text or oid, + privilege text ) + boolean + + + Does user have privilege for sequence? + Allowable privilege types are + USAGE, + SELECT, and + UPDATE. + + + + + + + has_server_privilege + + has_server_privilege ( + user name or oid, + server text or oid, + privilege text ) + boolean + + + Does user have privilege for foreign server? + The only allowable privilege type is USAGE. + + + + + + + has_table_privilege + + has_table_privilege ( + user name or oid, + table text or oid, + privilege text ) + boolean + + + Does user have privilege for table? + Allowable privilege types + are SELECT, INSERT, + UPDATE, DELETE, + TRUNCATE, REFERENCES, + TRIGGER, and MAINTAIN. + + + + + + + has_tablespace_privilege + + has_tablespace_privilege ( + user name or oid, + tablespace text or oid, + privilege text ) + boolean + + + Does user have privilege for tablespace? + The only allowable privilege type is CREATE. + + + + + + + has_type_privilege + + has_type_privilege ( + user name or oid, + type text or oid, + privilege text ) + boolean + + + Does user have privilege for data type? + The only allowable privilege type is USAGE. + When specifying a type by name rather than by OID, the allowed input + is the same as for the regtype data type (see + ). + + + + + + + pg_has_role + + pg_has_role ( + user name or oid, + role text or oid, + privilege text ) + boolean + + + Does user have privilege for role? + Allowable privilege types are + MEMBER, USAGE, + and SET. + MEMBER denotes direct or indirect membership in + the role without regard to what specific privileges may be conferred. + USAGE denotes whether the privileges of the role + are immediately available without doing SET ROLE, + while SET denotes whether it is possible to change + to the role using the SET ROLE command. + WITH ADMIN OPTION or WITH GRANT + OPTION can be added to any of these privilege types to + test whether the ADMIN privilege is held (all + six spellings test the same thing). + This function does not allow the special case of + setting user to public, + because the PUBLIC pseudo-role can never be a member of real roles. + + + + + + + row_security_active + + row_security_active ( + table text or oid ) + boolean + + + Is row-level security active for the specified table in the context of + the current user and current environment? + + + + +
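+
+   The remaining functions follow the same calling conventions; for
+   example (the role name alice is illustrative):
+
+SELECT pg_has_role('alice', 'pg_monitor', 'MEMBER');
+SELECT has_database_privilege('alice', current_database(), 'CONNECT');
+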
+ + + shows the operators + available for the aclitem type, which is the catalog + representation of access privileges. See + for information about how to read access privilege values. + + + + <type>aclitem</type> Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + + aclitemeq + + aclitem = aclitem + boolean + + + Are aclitems equal? (Notice that + type aclitem lacks the usual set of comparison + operators; it has only equality. In turn, aclitem + arrays can only be compared for equality.) + + + 'calvin=r*w/hobbes'::aclitem = 'calvin=r*w*/hobbes'::aclitem + f + + + + + + + aclcontains + + aclitem[] @> aclitem + boolean + + + Does array contain the specified privileges? (This is true if there + is an array entry that matches the aclitem's grantee and + grantor, and has at least the specified set of privileges.) + + + '{calvin=r*w/hobbes,hobbes=r*w*/postgres}'::aclitem[] @> 'calvin=r*/hobbes'::aclitem + t + + + + + + aclitem[] ~ aclitem + boolean + + + This is a deprecated alias for @>. + + + '{calvin=r*w/hobbes,hobbes=r*w*/postgres}'::aclitem[] ~ 'calvin=r*/hobbes'::aclitem + t + + + + +
+ + + shows some additional + functions to manage the aclitem type. + + + + <type>aclitem</type> Functions + + + + + Function + + + Description + + + + + + + + + acldefault + + acldefault ( + type "char", + ownerId oid ) + aclitem[] + + + Constructs an aclitem array holding the default access + privileges for an object of type type belonging + to the role with OID ownerId. This represents + the access privileges that will be assumed when an object's + ACL entry is null. (The default access privileges + are described in .) + The type parameter must be one of + 'c' for COLUMN, + 'r' for TABLE and table-like objects, + 's' for SEQUENCE, + 'd' for DATABASE, + 'f' for FUNCTION or PROCEDURE, + 'l' for LANGUAGE, + 'L' for LARGE OBJECT, + 'n' for SCHEMA, + 'p' for PARAMETER, + 't' for TABLESPACE, + 'F' for FOREIGN DATA WRAPPER, + 'S' for FOREIGN SERVER, + or + 'T' for TYPE or DOMAIN. + + + + + + + aclexplode + + aclexplode ( aclitem[] ) + setof record + ( grantor oid, + grantee oid, + privilege_type text, + is_grantable boolean ) + + + Returns the aclitem array as a set of rows. + If the grantee is the pseudo-role PUBLIC, it is represented by zero in + the grantee column. Each granted privilege is + represented as SELECT, INSERT, + etc (see for a full list). + Note that each privilege is broken out as a separate row, so + only one keyword appears in the privilege_type + column. + + + + + + + makeaclitem + + makeaclitem ( + grantee oid, + grantor oid, + privileges text, + is_grantable boolean ) + aclitem + + + Constructs an aclitem with the given properties. + privileges is a comma-separated list of + privilege names such as SELECT, + INSERT, etc, all of which are set in the + result. (Case of the privilege string is not significant, and + extra whitespace is allowed between but not within privilege + names.) + + + + +
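+
+   These functions compose naturally. For example, to expand the
+   default privileges that a table owned by postgres
+   would have, or to build an aclitem by hand (the role
+   names are illustrative):
+
+SELECT * FROM aclexplode(acldefault('r', 'postgres'::regrole::oid));
+
+SELECT makeaclitem('alice'::regrole::oid, 'postgres'::regrole::oid,
+                   'SELECT, INSERT', false);
+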
+ +
+ + + Schema Visibility Inquiry Functions + + + shows functions that + determine whether a certain object is visible in the + current schema search path. + For example, a table is said to be visible if its + containing schema is in the search path and no table of the same + name appears earlier in the search path. This is equivalent to the + statement that the table can be referenced by name without explicit + schema qualification. Thus, to list the names of all visible tables: + +SELECT relname FROM pg_class WHERE pg_table_is_visible(oid); + + For functions and operators, an object in the search path is said to be + visible if there is no object of the same name and argument data + type(s) earlier in the path. For operator classes and families, + both the name and the associated index access method are considered. + + + + search path + object visibility + + + + Schema Visibility Inquiry Functions + + + + + Function + + + Description + + + + + + + + + pg_collation_is_visible + + pg_collation_is_visible ( collation oid ) + boolean + + + Is collation visible in search path? + + + + + + + pg_conversion_is_visible + + pg_conversion_is_visible ( conversion oid ) + boolean + + + Is conversion visible in search path? + + + + + + + pg_function_is_visible + + pg_function_is_visible ( function oid ) + boolean + + + Is function visible in search path? + (This also works for procedures and aggregates.) + + + + + + + pg_opclass_is_visible + + pg_opclass_is_visible ( opclass oid ) + boolean + + + Is operator class visible in search path? + + + + + + + pg_operator_is_visible + + pg_operator_is_visible ( operator oid ) + boolean + + + Is operator visible in search path? + + + + + + + pg_opfamily_is_visible + + pg_opfamily_is_visible ( opclass oid ) + boolean + + + Is operator family visible in search path? + + + + + + + pg_statistics_obj_is_visible + + pg_statistics_obj_is_visible ( stat oid ) + boolean + + + Is statistics object visible in search path? + + + + + + + pg_table_is_visible + + pg_table_is_visible ( table oid ) + boolean + + + Is table visible in search path? + (This works for all types of relations, including views, materialized + views, indexes, sequences and foreign tables.) + + + + + + + pg_ts_config_is_visible + + pg_ts_config_is_visible ( config oid ) + boolean + + + Is text search configuration visible in search path? + + + + + + + pg_ts_dict_is_visible + + pg_ts_dict_is_visible ( dict oid ) + boolean + + + Is text search dictionary visible in search path? + + + + + + + pg_ts_parser_is_visible + + pg_ts_parser_is_visible ( parser oid ) + boolean + + + Is text search parser visible in search path? + + + + + + + pg_ts_template_is_visible + + pg_ts_template_is_visible ( template oid ) + boolean + + + Is text search template visible in search path? + + + + + + + pg_type_is_visible + + pg_type_is_visible ( type oid ) + boolean + + + Is type (or domain) visible in search path? + + + + +
+ + + All these functions require object OIDs to identify the object to be + checked. If you want to test an object by name, it is convenient to use + the OID alias types (regclass, regtype, + regprocedure, regoperator, regconfig, + or regdictionary), + for example: + +SELECT pg_type_is_visible('myschema.widget'::regtype); + + Note that it would not make much sense to test a non-schema-qualified + type name in this way — if the name can be recognized at all, it must be visible. + + +
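+
+   Because pg_catalog is implicitly searched ahead of the
+   configured path, system objects are normally reported as visible:
+
+SELECT pg_table_is_visible('pg_class'::regclass);
+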
+ + + System Catalog Information Functions + + + lists functions that + extract information from the system catalogs. + + + + System Catalog Information Functions + + + + + Function + + + Description + + + + + + + + + format_type + + format_type ( type oid, typemod integer ) + text + + + Returns the SQL name for a data type that is identified by its type + OID and possibly a type modifier. Pass NULL for the type modifier if + no specific modifier is known. + + + + + + + pg_basetype + + pg_basetype ( regtype ) + regtype + + + Returns the OID of the base type of a domain identified by its + type OID. If the argument is the OID of a non-domain type, + returns the argument as-is. Returns NULL if the argument is + not a valid type OID. If there's a chain of domain dependencies, + it will recurse until finding the base type. + + + Assuming CREATE DOMAIN mytext AS text: + + + pg_basetype('mytext'::regtype) + text + + + + + + + pg_char_to_encoding + + pg_char_to_encoding ( encoding name ) + integer + + + Converts the supplied encoding name into an integer representing the + internal identifier used in some system catalog tables. + Returns -1 if an unknown encoding name is provided. + + + + + + + pg_encoding_to_char + + pg_encoding_to_char ( encoding integer ) + name + + + Converts the integer used as the internal identifier of an encoding in some + system catalog tables into a human-readable string. + Returns an empty string if an invalid encoding number is provided. + + + + + + + pg_get_catalog_foreign_keys + + pg_get_catalog_foreign_keys () + setof record + ( fktable regclass, + fkcols text[], + pktable regclass, + pkcols text[], + is_array boolean, + is_opt boolean ) + + + Returns a set of records describing the foreign key relationships + that exist within the PostgreSQL system + catalogs. + The fktable column contains the name of the + referencing catalog, and the fkcols column + contains the name(s) of the referencing column(s). Similarly, + the pktable column contains the name of the + referenced catalog, and the pkcols column + contains the name(s) of the referenced column(s). + If is_array is true, the last referencing + column is an array, each of whose elements should match some entry + in the referenced catalog. + If is_opt is true, the referencing column(s) + are allowed to contain zeroes instead of a valid reference. + + + + + + + pg_get_constraintdef + + pg_get_constraintdef ( constraint oid , pretty boolean ) + text + + + Reconstructs the creating command for a constraint. + (This is a decompiled reconstruction, not the original text + of the command.) + + + + + + + pg_get_expr + + pg_get_expr ( expr pg_node_tree, relation oid , pretty boolean ) + text + + + Decompiles the internal form of an expression stored in the system + catalogs, such as the default value for a column. If the expression + might contain Vars, specify the OID of the relation they refer to as + the second parameter; if no Vars are expected, passing zero is + sufficient. + + + + + + + pg_get_functiondef + + pg_get_functiondef ( func oid ) + text + + + Reconstructs the creating command for a function or procedure. + (This is a decompiled reconstruction, not the original text + of the command.) + The result is a complete CREATE OR REPLACE FUNCTION + or CREATE OR REPLACE PROCEDURE statement. 
+ + + + + + + pg_get_function_arguments + + pg_get_function_arguments ( func oid ) + text + + + Reconstructs the argument list of a function or procedure, in the form + it would need to appear in within CREATE FUNCTION + (including default values). + + + + + + + pg_get_function_identity_arguments + + pg_get_function_identity_arguments ( func oid ) + text + + + Reconstructs the argument list necessary to identify a function or + procedure, in the form it would need to appear in within commands such + as ALTER FUNCTION. This form omits default values. + + + + + + + pg_get_function_result + + pg_get_function_result ( func oid ) + text + + + Reconstructs the RETURNS clause of a function, in + the form it would need to appear in within CREATE + FUNCTION. Returns NULL for a procedure. + + + + + + + pg_get_indexdef + + pg_get_indexdef ( index oid , column integer, pretty boolean ) + text + + + Reconstructs the creating command for an index. + (This is a decompiled reconstruction, not the original text + of the command.) If column is supplied and is + not zero, only the definition of that column is reconstructed. + + + + + + + pg_get_keywords + + pg_get_keywords () + setof record + ( word text, + catcode "char", + barelabel boolean, + catdesc text, + baredesc text ) + + + Returns a set of records describing the SQL keywords recognized by the + server. The word column contains the + keyword. The catcode column contains a + category code: U for an unreserved + keyword, C for a keyword that can be a column + name, T for a keyword that can be a type or + function name, or R for a fully reserved keyword. + The barelabel column + contains true if the keyword can be used as + a bare column label in SELECT lists, + or false if it can only be used + after AS. + The catdesc column contains a + possibly-localized string describing the keyword's category. + The baredesc column contains a + possibly-localized string describing the keyword's column label status. + + + + + + + pg_get_partkeydef + + pg_get_partkeydef ( table oid ) + text + + + Reconstructs the definition of a partitioned table's partition + key, in the form it would have in the PARTITION + BY clause of CREATE TABLE. + (This is a decompiled reconstruction, not the original text + of the command.) + + + + + + + pg_get_ruledef + + pg_get_ruledef ( rule oid , pretty boolean ) + text + + + Reconstructs the creating command for a rule. + (This is a decompiled reconstruction, not the original text + of the command.) + + + + + + + pg_get_serial_sequence + + pg_get_serial_sequence ( table text, column text ) + text + + + Returns the name of the sequence associated with a column, + or NULL if no sequence is associated with the column. + If the column is an identity column, the associated sequence is the + sequence internally created for that column. + For columns created using one of the serial types + (serial, smallserial, bigserial), + it is the sequence created for that serial column definition. + In the latter case, the association can be modified or removed + with ALTER SEQUENCE OWNED BY. + (This function probably should have been + called pg_get_owned_sequence; its current name + reflects the fact that it has historically been used with serial-type + columns.) The first parameter is a table name with optional + schema, and the second parameter is a column name. Because the first + parameter potentially contains both schema and table names, it is + parsed per usual SQL rules, meaning it is lower-cased by default. 
+ The second parameter, being just a column name, is treated literally + and so has its case preserved. The result is suitably formatted + for passing to the sequence functions (see + ). + + + A typical use is in reading the current value of the sequence for an + identity or serial column, for example: + +SELECT currval(pg_get_serial_sequence('sometable', 'id')); + + + + + + + + pg_get_statisticsobjdef + + pg_get_statisticsobjdef ( statobj oid ) + text + + + Reconstructs the creating command for an extended statistics object. + (This is a decompiled reconstruction, not the original text + of the command.) + + + + + + + pg_get_triggerdef + +pg_get_triggerdef ( trigger oid , pretty boolean ) + text + + + Reconstructs the creating command for a trigger. + (This is a decompiled reconstruction, not the original text + of the command.) + + + + + + + pg_get_userbyid + + pg_get_userbyid ( role oid ) + name + + + Returns a role's name given its OID. + + + + + + + pg_get_viewdef + + pg_get_viewdef ( view oid , pretty boolean ) + text + + + Reconstructs the underlying SELECT command for a + view or materialized view. (This is a decompiled reconstruction, not + the original text of the command.) + + + + + + pg_get_viewdef ( view oid, wrap_column integer ) + text + + + Reconstructs the underlying SELECT command for a + view or materialized view. (This is a decompiled reconstruction, not + the original text of the command.) In this form of the function, + pretty-printing is always enabled, and long lines are wrapped to try + to keep them shorter than the specified number of columns. + + + + + + pg_get_viewdef ( view text , pretty boolean ) + text + + + Reconstructs the underlying SELECT command for a + view or materialized view, working from a textual name for the view + rather than its OID. (This is deprecated; use the OID variant + instead.) + + + + + + + pg_index_column_has_property + + pg_index_column_has_property ( index regclass, column integer, property text ) + boolean + + + Tests whether an index column has the named property. + Common index column properties are listed in + . + (Note that extension access methods can define additional property + names for their indexes.) + NULL is returned if the property name is not known + or does not apply to the particular object, or if the OID or column + number does not identify a valid object. + + + + + + + pg_index_has_property + + pg_index_has_property ( index regclass, property text ) + boolean + + + Tests whether an index has the named property. + Common index properties are listed in + . + (Note that extension access methods can define additional property + names for their indexes.) + NULL is returned if the property name is not known + or does not apply to the particular object, or if the OID does not + identify a valid object. + + + + + + + pg_indexam_has_property + + pg_indexam_has_property ( am oid, property text ) + boolean + + + Tests whether an index access method has the named property. + Access method properties are listed in + . + NULL is returned if the property name is not known + or does not apply to the particular object, or if the OID does not + identify a valid object. + + + + + + + pg_options_to_table + + pg_options_to_table ( options_array text[] ) + setof record + ( option_name text, + option_value text ) + + + Returns the set of storage options represented by a value from + pg_class.reloptions or + pg_attribute.attoptions. 
+ + + + + + + pg_settings_get_flags + + pg_settings_get_flags ( guc text ) + text[] + + + Returns an array of the flags associated with the given GUC, or + NULL if it does not exist. The result is + an empty array if the GUC exists but there are no flags to show. + Only the most useful flags listed in + are exposed. + + + + + + + pg_tablespace_databases + + pg_tablespace_databases ( tablespace oid ) + setof oid + + + Returns the set of OIDs of databases that have objects stored in the + specified tablespace. If this function returns any rows, the + tablespace is not empty and cannot be dropped. To identify the specific + objects populating the tablespace, you will need to connect to the + database(s) identified by pg_tablespace_databases + and query their pg_class catalogs. + + + + + + + pg_tablespace_location + + pg_tablespace_location ( tablespace oid ) + text + + + Returns the file system path that this tablespace is located in. + + + + + + + pg_typeof + + pg_typeof ( "any" ) + regtype + + + Returns the OID of the data type of the value that is passed to it. + This can be helpful for troubleshooting or dynamically constructing + SQL queries. The function is declared as + returning regtype, which is an OID alias type (see + ); this means that it is the same as an + OID for comparison purposes but displays as a type name. + + + pg_typeof(33) + integer + + + + + + + COLLATION FOR + + COLLATION FOR ( "any" ) + text + + + Returns the name of the collation of the value that is passed to it. + The value is quoted and schema-qualified if necessary. If no + collation was derived for the argument expression, + then NULL is returned. If the argument is not of a + collatable data type, then an error is raised. + + + collation for ('foo'::text) + "default" + + + collation for ('foo' COLLATE "de_DE") + "de_DE" + + + + + + + to_regclass + + to_regclass ( text ) + regclass + + + Translates a textual relation name to its OID. A similar result is + obtained by casting the string to type regclass (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regdatabase + + to_regdatabase ( text ) + regdatabase + + + Translates a textual database name to its OID. A similar result is + obtained by casting the string to type regdatabase (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regcollation + + to_regcollation ( text ) + regcollation + + + Translates a textual collation name to its OID. A similar result is + obtained by casting the string to type regcollation (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regnamespace + + to_regnamespace ( text ) + regnamespace + + + Translates a textual schema name to its OID. A similar result is + obtained by casting the string to type regnamespace (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regoper + + to_regoper ( text ) + regoper + + + Translates a textual operator name to its OID. A similar result is + obtained by casting the string to type regoper (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found or is ambiguous. + + + + + + + to_regoperator + + to_regoperator ( text ) + regoperator + + + Translates a textual operator name (with parameter types) to its OID. 
A similar result is + obtained by casting the string to type regoperator (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regproc + + to_regproc ( text ) + regproc + + + Translates a textual function or procedure name to its OID. A similar result is + obtained by casting the string to type regproc (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found or is ambiguous. + + + + + + + to_regprocedure + + to_regprocedure ( text ) + regprocedure + + + Translates a textual function or procedure name (with argument types) to its OID. A similar result is + obtained by casting the string to type regprocedure (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regrole + + to_regrole ( text ) + regrole + + + Translates a textual role name to its OID. A similar result is + obtained by casting the string to type regrole (see + ); however, this function will return + NULL rather than throwing an error if the name is + not found. + + + + + + + to_regtype + + to_regtype ( text ) + regtype + + + Parses a string of text, extracts a potential type name from it, + and translates that name into a type OID. A syntax error in the + string will result in an error; but if the string is a + syntactically valid type name that happens not to be found in the + catalogs, the result is NULL. A similar result + is obtained by casting the string to type regtype + (see ), except that that will throw + error for name not found. + + + + + + + to_regtypemod + + to_regtypemod ( text ) + integer + + + Parses a string of text, extracts a potential type name from it, + and translates its type modifier, if any. A syntax error in the + string will result in an error; but if the string is a + syntactically valid type name that happens not to be found in the + catalogs, the result is NULL. The result is + -1 if no type modifier is present. + + + to_regtypemod can be combined with + to produce appropriate inputs for + , allowing a string representing a + type name to be canonicalized. + + + format_type(to_regtype('varchar(32)'), to_regtypemod('varchar(32)')) + character varying(32) + + + + +
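+
+   The practical difference between these functions and a plain cast is
+   the treatment of unknown names; assuming no_such_table
+   does not exist:
+
+SELECT to_regclass('no_such_table');   -- returns NULL
+SELECT 'no_such_table'::regclass;      -- raises an error instead
+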
+ + + Most of the functions that reconstruct (decompile) database objects + have an optional pretty flag, which + if true causes the result to + be pretty-printed. Pretty-printing suppresses unnecessary + parentheses and adds whitespace for legibility. + The pretty-printed format is more readable, but the default format + is more likely to be interpreted the same way by future versions of + PostgreSQL; so avoid using pretty-printed output + for dump purposes. Passing false for + the pretty parameter yields the same result as + omitting the parameter. + + + + Index Column Properties + + + NameDescription + + + + asc + Does the column sort in ascending order on a forward scan? + + + + desc + Does the column sort in descending order on a forward scan? + + + + nulls_first + Does the column sort with nulls first on a forward scan? + + + + nulls_last + Does the column sort with nulls last on a forward scan? + + + + orderable + Does the column possess any defined sort ordering? + + + + distance_orderable + Can the column be scanned in order by a distance + operator, for example ORDER BY col <-> constant ? + + + + returnable + Can the column value be returned by an index-only scan? + + + + search_array + Does the column natively support col = ANY(array) + searches? + + + + search_nulls + Does the column support IS NULL and + IS NOT NULL searches? + + + + +
+ + + Index Properties + + + NameDescription + + + + clusterable + Can the index be used in a CLUSTER command? + + + + index_scan + Does the index support plain (non-bitmap) scans? + + + + bitmap_scan + Does the index support bitmap scans? + + + + backward_scan + Can the scan direction be changed in mid-scan (to + support FETCH BACKWARD on a cursor without + needing materialization)? + + + + +
+ + + Index Access Method Properties + + + NameDescription + + + + can_order + Does the access method support ASC, + DESC and related keywords in + CREATE INDEX? + + + + can_unique + Does the access method support unique indexes? + + + + can_multi_col + Does the access method support indexes with multiple columns? + + + + can_exclude + Does the access method support exclusion constraints? + + + + can_include + Does the access method support the INCLUDE + clause of CREATE INDEX? + + + + +
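+
+   For example, probing a system index
+   (pg_class_oid_index is an ordinary btree index, so
+   each of these should return true):
+
+SELECT pg_index_column_has_property('pg_class_oid_index'::regclass, 1, 'asc');
+SELECT pg_index_has_property('pg_class_oid_index'::regclass, 'clusterable');
+SELECT pg_indexam_has_property(
+         (SELECT oid FROM pg_am WHERE amname = 'btree'), 'can_order');
+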
+ + + GUC Flags + + + FlagDescription + + + + EXPLAIN + Parameters with this flag are included in + EXPLAIN (SETTINGS) commands. + + + + NO_SHOW_ALL + Parameters with this flag are excluded from + SHOW ALL commands. + + + + NO_RESET + Parameters with this flag do not support + RESET commands. + + + + NO_RESET_ALL + Parameters with this flag are excluded from + RESET ALL commands. + + + + NOT_IN_SAMPLE + Parameters with this flag are not included in + postgresql.conf by default. + + + + RUNTIME_COMPUTED + Parameters with this flag are runtime-computed ones. + + + + +
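+
+   For example (the exact flags returned vary across parameters and
+   server versions):
+
+SELECT pg_settings_get_flags('work_mem');
+SELECT pg_settings_get_flags('no_such_parameter');   -- returns NULL
+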
+ +
+ + + Object Information and Addressing Functions + + + lists functions related to + database object identification and addressing. + + + + Object Information and Addressing Functions + + + + + Function + + + Description + + + + + + + + + pg_get_acl + + pg_get_acl ( classid oid, objid oid, objsubid integer ) + aclitem[] + + + Returns the ACL for a database object, specified + by catalog OID, object OID and sub-object ID. This function returns + NULL values for undefined objects. + + + + + + + pg_describe_object + + pg_describe_object ( classid oid, objid oid, objsubid integer ) + text + + + Returns a textual description of a database object identified by + catalog OID, object OID, and sub-object ID (such as a column number + within a table; the sub-object ID is zero when referring to a whole + object). This description is intended to be human-readable, and might + be translated, depending on server configuration. This is especially + useful to determine the identity of an object referenced in the + pg_depend catalog. This function returns + NULL values for undefined objects. + + + + + + + pg_identify_object + + pg_identify_object ( classid oid, objid oid, objsubid integer ) + record + ( type text, + schema text, + name text, + identity text ) + + + Returns a row containing enough information to uniquely identify the + database object specified by catalog OID, object OID and sub-object + ID. + This information is intended to be machine-readable, and is never + translated. + type identifies the type of database object; + schema is the schema name that the object + belongs in, or NULL for object types that do not + belong to schemas; + name is the name of the object, quoted if + necessary, if the name (along with schema name, if pertinent) is + sufficient to uniquely identify the object, + otherwise NULL; + identity is the complete object identity, with + the precise format depending on object type, and each name within the + format being schema-qualified and quoted as necessary. Undefined + objects are identified with NULL values. + + + + + + + pg_identify_object_as_address + + pg_identify_object_as_address ( classid oid, objid oid, objsubid integer ) + record + ( type text, + object_names text[], + object_args text[] ) + + + Returns a row containing enough information to uniquely identify the + database object specified by catalog OID, object OID and sub-object + ID. + The returned information is independent of the current server, that + is, it could be used to identify an identically named object in + another server. + type identifies the type of database object; + object_names and + object_args + are text arrays that together form a reference to the object. + These three values can be passed + to pg_get_object_address to obtain the internal + address of the object. + + + + + + + pg_get_object_address + + pg_get_object_address ( type text, object_names text[], object_args text[] ) + record + ( classid oid, + objid oid, + objsubid integer ) + + + Returns a row containing enough information to uniquely identify the + database object specified by a type code and object name and argument + arrays. + The returned values are the ones that would be used in system catalogs + such as pg_depend; they can be passed to + other system functions such as pg_describe_object + or pg_identify_object. + classid is the OID of the system catalog + containing the object; + objid is the OID of the object itself, and + objsubid is the sub-object ID, or zero if none. 
+ This function is the inverse + of pg_identify_object_as_address. + Undefined objects are identified with NULL values. + + + + +
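+
+   These functions can be combined in a round trip: the portable address
+   produced by pg_identify_object_as_address can be fed back to
+   pg_get_object_address to recover the catalog OIDs. A minimal sketch,
+   using a hypothetical table public.mytable:
+
+CREATE TABLE mytable (a int);   -- hypothetical example table
+
+SELECT * FROM pg_identify_object_as_address(
+    'pg_class'::regclass, 'mytable'::regclass, 0);
+ type  |   object_names   | object_args
+-------+------------------+-------------
+ table | {public,mytable} | {}
+
+-- feeding the result back recovers classid, objid and objsubid
+SELECT * FROM pg_get_object_address('table', '{public,mytable}', '{}');
+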
+ + + pg_get_acl is useful for retrieving and inspecting + the privileges associated with database objects without looking at + specific catalogs. For example, to retrieve all the granted privileges + on objects in the current database: + +postgres=# SELECT + (pg_identify_object(s.classid,s.objid,s.objsubid)).*, + pg_catalog.pg_get_acl(s.classid,s.objid,s.objsubid) AS acl +FROM pg_catalog.pg_shdepend AS s +JOIN pg_catalog.pg_database AS d + ON d.datname = current_database() AND + d.oid = s.dbid +JOIN pg_catalog.pg_authid AS a + ON a.oid = s.refobjid AND + s.refclassid = 'pg_authid'::regclass +WHERE s.deptype = 'a'; +-[ RECORD 1 ]----------------------------------------- +type | table +schema | public +name | testtab +identity | public.testtab +acl | {postgres=arwdDxtm/postgres,foo=r/postgres} + + + +
+ + + Comment Information Functions + + + comment + about database objects + + + + The functions shown in + extract comments previously stored with the + command. A null value is returned if no + comment could be found for the specified parameters. + + + + Comment Information Functions + + + + + Function + + + Description + + + + + + + + + col_description + + col_description ( table oid, column integer ) + text + + + Returns the comment for a table column, which is specified by the OID + of its table and its column number. + (obj_description cannot be used for table + columns, since columns do not have OIDs of their own.) + + + + + + + obj_description + + obj_description ( object oid, catalog name ) + text + + + Returns the comment for a database object specified by its OID and the + name of the containing system catalog. For + example, obj_description(123456, 'pg_class') would + retrieve the comment for the table with OID 123456. + + + + + + obj_description ( object oid ) + text + + + Returns the comment for a database object specified by its OID alone. + This is deprecated since there is no guarantee + that OIDs are unique across different system catalogs; therefore, the + wrong comment might be returned. + + + + + + + shobj_description + + shobj_description ( object oid, catalog name ) + text + + + Returns the comment for a shared database object specified by its OID + and the name of the containing system catalog. This is just + like obj_description except that it is used for + retrieving comments on shared objects (that is, databases, roles, and + tablespaces). Some system catalogs are global to all databases within + each cluster, and the descriptions for objects in them are stored + globally as well. + + + + +
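+
+   For example, comments stored with COMMENT can be read back as follows
+   (a minimal sketch; the table mytable is hypothetical):
+
+CREATE TABLE mytable (a int);   -- hypothetical example table
+COMMENT ON TABLE mytable IS 'my test table';
+COMMENT ON COLUMN mytable.a IS 'first column';
+
+SELECT obj_description('mytable'::regclass, 'pg_class');
+ obj_description
+-----------------
+ my test table
+
+SELECT col_description('mytable'::regclass, 1);
+ col_description
+-----------------
+ first column
+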
+ +
+ + + Data Validity Checking Functions + + + The functions shown in + can be helpful for checking validity of proposed input data. + + + + Data Validity Checking Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + pg_input_is_valid + + pg_input_is_valid ( + string text, + type text + ) + boolean + + + Tests whether the given string is valid + input for the specified data type, returning true or false. + + + This function will only work as desired if the data type's input + function has been updated to report invalid input as + a soft error. Otherwise, invalid input will abort + the transaction, just as if the string had been cast to the type + directly. + + + pg_input_is_valid('42', 'integer') + t + + + pg_input_is_valid('42000000000', 'integer') + f + + + pg_input_is_valid('1234.567', 'numeric(7,4)') + f + + + + + + pg_input_error_info + + pg_input_error_info ( + string text, + type text + ) + record + ( message text, + detail text, + hint text, + sql_error_code text ) + + + Tests whether the given string is valid + input for the specified data type; if not, returns the details of + the error that would have been thrown. If the input is valid, the + results are NULL. The inputs are the same as + for pg_input_is_valid. + + + This function will only work as desired if the data type's input + function has been updated to report invalid input as + a soft error. Otherwise, invalid input will abort + the transaction, just as if the string had been cast to the type + directly. + + + SELECT * FROM pg_input_error_info('42000000000', 'integer') + + + message | detail | hint | sql_error_code +------------------------------------------------------+--------+------+---------------- + value "42000000000" is out of range for type integer | | | 22003 + + + + + +
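+
+   These functions make it possible to screen raw text in bulk before
+   casting it, for example when loading data of uncertain quality. A
+   minimal sketch, relying on the fact that the input function of
+   date reports soft errors:
+
+SELECT raw, pg_input_is_valid(raw, 'date') AS is_valid
+FROM (VALUES ('2024-01-31'), ('2024-02-30'), ('not a date')) AS t(raw);
+     raw    | is_valid
+------------+----------
+ 2024-01-31 | t
+ 2024-02-30 | f
+ not a date | f
+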
+ +
+ + + Transaction ID and Snapshot Information Functions + + + The functions shown in + provide server transaction information in an exportable form. The main + use of these functions is to determine which transactions were committed + between two snapshots. + + + + Transaction ID and Snapshot Information Functions + + + + + Function + + + Description + + + + + + + + + age + + age ( xid ) + integer + + + Returns the number of transactions between the supplied + transaction id and the current transaction counter. + + + + + + + mxid_age + + mxid_age ( xid ) + integer + + + Returns the number of multixacts IDs between the supplied + multixact ID and the current multixacts counter. + + + + + + + pg_current_xact_id + + pg_current_xact_id () + xid8 + + + Returns the current transaction's ID. It will assign a new one if the + current transaction does not have one already (because it has not + performed any database updates); see for details. If executed in a + subtransaction, this will return the top-level transaction ID; + see for details. + + + + + + + pg_current_xact_id_if_assigned + + pg_current_xact_id_if_assigned () + xid8 + + + Returns the current transaction's ID, or NULL if no + ID is assigned yet. (It's best to use this variant if the transaction + might otherwise be read-only, to avoid unnecessary consumption of an + XID.) + If executed in a subtransaction, this will return the top-level + transaction ID. + + + + + + + pg_xact_status + + pg_xact_status ( xid8 ) + text + + + Reports the commit status of a recent transaction. + The result is one of in progress, + committed, or aborted, + provided that the transaction is recent enough that the system retains + the commit status of that transaction. + If it is old enough that no references to the transaction survive in + the system and the commit status information has been discarded, the + result is NULL. + Applications might use this function, for example, to determine + whether their transaction committed or aborted after the application + and database server become disconnected while + a COMMIT is in progress. + Note that prepared transactions are reported as in + progress; applications must check pg_prepared_xacts + if they need to determine whether a transaction ID belongs to a + prepared transaction. + + + + + + + pg_current_snapshot + + pg_current_snapshot () + pg_snapshot + + + Returns a current snapshot, a data structure + showing which transaction IDs are now in-progress. + Only top-level transaction IDs are included in the snapshot; + subtransaction IDs are not shown; see + for details. + + + + + + + pg_snapshot_xip + + pg_snapshot_xip ( pg_snapshot ) + setof xid8 + + + Returns the set of in-progress transaction IDs contained in a snapshot. + + + + + + + pg_snapshot_xmax + + pg_snapshot_xmax ( pg_snapshot ) + xid8 + + + Returns the xmax of a snapshot. + + + + + + + pg_snapshot_xmin + + pg_snapshot_xmin ( pg_snapshot ) + xid8 + + + Returns the xmin of a snapshot. + + + + + + + pg_visible_in_snapshot + + pg_visible_in_snapshot ( xid8, pg_snapshot ) + boolean + + + Is the given transaction ID visible according + to this snapshot (that is, was it completed before the snapshot was + taken)? Note that this function will not give the correct answer for + a subtransaction ID (subxid); see for + details. + + + + + + + pg_get_multixact_members + + pg_get_multixact_members ( multixid xid ) + setof record + ( xid xid, + mode text ) + + + Returns the transaction ID and lock mode for each member of the + specified multixact ID. 
The lock modes forupd, + fornokeyupd, sh, and + keysh correspond to the row-level locks + FOR UPDATE, FOR NO KEY UPDATE, + FOR SHARE, and FOR KEY SHARE, + respectively, as described in . Two + additional modes are specific to multixacts: + nokeyupd, used by updates that do not modify key + columns, and upd, used by updates or deletes that + modify key columns. + + + + +
+ + + The internal transaction ID type xid is 32 bits wide and + wraps around every 4 billion transactions. However, + the functions shown in , except + age, mxid_age, and + pg_get_multixact_members, use a + 64-bit type xid8 that does not wrap around during the life + of an installation and can be converted to xid by casting if + required; see for details. + The data type pg_snapshot stores information about + transaction ID visibility at a particular moment in time. Its components + are described in . + pg_snapshot's textual representation is + xmin:xmax:xip_list. + For example 10:20:10,14,15 means + xmin=10, xmax=20, xip_list=10, 14, 15. + + + + Snapshot Components + + + + Name + Description + + + + + + xmin + + Lowest transaction ID that was still active. All transaction IDs + less than xmin are either committed and visible, + or rolled back and dead. + + + + + xmax + + One past the highest completed transaction ID. All transaction IDs + greater than or equal to xmax had not yet + completed as of the time of the snapshot, and thus are invisible. + + + + + xip_list + + Transactions in progress at the time of the snapshot. A transaction + ID that is xmin <= X < + xmax and not in this list was already completed at the time + of the snapshot, and thus is either visible or dead according to its + commit status. This list does not include the transaction IDs of + subtransactions (subxids). + + + + +
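+
+   For example (a minimal sketch; the transaction IDs shown are
+   illustrative and depend on the server's history):
+
+SELECT pg_current_snapshot();
+ pg_current_snapshot
+---------------------
+ 793:793:
+
+-- the components can be extracted individually
+SELECT pg_snapshot_xmin(pg_current_snapshot()) AS xmin,
+       pg_snapshot_xmax(pg_current_snapshot()) AS xmax;
+ xmin | xmax
+------+------
+  793 |  793
+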
+ + + In releases of PostgreSQL before 13 there was + no xid8 type, so variants of these functions were provided + that used bigint to represent a 64-bit XID, with a + correspondingly distinct snapshot data type txid_snapshot. + These older functions have txid in their names. They + are still supported for backward compatibility, but may be removed from a + future release. See . + + + + Deprecated Transaction ID and Snapshot Information Functions + + + + + Function + + + Description + + + + + + + + + + txid_current + + txid_current () + bigint + + + See pg_current_xact_id(). + + + + + + + txid_current_if_assigned + + txid_current_if_assigned () + bigint + + + See pg_current_xact_id_if_assigned(). + + + + + + + txid_current_snapshot + + txid_current_snapshot () + txid_snapshot + + + See pg_current_snapshot(). + + + + + + + txid_snapshot_xip + + txid_snapshot_xip ( txid_snapshot ) + setof bigint + + + See pg_snapshot_xip(). + + + + + + + txid_snapshot_xmax + + txid_snapshot_xmax ( txid_snapshot ) + bigint + + + See pg_snapshot_xmax(). + + + + + + + txid_snapshot_xmin + + txid_snapshot_xmin ( txid_snapshot ) + bigint + + + See pg_snapshot_xmin(). + + + + + + + txid_visible_in_snapshot + + txid_visible_in_snapshot ( bigint, txid_snapshot ) + boolean + + + See pg_visible_in_snapshot(). + + + + + + + txid_status + + txid_status ( bigint ) + text + + + See pg_xact_status(). + + + + +
+ +
+ + + Committed Transaction Information Functions + + + The functions shown in + provide information about when past transactions were committed. + They only provide useful data when the + configuration option is + enabled, and only for transactions that were committed after it was + enabled. Commit timestamp information is routinely removed during + vacuum. + + + + Committed Transaction Information Functions + + + + + Function + + + Description + + + + + + + + + pg_xact_commit_timestamp + + pg_xact_commit_timestamp ( xid ) + timestamp with time zone + + + Returns the commit timestamp of a transaction. + + + + + + + pg_xact_commit_timestamp_origin + + pg_xact_commit_timestamp_origin ( xid ) + record + ( timestamp timestamp with time zone, + roident oid) + + + Returns the commit timestamp and replication origin of a transaction. + + + + + + + pg_last_committed_xact + + pg_last_committed_xact () + record + ( xid xid, + timestamp timestamp with time zone, + roident oid ) + + + Returns the transaction ID, commit timestamp and replication origin + of the latest committed transaction. + + + + +
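+
+   For example (a minimal sketch; it assumes track_commit_timestamp was
+   already enabled when the transaction committed, and the values shown
+   are illustrative):
+
+-- requires track_commit_timestamp = on
+SELECT * FROM pg_last_committed_xact();
+ xid |           timestamp           | roident
+-----+-------------------------------+---------
+ 801 | 2025-01-01 12:00:00.123456+00 |       0
+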
+ +
+ + + Control Data Functions + + + The functions shown in + print information initialized during initdb, such + as the catalog version. They also show information about write-ahead + logging and checkpoint processing. This information is cluster-wide, + not specific to any one database. These functions provide most of the same + information, from the same source, as the + application. + + + + Control Data Functions + + + + + Function + + + Description + + + + + + + + + pg_control_checkpoint + + pg_control_checkpoint () + record + + + Returns information about current checkpoint state, as shown in + . + + + + + + + pg_control_system + + pg_control_system () + record + + + Returns information about current control file state, as shown in + . + + + + + + + pg_control_init + + pg_control_init () + record + + + Returns information about cluster initialization state, as shown in + . + + + + + + + pg_control_recovery + + pg_control_recovery () + record + + + Returns information about recovery state, as shown in + . + + + + +
+ + + <function>pg_control_checkpoint</function> Output Columns + + + + Column Name + Data Type + + + + + + + checkpoint_lsn + pg_lsn + + + + redo_lsn + pg_lsn + + + + redo_wal_file + text + + + + timeline_id + integer + + + + prev_timeline_id + integer + + + + full_page_writes + boolean + + + + next_xid + text + + + + next_oid + oid + + + + next_multixact_id + xid + + + + next_multi_offset + xid + + + + oldest_xid + xid + + + + oldest_xid_dbid + oid + + + + oldest_active_xid + xid + + + + oldest_multi_xid + xid + + + + oldest_multi_dbid + oid + + + + oldest_commit_ts_xid + xid + + + + newest_commit_ts_xid + xid + + + + checkpoint_time + timestamp with time zone + + + + +
+ + + <function>pg_control_system</function> Output Columns + + + + Column Name + Data Type + + + + + + + pg_control_version + integer + + + + catalog_version_no + integer + + + + system_identifier + bigint + + + + pg_control_last_modified + timestamp with time zone + + + + +
+ + + <function>pg_control_init</function> Output Columns + + + + Column Name + Data Type + + + + + + + max_data_alignment + integer + + + + database_block_size + integer + + + + blocks_per_segment + integer + + + + wal_block_size + integer + + + + bytes_per_wal_segment + integer + + + + max_identifier_length + integer + + + + max_index_columns + integer + + + + max_toast_chunk_size + integer + + + + large_object_chunk_size + integer + + + + float8_pass_by_value + boolean + + + + data_page_checksum_version + integer + + + + default_char_signedness + boolean + + + + +
+ + + <function>pg_control_recovery</function> Output Columns + + + + Column Name + Data Type + + + + + + + min_recovery_end_lsn + pg_lsn + + + + min_recovery_end_timeline + integer + + + + backup_start_lsn + pg_lsn + + + + backup_end_lsn + pg_lsn + + + + end_of_backup_record_required + boolean + + + + +
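+
+   For example, a few checkpoint-related fields can be queried directly
+   (a minimal sketch; the values shown are illustrative):
+
+SELECT timeline_id, redo_lsn, checkpoint_time FROM pg_control_checkpoint();
+ timeline_id | redo_lsn  |    checkpoint_time
+-------------+-----------+------------------------
+           1 | 0/3000028 | 2025-01-01 12:00:00+00
+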
+ +
+ + + Version Information Functions + + + The functions shown in + print version information. + + + + Version Information Functions + + + + + Function + + + Description + + + + + + + + + version + + version () + text + + + Returns a string describing the PostgreSQL + server's version. You can also get this information from + , or for a machine-readable + version use . Software + developers should use server_version_num (available + since 8.2) or instead of + parsing the text version. + + + + + + + unicode_version + + unicode_version () + text + + + Returns a string representing the version of Unicode used by + PostgreSQL. + + + + + + icu_unicode_version + + icu_unicode_version () + text + + + Returns a string representing the version of Unicode used by ICU, if + the server was built with ICU support; otherwise returns + NULL. + + + +
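+
+   For example (a minimal sketch; the strings shown are illustrative and
+   depend on the server build):
+
+SELECT version();
+                            version
+----------------------------------------------------------------
+ PostgreSQL 18.0 on x86_64-pc-linux-gnu, compiled by gcc ...
+
+-- the machine-readable form, suitable for numeric comparisons
+SELECT current_setting('server_version_num');
+ current_setting
+-----------------
+ 180000
+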
+ +
+ + + WAL Summarization Information Functions + + + The functions shown in + print information about the status of WAL summarization. + See . + + + + WAL Summarization Information Functions + + + + + Function + + + Description + + + + + + + + + pg_available_wal_summaries + + pg_available_wal_summaries () + setof record + ( tli bigint, + start_lsn pg_lsn, + end_lsn pg_lsn ) + + + Returns information about the WAL summary files present in the + data directory, under pg_wal/summaries. + One row will be returned per WAL summary file. Each file summarizes + WAL on the indicated TLI within the indicated LSN range. This function + might be useful to determine whether enough WAL summaries are present + on the server to take an incremental backup based on some prior + backup whose start LSN is known. + + + + + + + pg_wal_summary_contents + + pg_wal_summary_contents ( tli bigint, start_lsn pg_lsn, end_lsn pg_lsn ) + setof record + ( relfilenode oid, + reltablespace oid, + reldatabase oid, + relforknumber smallint, + relblocknumber bigint, + is_limit_block boolean ) + + + Returns information about the contents of a single WAL summary file + identified by TLI and starting and ending LSNs. Each row with + is_limit_block false indicates that the block + identified by the remaining output columns was modified by at least + one WAL record within the range of records summarized by this file. + Each row with is_limit_block true indicates either + that (a) the relation fork was truncated to the length given by + relblocknumber within the relevant range of WAL + records or (b) that the relation fork was created or dropped within + the relevant range of WAL records; in such cases, + relblocknumber will be zero. + + + + + + + pg_get_wal_summarizer_state + + pg_get_wal_summarizer_state () + record + ( summarized_tli bigint, + summarized_lsn pg_lsn, + pending_lsn pg_lsn, + summarizer_pid int ) + + + Returns information about the progress of the WAL summarizer. If the + WAL summarizer has never run since the instance was started, then + summarized_tli and summarized_lsn + will be 0 and 0/00000000 respectively; + otherwise, they will be the TLI and ending LSN of the last WAL summary + file written to disk. If the WAL summarizer is currently running, + pending_lsn will be the ending LSN of the last + record that it has consumed, which must always be greater than or + equal to summarized_lsn; if the WAL summarizer is + not running, it will be equal to summarized_lsn. + summarizer_pid is the PID of the WAL summarizer + process, if it is running, and otherwise NULL. + + + As a special exception, the WAL summarizer will refuse to generate + WAL summary files if run on WAL generated under + wal_level=minimal, since such summaries would be + unsafe to use as the basis for an incremental backup. In this case, + the fields above will continue to advance as if summaries were being + generated, but nothing will be written to disk. Once the summarizer + reaches WAL generated while wal_level was set + to replica or higher, it will resume writing + summaries to disk. + + + + +
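+
+   For example (a minimal sketch; it assumes WAL summarization is enabled
+   with summarize_wal = on, and the values shown are illustrative):
+
+SELECT * FROM pg_get_wal_summarizer_state();
+ summarized_tli | summarized_lsn | pending_lsn | summarizer_pid
+----------------+----------------+-------------+----------------
+              1 | 0/4000148      | 0/4000148   |          12345
+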
+ +
+ +
diff --git a/doc/src/sgml/func/func-json.sgml b/doc/src/sgml/func/func-json.sgml new file mode 100644 index 0000000000000..91f98a345d445 --- /dev/null +++ b/doc/src/sgml/func/func-json.sgml @@ -0,0 +1,3945 @@ + + JSON Functions and Operators + + + JSON + functions and operators + + + SQL/JSON + functions and expressions + + + + This section describes: + + + + + functions and operators for processing and creating JSON data + + + + + the SQL/JSON path language + + + + + the SQL/JSON query functions + + + + + + + To provide native support for JSON data types within the SQL environment, + PostgreSQL implements the + SQL/JSON data model. + This model comprises sequences of items. Each item can hold SQL scalar + values, with an additional SQL/JSON null value, and composite data structures + that use JSON arrays and objects. The model is a formalization of the implied + data model in the JSON specification + RFC 7159. + + + + SQL/JSON allows you to handle JSON data alongside regular SQL data, + with transaction support, including: + + + + + Uploading JSON data into the database and storing it in + regular SQL columns as character or binary strings. + + + + + Generating JSON objects and arrays from relational data. + + + + + Querying JSON data using SQL/JSON query functions and + SQL/JSON path language expressions. + + + + + + + To learn more about the SQL/JSON standard, see + . For details on JSON types + supported in PostgreSQL, + see . + + + + Processing and Creating JSON Data + + + shows the operators that + are available for use with JSON data types (see ). + In addition, the usual comparison operators shown in are available for + jsonb, though not for json. The comparison + operators follow the ordering rules for B-tree operations outlined in + . + See also for the aggregate + function json_agg which aggregates record + values as JSON, the aggregate function + json_object_agg which aggregates pairs of values + into a JSON object, and their jsonb equivalents, + jsonb_agg and jsonb_object_agg. + + + + <type>json</type> and <type>jsonb</type> Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + json -> integer + json + + + jsonb -> integer + jsonb + + + Extracts n'th element of JSON array + (array elements are indexed from zero, but negative integers count + from the end). + + + '[{"a":"foo"},{"b":"bar"},{"c":"baz"}]'::json -> 2 + {"c":"baz"} + + + '[{"a":"foo"},{"b":"bar"},{"c":"baz"}]'::json -> -3 + {"a":"foo"} + + + + + + json -> text + json + + + jsonb -> text + jsonb + + + Extracts JSON object field with the given key. + + + '{"a": {"b":"foo"}}'::json -> 'a' + {"b":"foo"} + + + + + + json ->> integer + text + + + jsonb ->> integer + text + + + Extracts n'th element of JSON array, + as text. + + + '[1,2,3]'::json ->> 2 + 3 + + + + + + json ->> text + text + + + jsonb ->> text + text + + + Extracts JSON object field with the given key, as text. + + + '{"a":1,"b":2}'::json ->> 'b' + 2 + + + + + + json #> text[] + json + + + jsonb #> text[] + jsonb + + + Extracts JSON sub-object at the specified path, where path elements + can be either field keys or array indexes. + + + '{"a": {"b": ["foo","bar"]}}'::json #> '{a,b,1}' + "bar" + + + + + + json #>> text[] + text + + + jsonb #>> text[] + text + + + Extracts JSON sub-object at the specified path as text. + + + '{"a": {"b": ["foo","bar"]}}'::json #>> '{a,b,1}' + bar + + + + +
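+
+   The extraction operators above can be chained to walk into nested
+   structures; -> keeps the JSON type at each step, while a final
+   ->> extracts the result as text. A minimal sketch:
+
+SELECT '{"items": [{"id": 1}, {"id": 2}]}'::jsonb -> 'items' -> 0 ->> 'id';
+ ?column?
+----------
+ 1
+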
+ + + + The field/element/path extraction operators return NULL, rather than + failing, if the JSON input does not have the right structure to match + the request; for example if no such key or array element exists. + + + + + Some further operators exist only for jsonb, as shown + in . + + describes how these operators can be used to effectively search indexed + jsonb data. + + + + Additional <type>jsonb</type> Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + jsonb @> jsonb + boolean + + + Does the first JSON value contain the second? + (See for details about containment.) + + + '{"a":1, "b":2}'::jsonb @> '{"b":2}'::jsonb + t + + + + + + jsonb <@ jsonb + boolean + + + Is the first JSON value contained in the second? + + + '{"b":2}'::jsonb <@ '{"a":1, "b":2}'::jsonb + t + + + + + + jsonb ? text + boolean + + + Does the text string exist as a top-level key or array element within + the JSON value? + + + '{"a":1, "b":2}'::jsonb ? 'b' + t + + + '["a", "b", "c"]'::jsonb ? 'b' + t + + + + + + jsonb ?| text[] + boolean + + + Do any of the strings in the text array exist as top-level keys or + array elements? + + + '{"a":1, "b":2, "c":3}'::jsonb ?| array['b', 'd'] + t + + + + + + jsonb ?& text[] + boolean + + + Do all of the strings in the text array exist as top-level keys or + array elements? + + + '["a", "b", "c"]'::jsonb ?& array['a', 'b'] + t + + + + + + jsonb || jsonb + jsonb + + + Concatenates two jsonb values. + Concatenating two arrays generates an array containing all the + elements of each input. Concatenating two objects generates an + object containing the union of their + keys, taking the second object's value when there are duplicate keys. + All other cases are treated by converting a non-array input into a + single-element array, and then proceeding as for two arrays. + Does not operate recursively: only the top-level array or object + structure is merged. + + + '["a", "b"]'::jsonb || '["a", "d"]'::jsonb + ["a", "b", "a", "d"] + + + '{"a": "b"}'::jsonb || '{"c": "d"}'::jsonb + {"a": "b", "c": "d"} + + + '[1, 2]'::jsonb || '3'::jsonb + [1, 2, 3] + + + '{"a": "b"}'::jsonb || '42'::jsonb + [{"a": "b"}, 42] + + + To append an array to another array as a single entry, wrap it + in an additional layer of array, for example: + + + '[1, 2]'::jsonb || jsonb_build_array('[3, 4]'::jsonb) + [1, 2, [3, 4]] + + + + + + jsonb - text + jsonb + + + Deletes a key (and its value) from a JSON object, or matching string + value(s) from a JSON array. + + + '{"a": "b", "c": "d"}'::jsonb - 'a' + {"c": "d"} + + + '["a", "b", "c", "b"]'::jsonb - 'b' + ["a", "c"] + + + + + + jsonb - text[] + jsonb + + + Deletes all matching keys or array elements from the left operand. + + + '{"a": "b", "c": "d"}'::jsonb - '{a,c}'::text[] + {} + + + + + + jsonb - integer + jsonb + + + Deletes the array element with specified index (negative + integers count from the end). Throws an error if JSON value + is not an array. + + + '["a", "b"]'::jsonb - 1 + ["a"] + + + + + + jsonb #- text[] + jsonb + + + Deletes the field or array element at the specified path, where path + elements can be either field keys or array indexes. + + + '["a", {"b":1}]'::jsonb #- '{1,b}' + ["a", {}] + + + + + + jsonb @? jsonpath + boolean + + + Does JSON path return any item for the specified JSON value? + (This is useful only with SQL-standard JSON path expressions, not + predicate check + expressions, since those always return a value.) + + + '{"a":[1,2,3,4,5]}'::jsonb @? '$.a[*] ? 
(@ > 2)' + t + + + + + + jsonb @@ jsonpath + boolean + + + Returns the result of a JSON path predicate check for the + specified JSON value. + (This is useful only + with predicate + check expressions, not SQL-standard JSON path expressions, + since it will return NULL if the path result is + not a single boolean value.) + + + '{"a":[1,2,3,4,5]}'::jsonb @@ '$.a[*] > 2' + t + + + + +
+ + + + The jsonpath operators @? + and @@ suppress the following errors: missing object + field or array element, unexpected JSON item type, datetime and numeric + errors. The jsonpath-related functions described below can + also be told to suppress these types of errors. This behavior might be + helpful when searching JSON document collections of varying structure. + + + + + shows the functions that are + available for constructing json and jsonb values. + Some functions in this table have a RETURNING clause, + which specifies the data type returned. It must be one of json, + jsonb, bytea, a character string type (text, + char, or varchar), or a type + that can be cast to json. + By default, the json type is returned. + + + + JSON Creation Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + to_json + + to_json ( anyelement ) + json + + + + to_jsonb + + to_jsonb ( anyelement ) + jsonb + + + Converts any SQL value to json or jsonb. + Arrays and composites are converted recursively to arrays and + objects (multidimensional arrays become arrays of arrays in JSON). + Otherwise, if there is a cast from the SQL data type + to json, the cast function will be used to perform the + conversion; + + For example, the extension has a cast + from hstore to json, so that + hstore values converted via the JSON creation functions + will be represented as JSON objects, not as primitive string values. + + + otherwise, a scalar JSON value is produced. For any scalar other than + a number, a Boolean, or a null value, the text representation will be + used, with escaping as necessary to make it a valid JSON string value. + + + to_json('Fred said "Hi."'::text) + "Fred said \"Hi.\"" + + + to_jsonb(row(42, 'Fred said "Hi."'::text)) + {"f1": 42, "f2": "Fred said \"Hi.\""} + + + + + + + array_to_json + + array_to_json ( anyarray , boolean ) + json + + + Converts an SQL array to a JSON array. The behavior is the same + as to_json except that line feeds will be added + between top-level array elements if the optional boolean parameter is + true. + + + array_to_json('{{1,5},{99,100}}'::int[]) + [[1,5],[99,100]] + + + + + + + json_array + json_array ( + { value_expression FORMAT JSON } , ... + { NULL | ABSENT } ON NULL + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + json_array ( + query_expression + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Constructs a JSON array from either a series of + value_expression parameters or from the results + of query_expression, + which must be a SELECT query returning a single column. If + ABSENT ON NULL is specified, NULL values are ignored. + This is always the case if a + query_expression is used. + + + json_array(1,true,json '{"a":null}') + [1, true, {"a":null}] + + + json_array(SELECT * FROM (VALUES(1),(2)) t) + [1, 2] + + + + + + + row_to_json + + row_to_json ( record , boolean ) + json + + + Converts an SQL composite value to a JSON object. The behavior is the + same as to_json except that line feeds will be + added between top-level elements if the optional boolean parameter is + true. + + + row_to_json(row(1,'foo')) + {"f1":1,"f2":"foo"} + + + + + + + json_build_array + + json_build_array ( VARIADIC "any" ) + json + + + + jsonb_build_array + + jsonb_build_array ( VARIADIC "any" ) + jsonb + + + Builds a possibly-heterogeneously-typed JSON array out of a variadic + argument list. Each argument is converted as + per to_json or to_jsonb. 
+ + + json_build_array(1, 2, 'foo', 4, 5) + [1, 2, "foo", 4, 5] + + + + + + + json_build_object + + json_build_object ( VARIADIC "any" ) + json + + + + jsonb_build_object + + jsonb_build_object ( VARIADIC "any" ) + jsonb + + + Builds a JSON object out of a variadic argument list. By convention, + the argument list consists of alternating keys and values. Key + arguments are coerced to text; value arguments are converted as + per to_json or to_jsonb. + + + json_build_object('foo', 1, 2, row(3,'bar')) + {"foo" : 1, "2" : {"f1":3,"f2":"bar"}} + + + + + + json_object + json_object ( + { key_expression { VALUE | ':' } + value_expression FORMAT JSON ENCODING UTF8 }, ... + { NULL | ABSENT } ON NULL + { WITH | WITHOUT } UNIQUE KEYS + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Constructs a JSON object of all the key/value pairs given, + or an empty object if none are given. + key_expression is a scalar expression + defining the JSON key, which is + converted to the text type. + It cannot be NULL nor can it + belong to a type that has a cast to the json type. + If WITH UNIQUE KEYS is specified, there must not + be any duplicate key_expression. + Any pair for which the value_expression + evaluates to NULL is omitted from the output + if ABSENT ON NULL is specified; + if NULL ON NULL is specified or the clause + omitted, the key is included with value NULL. + + + json_object('code' VALUE 'P123', 'title': 'Jaws') + {"code" : "P123", "title" : "Jaws"} + + + + + + + json_object + + json_object ( text[] ) + json + + + + jsonb_object + + jsonb_object ( text[] ) + jsonb + + + Builds a JSON object out of a text array. The array must have either + exactly one dimension with an even number of members, in which case + they are taken as alternating key/value pairs, or two dimensions + such that each inner array has exactly two elements, which + are taken as a key/value pair. All values are converted to JSON + strings. + + + json_object('{a, 1, b, "def", c, 3.5}') + {"a" : "1", "b" : "def", "c" : "3.5"} + + json_object('{{a, 1}, {b, "def"}, {c, 3.5}}') + {"a" : "1", "b" : "def", "c" : "3.5"} + + + + + + json_object ( keys text[], values text[] ) + json + + + jsonb_object ( keys text[], values text[] ) + jsonb + + + This form of json_object takes keys and values + pairwise from separate text arrays. Otherwise it is identical to + the one-argument form. + + + json_object('{a,b}', '{1,2}') + {"a": "1", "b": "2"} + + + + + + json constructor + json ( + expression + FORMAT JSON ENCODING UTF8 + { WITH | WITHOUT } UNIQUE KEYS ) + json + + + Converts a given expression specified as text or + bytea string (in UTF8 encoding) into a JSON + value. If expression is NULL, an + SQL null value is returned. + If WITH UNIQUE is specified, the + expression must not contain any duplicate + object keys. + + + json('{"a":123, "b":[true,"foo"], "a":"bar"}') + {"a":123, "b":[true,"foo"], "a":"bar"} + + + + + + + json_scalar + json_scalar ( expression ) + + + Converts a given SQL scalar value into a JSON scalar value. + If the input is NULL, an SQL null is returned. If + the input is number or a boolean value, a corresponding JSON number + or boolean value is returned. For any other value, a JSON string is + returned. + + + json_scalar(123.45) + 123.45 + + + json_scalar(CURRENT_TIMESTAMP) + "2022-05-10T10:51:04.62128-04:00" + + + + + + json_serialize ( + expression FORMAT JSON ENCODING UTF8 + RETURNING data_type FORMAT JSON ENCODING UTF8 ) + + + Converts an SQL/JSON expression into a character or binary string. 
The + expression can be of any JSON type, any + character string type, or bytea in UTF8 encoding. + The returned type used in RETURNING can be any + character string type or bytea. The default is + text. + + + json_serialize('{ "a" : 1 } ' RETURNING bytea) + \x7b20226122203a2031207d20 + + + + +
+ + + details SQL/JSON + facilities for testing JSON. + + + + SQL/JSON Testing Functions + + + + + Function signature + + + Description + + + Example(s) + + + + + + + IS JSON + expression IS NOT JSON + { VALUE | SCALAR | ARRAY | OBJECT } + { WITH | WITHOUT } UNIQUE KEYS + + + This predicate tests whether expression can be + parsed as JSON, possibly of a specified type. + If SCALAR or ARRAY or + OBJECT is specified, the + test is whether or not the JSON is of that particular type. If + WITH UNIQUE KEYS is specified, then any object in the + expression is also tested to see if it + has duplicate keys. + + + +SELECT js, + js IS JSON "json?", + js IS JSON SCALAR "scalar?", + js IS JSON OBJECT "object?", + js IS JSON ARRAY "array?" +FROM (VALUES + ('123'), ('"abc"'), ('{"a": "b"}'), ('[1,2]'),('abc')) foo(js); + js | json? | scalar? | object? | array? +------------+-------+---------+---------+-------- + 123 | t | t | f | f + "abc" | t | t | f | f + {"a": "b"} | t | f | t | f + [1,2] | t | f | f | t + abc | f | f | f | f + + + + +SELECT js, + js IS JSON OBJECT "object?", + js IS JSON ARRAY "array?", + js IS JSON ARRAY WITH UNIQUE KEYS "array w. UK?", + js IS JSON ARRAY WITHOUT UNIQUE KEYS "array w/o UK?" +FROM (VALUES ('[{"a":"1"}, + {"b":"2","b":"3"}]')) foo(js); +-[ RECORD 1 ]-+-------------------- +js | [{"a":"1"}, + + | {"b":"2","b":"3"}] +object? | f +array? | t +array w. UK? | f +array w/o UK? | t + + + + + +
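+
+   One practical use of IS JSON is validating text destined for later
+   JSON processing, for example in a CHECK constraint. A minimal sketch
+   (the table and column names are hypothetical):
+
+CREATE TABLE api_payloads (
+    id      bigint GENERATED ALWAYS AS IDENTITY,
+    payload text CHECK (payload IS JSON OBJECT)
+);
+INSERT INTO api_payloads (payload) VALUES ('{"a": 1}');  -- accepted
+INSERT INTO api_payloads (payload) VALUES ('[1, 2]');    -- rejected: not a JSON object
+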
+ + + shows the functions that + are available for processing json and jsonb values. + + + + JSON Processing Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + json_array_elements + + json_array_elements ( json ) + setof json + + + + jsonb_array_elements + + jsonb_array_elements ( jsonb ) + setof jsonb + + + Expands the top-level JSON array into a set of JSON values. + + + select * from json_array_elements('[1,true, [2,false]]') + + + value +----------- + 1 + true + [2,false] + + + + + + + + json_array_elements_text + + json_array_elements_text ( json ) + setof text + + + + jsonb_array_elements_text + + jsonb_array_elements_text ( jsonb ) + setof text + + + Expands the top-level JSON array into a set of text values. + + + select * from json_array_elements_text('["foo", "bar"]') + + + value +----------- + foo + bar + + + + + + + + json_array_length + + json_array_length ( json ) + integer + + + + jsonb_array_length + + jsonb_array_length ( jsonb ) + integer + + + Returns the number of elements in the top-level JSON array. + + + json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') + 5 + + + jsonb_array_length('[]') + 0 + + + + + + + json_each + + json_each ( json ) + setof record + ( key text, + value json ) + + + + jsonb_each + + jsonb_each ( jsonb ) + setof record + ( key text, + value jsonb ) + + + Expands the top-level JSON object into a set of key/value pairs. + + + select * from json_each('{"a":"foo", "b":"bar"}') + + + key | value +-----+------- + a | "foo" + b | "bar" + + + + + + + + json_each_text + + json_each_text ( json ) + setof record + ( key text, + value text ) + + + + jsonb_each_text + + jsonb_each_text ( jsonb ) + setof record + ( key text, + value text ) + + + Expands the top-level JSON object into a set of key/value pairs. + The returned values will be of + type text. + + + select * from json_each_text('{"a":"foo", "b":"bar"}') + + + key | value +-----+------- + a | foo + b | bar + + + + + + + + json_extract_path + + json_extract_path ( from_json json, VARIADIC path_elems text[] ) + json + + + + jsonb_extract_path + + jsonb_extract_path ( from_json jsonb, VARIADIC path_elems text[] ) + jsonb + + + Extracts JSON sub-object at the specified path. + (This is functionally equivalent to the #> + operator, but writing the path out as a variadic list can be more + convenient in some cases.) + + + json_extract_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 'f4', 'f6') + "foo" + + + + + + + json_extract_path_text + + json_extract_path_text ( from_json json, VARIADIC path_elems text[] ) + text + + + + jsonb_extract_path_text + + jsonb_extract_path_text ( from_json jsonb, VARIADIC path_elems text[] ) + text + + + Extracts JSON sub-object at the specified path as text. + (This is functionally equivalent to the #>> + operator.) + + + json_extract_path_text('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 'f4', 'f6') + foo + + + + + + + json_object_keys + + json_object_keys ( json ) + setof text + + + + jsonb_object_keys + + jsonb_object_keys ( jsonb ) + setof text + + + Returns the set of keys in the top-level JSON object. 
+ + + select * from json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}') + + + json_object_keys +------------------ + f1 + f2 + + + + + + + + json_populate_record + + json_populate_record ( base anyelement, from_json json ) + anyelement + + + + jsonb_populate_record + + jsonb_populate_record ( base anyelement, from_json jsonb ) + anyelement + + + Expands the top-level JSON object to a row having the composite type + of the base argument. The JSON object + is scanned for fields whose names match column names of the output row + type, and their values are inserted into those columns of the output. + (Fields that do not correspond to any output column name are ignored.) + In typical use, the value of base is just + NULL, which means that any output columns that do + not match any object field will be filled with nulls. However, + if base isn't NULL then + the values it contains will be used for unmatched columns. + + + To convert a JSON value to the SQL type of an output column, the + following rules are applied in sequence: + + + + A JSON null value is converted to an SQL null in all cases. + + + + + If the output column is of type json + or jsonb, the JSON value is just reproduced exactly. + + + + + If the output column is a composite (row) type, and the JSON value + is a JSON object, the fields of the object are converted to columns + of the output row type by recursive application of these rules. + + + + + Likewise, if the output column is an array type and the JSON value + is a JSON array, the elements of the JSON array are converted to + elements of the output array by recursive application of these + rules. + + + + + Otherwise, if the JSON value is a string, the contents of the + string are fed to the input conversion function for the column's + data type. + + + + + Otherwise, the ordinary text representation of the JSON value is + fed to the input conversion function for the column's data type. + + + + + + While the example below uses a constant JSON value, typical use would + be to reference a json or jsonb column + laterally from another table in the query's FROM + clause. Writing json_populate_record in + the FROM clause is good practice, since all of the + extracted columns are available for use without duplicate function + calls. + + + create type subrowtype as (d int, e text); + create type myrowtype as (a int, b text[], c subrowtype); + + + select * from json_populate_record(null::myrowtype, + '{"a": 1, "b": ["2", "a b"], "c": {"d": 4, "e": "a b c"}, "x": "foo"}') + + + a | b | c +---+-----------+------------- + 1 | {2,"a b"} | (4,"a b c") + + + + + + + + jsonb_populate_record_valid + + jsonb_populate_record_valid ( base anyelement, from_json json ) + boolean + + + Function for testing jsonb_populate_record. Returns + true if the input jsonb_populate_record + would finish without an error for the given input JSON object; that is, it's + valid input, false otherwise. 
+ + + create type jsb_char2 as (a char(2)); + + + select jsonb_populate_record_valid(NULL::jsb_char2, '{"a": "aaa"}'); + + + jsonb_populate_record_valid +----------------------------- + f +(1 row) + + + select * from jsonb_populate_record(NULL::jsb_char2, '{"a": "aaa"}') q; + + +ERROR: value too long for type character(2) + + select jsonb_populate_record_valid(NULL::jsb_char2, '{"a": "aa"}'); + + + jsonb_populate_record_valid +----------------------------- + t +(1 row) + + + select * from jsonb_populate_record(NULL::jsb_char2, '{"a": "aa"}') q; + + + a +---- + aa +(1 row) + + + + + + + + json_populate_recordset + + json_populate_recordset ( base anyelement, from_json json ) + setof anyelement + + + + jsonb_populate_recordset + + jsonb_populate_recordset ( base anyelement, from_json jsonb ) + setof anyelement + + + Expands the top-level JSON array of objects to a set of rows having + the composite type of the base argument. + Each element of the JSON array is processed as described above + for json[b]_populate_record. + + + create type twoints as (a int, b int); + + + select * from json_populate_recordset(null::twoints, '[{"a":1,"b":2}, {"a":3,"b":4}]') + + + a | b +---+--- + 1 | 2 + 3 | 4 + + + + + + + + json_to_record + + json_to_record ( json ) + record + + + + jsonb_to_record + + jsonb_to_record ( jsonb ) + record + + + Expands the top-level JSON object to a row having the composite type + defined by an AS clause. (As with all functions + returning record, the calling query must explicitly + define the structure of the record with an AS + clause.) The output record is filled from fields of the JSON object, + in the same way as described above + for json[b]_populate_record. Since there is no + input record value, unmatched columns are always filled with nulls. + + + create type myrowtype as (a int, b text); + + + select * from json_to_record('{"a":1,"b":[1,2,3],"c":[1,2,3],"e":"bar","r": {"a": 123, "b": "a b c"}}') as x(a int, b text, c int[], d text, r myrowtype) + + + a | b | c | d | r +---+---------+---------+---+--------------- + 1 | [1,2,3] | {1,2,3} | | (123,"a b c") + + + + + + + + json_to_recordset + + json_to_recordset ( json ) + setof record + + + + jsonb_to_recordset + + jsonb_to_recordset ( jsonb ) + setof record + + + Expands the top-level JSON array of objects to a set of rows having + the composite type defined by an AS clause. (As + with all functions returning record, the calling query + must explicitly define the structure of the record with + an AS clause.) Each element of the JSON array is + processed as described above + for json[b]_populate_record. + + + select * from json_to_recordset('[{"a":1,"b":"foo"}, {"a":"2","c":"bar"}]') as x(a int, b text) + + + a | b +---+----- + 1 | foo + 2 | + + + + + + + + jsonb_set + + jsonb_set ( target jsonb, path text[], new_value jsonb , create_if_missing boolean ) + jsonb + + + Returns target + with the item designated by path + replaced by new_value, or with + new_value added if + create_if_missing is true (which is the + default) and the item designated by path + does not exist. + All earlier steps in the path must exist, or + the target is returned unchanged. + As with the path oriented operators, negative integers that + appear in the path count from the end + of JSON arrays. + If the last path step is an array index that is out of range, + and create_if_missing is true, the new + value is added at the beginning of the array if the index is negative, + or at the end of the array if it is positive. 
+ + + jsonb_set('[{"f1":1,"f2":null},2,null,3]', '{0,f1}', '[2,3,4]', false) + [{"f1": [2, 3, 4], "f2": null}, 2, null, 3] + + + jsonb_set('[{"f1":1,"f2":null},2]', '{0,f3}', '[2,3,4]') + [{"f1": 1, "f2": null, "f3": [2, 3, 4]}, 2] + + + + + + + jsonb_set_lax + + jsonb_set_lax ( target jsonb, path text[], new_value jsonb , create_if_missing boolean , null_value_treatment text ) + jsonb + + + If new_value is not NULL, + behaves identically to jsonb_set. Otherwise behaves + according to the value + of null_value_treatment which must be one + of 'raise_exception', + 'use_json_null', 'delete_key', or + 'return_target'. The default is + 'use_json_null'. + + + jsonb_set_lax('[{"f1":1,"f2":null},2,null,3]', '{0,f1}', null) + [{"f1": null, "f2": null}, 2, null, 3] + + + jsonb_set_lax('[{"f1":99,"f2":null},2]', '{0,f3}', null, true, 'return_target') + [{"f1": 99, "f2": null}, 2] + + + + + + + jsonb_insert + + jsonb_insert ( target jsonb, path text[], new_value jsonb , insert_after boolean ) + jsonb + + + Returns target + with new_value inserted. If the item + designated by the path is an array + element, new_value will be inserted before + that item if insert_after is false (which + is the default), or after it + if insert_after is true. If the item + designated by the path is an object + field, new_value will be inserted only if + the object does not already contain that key. + All earlier steps in the path must exist, or + the target is returned unchanged. + As with the path oriented operators, negative integers that + appear in the path count from the end + of JSON arrays. + If the last path step is an array index that is out of range, the new + value is added at the beginning of the array if the index is negative, + or at the end of the array if it is positive. + + + jsonb_insert('{"a": [0,1,2]}', '{a, 1}', '"new_value"') + {"a": [0, "new_value", 1, 2]} + + + jsonb_insert('{"a": [0,1,2]}', '{a, 1}', '"new_value"', true) + {"a": [0, 1, "new_value", 2]} + + + + + + + json_strip_nulls + + json_strip_nulls ( target json ,strip_in_arrays boolean ) + json + + + + jsonb_strip_nulls + + jsonb_strip_nulls ( target jsonb ,strip_in_arrays boolean ) + jsonb + + + Deletes all object fields that have null values from the given JSON + value, recursively. + If strip_in_arrays is true (the default is false), + null array elements are also stripped. + Otherwise they are not stripped. Bare null values are never stripped. + + + json_strip_nulls('[{"f1":1, "f2":null}, 2, null, 3]') + [{"f1":1},2,null,3] + + + jsonb_strip_nulls('[1,2,null,3,4]', true); + [1,2,3,4] + + + + + + + + jsonb_path_exists + + jsonb_path_exists ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + boolean + + + Checks whether the JSON path returns any item for the specified JSON + value. + (This is useful only with SQL-standard JSON path expressions, not + predicate check + expressions, since those always return a value.) + If the vars argument is specified, it must + be a JSON object, and its fields provide named values to be + substituted into the jsonpath expression. + If the silent argument is specified and + is true, the function suppresses the same errors + as the @? and @@ operators do. + + + jsonb_path_exists('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') + t + + + + + + + jsonb_path_match + + jsonb_path_match ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + boolean + + + Returns the SQL boolean result of a JSON path predicate check + for the specified JSON value. 
+ (This is useful only + with predicate + check expressions, not SQL-standard JSON path expressions, + since it will either fail or return NULL if the + path result is not a single boolean value.) + The optional vars + and silent arguments act the same as + for jsonb_path_exists. + + + jsonb_path_match('{"a":[1,2,3,4,5]}', 'exists($.a[*] ? (@ >= $min && @ <= $max))', '{"min":2, "max":4}') + t + + + + + + + jsonb_path_query + + jsonb_path_query ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + setof jsonb + + + Returns all JSON items returned by the JSON path for the specified + JSON value. + For SQL-standard JSON path expressions it returns the JSON + values selected from target. + For predicate + check expressions it returns the result of the predicate + check: true, false, + or null. + The optional vars + and silent arguments act the same as + for jsonb_path_exists. + + + select * from jsonb_path_query('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') + + + jsonb_path_query +------------------ + 2 + 3 + 4 + + + + + + + + jsonb_path_query_array + + jsonb_path_query_array ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + jsonb + + + Returns all JSON items returned by the JSON path for the specified + JSON value, as a JSON array. + The parameters are the same as + for jsonb_path_query. + + + jsonb_path_query_array('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') + [2, 3, 4] + + + + + + + jsonb_path_query_first + + jsonb_path_query_first ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + jsonb + + + Returns the first JSON item returned by the JSON path for the + specified JSON value, or NULL if there are no + results. + The parameters are the same as + for jsonb_path_query. + + + jsonb_path_query_first('{"a":[1,2,3,4,5]}', '$.a[*] ? (@ >= $min && @ <= $max)', '{"min":2, "max":4}') + 2 + + + + + + + jsonb_path_exists_tz + + jsonb_path_exists_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + boolean + + + + jsonb_path_match_tz + + jsonb_path_match_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + boolean + + + + jsonb_path_query_tz + + jsonb_path_query_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + setof jsonb + + + + jsonb_path_query_array_tz + + jsonb_path_query_array_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + jsonb + + + + jsonb_path_query_first_tz + + jsonb_path_query_first_tz ( target jsonb, path jsonpath , vars jsonb , silent boolean ) + jsonb + + + These functions act like their counterparts described above without + the _tz suffix, except that these functions support + comparisons of date/time values that require timezone-aware + conversions. The example below requires interpretation of the + date-only value 2015-08-02 as a timestamp with time + zone, so the result depends on the current + setting. Due to this dependency, these + functions are marked as stable, which means these functions cannot be + used in indexes. Their counterparts are immutable, and so can be used + in indexes; but they will throw errors if asked to make such + comparisons. + + + jsonb_path_exists_tz('["2015-08-01 12:00:00-05"]', '$[*] ? (@.datetime() < "2015-08-02".datetime())') + t + + + + + + + jsonb_pretty + + jsonb_pretty ( jsonb ) + text + + + Converts the given JSON value to pretty-printed, indented text. 
+ + + jsonb_pretty('[{"f1":1,"f2":null}, 2]') + + +[ + { + "f1": 1, + "f2": null + }, + 2 +] + + + + + + + + json_typeof + + json_typeof ( json ) + text + + + + jsonb_typeof + + jsonb_typeof ( jsonb ) + text + + + Returns the type of the top-level JSON value as a text string. + Possible types are + object, array, + string, number, + boolean, and null. + (The null result should not be confused + with an SQL NULL; see the examples.) + + + json_typeof('-123.4') + number + + + json_typeof('null'::json) + null + + + json_typeof(NULL::json) IS NULL + t + + + + +
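+
+   These processing functions compose naturally. For example, the
+   following replaces a nested field with jsonb_set and then removes
+   null-valued object fields with jsonb_strip_nulls (a minimal sketch):
+
+SELECT jsonb_strip_nulls(
+         jsonb_set('{"user": {"name": "ann", "tmp": null}}'::jsonb,
+                   '{user,name}', '"bob"'));
+     jsonb_strip_nulls
+---------------------------
+ {"user": {"name": "bob"}}
+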
+
+ + + The SQL/JSON Path Language + + + SQL/JSON path language + + + + SQL/JSON path expressions specify item(s) to be retrieved + from a JSON value, similarly to XPath expressions used + for access to XML content. In PostgreSQL, + path expressions are implemented as the jsonpath + data type and can use any elements described in + . + + + + JSON query functions and operators + pass the provided path expression to the path engine + for evaluation. If the expression matches the queried JSON data, + the corresponding JSON item, or set of items, is returned. + If there is no match, the result will be NULL, + false, or an error, depending on the function. + Path expressions are written in the SQL/JSON path language + and can include arithmetic expressions and functions. + + + + A path expression consists of a sequence of elements allowed + by the jsonpath data type. + The path expression is normally evaluated from left to right, but + you can use parentheses to change the order of operations. + If the evaluation is successful, a sequence of JSON items is produced, + and the evaluation result is returned to the JSON query function + that completes the specified computation. + + + + To refer to the JSON value being queried (the + context item), use the $ variable + in the path expression. The first element of a path must always + be $. It can be followed by one or more + accessor operators, + which go down the JSON structure level by level to retrieve sub-items + of the context item. Each accessor operator acts on the + result(s) of the previous evaluation step, producing zero, one, or more + output items from each input item. + + + + For example, suppose you have some JSON data from a GPS tracker that you + would like to parse, such as: + +SELECT '{ + "track": { + "segments": [ + { + "location": [ 47.763, 13.4034 ], + "start time": "2018-10-14 10:05:14", + "HR": 73 + }, + { + "location": [ 47.706, 13.2635 ], + "start time": "2018-10-14 10:39:21", + "HR": 135 + } + ] + } +}' AS json \gset + + (The above example can be copied-and-pasted + into psql to set things up for the following + examples. Then psql will + expand :'json' into a suitably-quoted string + constant containing the JSON value.) + + + + To retrieve the available track segments, you need to use the + .key accessor + operator to descend through surrounding JSON objects, for example: + +=> select jsonb_path_query(:'json', '$.track.segments'); + jsonb_path_query +-----------------------------------------------------------&zwsp;-----------------------------------------------------------&zwsp;--------------------------------------------- + [{"HR": 73, "location": [47.763, 13.4034], "start time": "2018-10-14 10:05:14"}, {"HR": 135, "location": [47.706, 13.2635], "start time": "2018-10-14 10:39:21"}] + + + + + To retrieve the contents of an array, you typically use the + [*] operator. 
+ The following example will return the location coordinates for all + the available track segments: + +=> select jsonb_path_query(:'json', '$.track.segments[*].location'); + jsonb_path_query +------------------- + [47.763, 13.4034] + [47.706, 13.2635] + + Here we started with the whole JSON input value ($), + then the .track accessor selected the JSON object + associated with the "track" object key, then + the .segments accessor selected the JSON array + associated with the "segments" key within that + object, then the [*] accessor selected each element + of that array (producing a series of items), then + the .location accessor selected the JSON array + associated with the "location" key within each of + those objects. In this example, each of those objects had + a "location" key; but if any of them did not, + the .location accessor would have simply produced no + output for that input item. + + + + To return the coordinates of the first segment only, you can + specify the corresponding subscript in the [] + accessor operator. Recall that JSON array indexes are 0-relative: + +=> select jsonb_path_query(:'json', '$.track.segments[0].location'); + jsonb_path_query +------------------- + [47.763, 13.4034] + + + + + The result of each path evaluation step can be processed + by one or more of the jsonpath operators and methods + listed in . + Each method name must be preceded by a dot. For example, + you can get the size of an array: + +=> select jsonb_path_query(:'json', '$.track.segments.size()'); + jsonb_path_query +------------------ + 2 + + More examples of using jsonpath operators + and methods within path expressions appear below in + . + + + + A path can also contain + filter expressions that work similarly to the + WHERE clause in SQL. A filter expression begins with + a question mark and provides a condition in parentheses: + + +? (condition) + + + + + Filter expressions must be written just after the path evaluation step + to which they should apply. The result of that step is filtered to include + only those items that satisfy the provided condition. SQL/JSON defines + three-valued logic, so the condition can + produce true, false, + or unknown. The unknown value + plays the same role as SQL NULL and can be tested + for with the is unknown predicate. Further path + evaluation steps use only those items for which the filter expression + returned true. + + + + The functions and operators that can be used in filter expressions are + listed in . Within a + filter expression, the @ variable denotes the value + being considered (i.e., one result of the preceding path step). You can + write accessor operators after @ to retrieve component + items. + + + + For example, suppose you would like to retrieve all heart rate values higher + than 130. You can achieve this as follows: + +=> select jsonb_path_query(:'json', '$.track.segments[*].HR ? (@ > 130)'); + jsonb_path_query +------------------ + 135 + + + + + To get the start times of segments with such values, you have to + filter out irrelevant segments before selecting the start times, so the + filter expression is applied to the previous step, and the path used + in the condition is different: + +=> select jsonb_path_query(:'json', '$.track.segments[*] ? (@.HR > 130)."start time"'); + jsonb_path_query +----------------------- + "2018-10-14 10:39:21" + + + + + You can use several filter expressions in sequence, if required. 
+ The following example selects start times of all segments that + contain locations with relevant coordinates and high heart rate values: + +=> select jsonb_path_query(:'json', '$.track.segments[*] ? (@.location[1] < 13.4) ? (@.HR > 130)."start time"'); + jsonb_path_query +----------------------- + "2018-10-14 10:39:21" + + + + + Using filter expressions at different nesting levels is also allowed. + The following example first filters all segments by location, and then + returns high heart rate values for these segments, if available: + +=> select jsonb_path_query(:'json', '$.track.segments[*] ? (@.location[1] < 13.4).HR ? (@ > 130)'); + jsonb_path_query +------------------ + 135 + + + + + You can also nest filter expressions within each other. + This example returns the size of the track if it contains any + segments with high heart rate values, or an empty sequence otherwise: + +=> select jsonb_path_query(:'json', '$.track ? (exists(@.segments[*] ? (@.HR > 130))).segments.size()'); + jsonb_path_query +------------------ + 2 + + + + + Deviations from the SQL Standard + + PostgreSQL's implementation of the SQL/JSON path + language has the following deviations from the SQL/JSON standard. + + + + Boolean Predicate Check Expressions + + As an extension to the SQL standard, + a PostgreSQL path expression can be a + Boolean predicate, whereas the SQL standard allows predicates only within + filters. While SQL-standard path expressions return the relevant + element(s) of the queried JSON value, predicate check expressions + return the single three-valued jsonb result of the + predicate: true, + false, or null. + For example, we could write this SQL-standard filter expression: + +=> select jsonb_path_query(:'json', '$.track.segments ?(@[*].HR > 130)'); + jsonb_path_query +-----------------------------------------------------------&zwsp;---------------------- + {"HR": 135, "location": [47.706, 13.2635], "start time": "2018-10-14 10:39:21"} + + The similar predicate check expression simply + returns true, indicating that a match exists: + +=> select jsonb_path_query(:'json', '$.track.segments[*].HR > 130'); + jsonb_path_query +------------------ + true + + + + + + Predicate check expressions are required in the + @@ operator (and the + jsonb_path_match function), and should not be used + with the @? operator (or the + jsonb_path_exists function). + + + + + + Regular Expression Interpretation + + There are minor differences in the interpretation of regular + expression patterns used in like_regex filters, as + described in . + + + + + + Strict and Lax Modes + + When you query JSON data, the path expression may not match the + actual JSON data structure. An attempt to access a non-existent + member of an object or element of an array is defined as a + structural error. SQL/JSON path expressions have two modes + of handling structural errors: + + + + + + lax (default) — the path engine implicitly adapts + the queried data to the specified path. + Any structural errors that cannot be fixed as described below + are suppressed, producing no match. + + + + + strict — if a structural error occurs, an error is raised. + + + + + + Lax mode facilitates matching of a JSON document and path + expression when the JSON data does not conform to the expected schema. + If an operand does not match the requirements of a particular operation, + it can be automatically wrapped as an SQL/JSON array, or unwrapped by + converting its elements into an SQL/JSON sequence before performing + the operation. 
Also, comparison operators automatically unwrap their + operands in lax mode, so you can compare SQL/JSON arrays + out-of-the-box. An array of size 1 is considered equal to its sole element. + Automatic unwrapping is not performed when: + + + + The path expression contains type() or + size() methods that return the type + and the number of elements in the array, respectively. + + + + + The queried JSON data contain nested arrays. In this case, only + the outermost array is unwrapped, while all the inner arrays + remain unchanged. Thus, implicit unwrapping can only go one + level down within each path evaluation step. + + + + + + + For example, when querying the GPS data listed above, you can + abstract from the fact that it stores an array of segments + when using lax mode: + +=> select jsonb_path_query(:'json', 'lax $.track.segments.location'); + jsonb_path_query +------------------- + [47.763, 13.4034] + [47.706, 13.2635] + + + + + In strict mode, the specified path must exactly match the structure of + the queried JSON document, so using this path + expression will cause an error: + +=> select jsonb_path_query(:'json', 'strict $.track.segments.location'); +ERROR: jsonpath member accessor can only be applied to an object + + To get the same result as in lax mode, you have to explicitly unwrap the + segments array: + +=> select jsonb_path_query(:'json', 'strict $.track.segments[*].location'); + jsonb_path_query +------------------- + [47.763, 13.4034] + [47.706, 13.2635] + + + + + The unwrapping behavior of lax mode can lead to surprising results. For + instance, the following query using the .** accessor + selects every HR value twice: + +=> select jsonb_path_query(:'json', 'lax $.**.HR'); + jsonb_path_query +------------------ + 73 + 135 + 73 + 135 + + This happens because the .** accessor selects both + the segments array and each of its elements, while + the .HR accessor automatically unwraps arrays when + using lax mode. To avoid surprising results, we recommend using + the .** accessor only in strict mode. The + following query selects each HR value just once: + +=> select jsonb_path_query(:'json', 'strict $.**.HR'); + jsonb_path_query +------------------ + 73 + 135 + + + + + The unwrapping of arrays can also lead to unexpected results. Consider this + example, which selects all the location arrays: + +=> select jsonb_path_query(:'json', 'lax $.track.segments[*].location'); + jsonb_path_query +------------------- + [47.763, 13.4034] + [47.706, 13.2635] +(2 rows) + + As expected it returns the full arrays. But applying a filter expression + causes the arrays to be unwrapped to evaluate each item, returning only the + items that match the expression: + +=> select jsonb_path_query(:'json', 'lax $.track.segments[*].location ?(@[*] > 15)'); + jsonb_path_query +------------------ + 47.763 + 47.706 +(2 rows) + + This despite the fact that the full arrays are selected by the path + expression. Use strict mode to restore selecting the arrays: + +=> select jsonb_path_query(:'json', 'strict $.track.segments[*].location ?(@[*] > 15)'); + jsonb_path_query +------------------- + [47.763, 13.4034] + [47.706, 13.2635] +(2 rows) + + + + + + SQL/JSON Path Operators and Methods + + + shows the operators and + methods available in jsonpath. Note that while the unary + operators and methods can be applied to multiple values resulting from a + preceding path step, the binary operators (addition etc.) can only be + applied to single values. 
In lax mode, methods applied to an array will be + executed for each value in the array. The exceptions are + .type() and .size(), which apply to + the array itself. + + + + <type>jsonpath</type> Operators and Methods + + + + + Operator/Method + + + Description + + + Example(s) + + + + + + + + number + number + number + + + Addition + + + jsonb_path_query('[2]', '$[0] + 3') + 5 + + + + + + + number + number + + + Unary plus (no operation); unlike addition, this can iterate over + multiple values + + + jsonb_path_query_array('{"x": [2,3,4]}', '+ $.x') + [2, 3, 4] + + + + + + number - number + number + + + Subtraction + + + jsonb_path_query('[2]', '7 - $[0]') + 5 + + + + + + - number + number + + + Negation; unlike subtraction, this can iterate over + multiple values + + + jsonb_path_query_array('{"x": [2,3,4]}', '- $.x') + [-2, -3, -4] + + + + + + number * number + number + + + Multiplication + + + jsonb_path_query('[4]', '2 * $[0]') + 8 + + + + + + number / number + number + + + Division + + + jsonb_path_query('[8.5]', '$[0] / 2') + 4.2500000000000000 + + + + + + number % number + number + + + Modulo (remainder) + + + jsonb_path_query('[32]', '$[0] % 10') + 2 + + + + + + value . type() + string + + + Type of the JSON item (see json_typeof) + + + jsonb_path_query_array('[1, "2", {}]', '$[*].type()') + ["number", "string", "object"] + + + + + + value . size() + number + + + Size of the JSON item (number of array elements, or 1 if not an + array) + + + jsonb_path_query('{"m": [11, 15]}', '$.m.size()') + 2 + + + + + + value . boolean() + boolean + + + Boolean value converted from a JSON boolean, number, or string + + + jsonb_path_query_array('[1, "yes", false]', '$[*].boolean()') + [true, true, false] + + + + + + value . string() + string + + + String value converted from a JSON boolean, number, string, or + datetime + + + jsonb_path_query_array('[1.23, "xyz", false]', '$[*].string()') + ["1.23", "xyz", "false"] + + + jsonb_path_query('"2023-08-15 12:34:56"', '$.timestamp().string()') + "2023-08-15T12:34:56" + + + + + + value . double() + number + + + Approximate floating-point number converted from a JSON number or + string + + + jsonb_path_query('{"len": "1.9"}', '$.len.double() * 2') + 3.8 + + + + + + number . ceiling() + number + + + Nearest integer greater than or equal to the given number + + + jsonb_path_query('{"h": 1.3}', '$.h.ceiling()') + 2 + + + + + + number . floor() + number + + + Nearest integer less than or equal to the given number + + + jsonb_path_query('{"h": 1.7}', '$.h.floor()') + 1 + + + + + + number . abs() + number + + + Absolute value of the given number + + + jsonb_path_query('{"z": -0.3}', '$.z.abs()') + 0.3 + + + + + + value . bigint() + bigint + + + Big integer value converted from a JSON number or string + + + jsonb_path_query('{"len": "9876543219"}', '$.len.bigint()') + 9876543219 + + + + + + value . decimal( [ precision [ , scale ] ] ) + decimal + + + Rounded decimal value converted from a JSON number or string + (precision and scale must be + integer values) + + + jsonb_path_query('1234.5678', '$.decimal(6, 2)') + 1234.57 + + + + + + value . integer() + integer + + + Integer value converted from a JSON number or string + + + jsonb_path_query('{"len": "12345"}', '$.len.integer()') + 12345 + + + + + + value . number() + numeric + + + Numeric value converted from a JSON number or string + + + jsonb_path_query('{"len": "123.45"}', '$.len.number()') + 123.45 + + + + + + string . 
datetime() + datetime_type + (see note) + + + Date/time value converted from a string + + + jsonb_path_query('["2015-8-1", "2015-08-12"]', '$[*] ? (@.datetime() < "2015-08-2".datetime())') + "2015-8-1" + + + + + + string . datetime(template) + datetime_type + (see note) + + + Date/time value converted from a string using the + specified to_timestamp template + + + jsonb_path_query_array('["12:30", "18:40"]', '$[*].datetime("HH24:MI")') + ["12:30:00", "18:40:00"] + + + + + + string . date() + date + + + Date value converted from a string + + + jsonb_path_query('"2023-08-15"', '$.date()') + "2023-08-15" + + + + + + string . time() + time without time zone + + + Time without time zone value converted from a string + + + jsonb_path_query('"12:34:56"', '$.time()') + "12:34:56" + + + + + + string . time(precision) + time without time zone + + + Time without time zone value converted from a string, with fractional + seconds adjusted to the given precision + + + jsonb_path_query('"12:34:56.789"', '$.time(2)') + "12:34:56.79" + + + + + + string . time_tz() + time with time zone + + + Time with time zone value converted from a string + + + jsonb_path_query('"12:34:56 +05:30"', '$.time_tz()') + "12:34:56+05:30" + + + + + + string . time_tz(precision) + time with time zone + + + Time with time zone value converted from a string, with fractional + seconds adjusted to the given precision + + + jsonb_path_query('"12:34:56.789 +05:30"', '$.time_tz(2)') + "12:34:56.79+05:30" + + + + + + string . timestamp() + timestamp without time zone + + + Timestamp without time zone value converted from a string + + + jsonb_path_query('"2023-08-15 12:34:56"', '$.timestamp()') + "2023-08-15T12:34:56" + + + + + + string . timestamp(precision) + timestamp without time zone + + + Timestamp without time zone value converted from a string, with + fractional seconds adjusted to the given precision + + + jsonb_path_query('"2023-08-15 12:34:56.789"', '$.timestamp(2)') + "2023-08-15T12:34:56.79" + + + + + + string . timestamp_tz() + timestamp with time zone + + + Timestamp with time zone value converted from a string + + + jsonb_path_query('"2023-08-15 12:34:56 +05:30"', '$.timestamp_tz()') + "2023-08-15T12:34:56+05:30" + + + + + + string . timestamp_tz(precision) + timestamp with time zone + + + Timestamp with time zone value converted from a string, with fractional + seconds adjusted to the given precision + + + jsonb_path_query('"2023-08-15 12:34:56.789 +05:30"', '$.timestamp_tz(2)') + "2023-08-15T12:34:56.79+05:30" + + + + + + object . keyvalue() + array + + + The object's key-value pairs, represented as an array of objects + containing three fields: "key", + "value", and "id"; + "id" is a unique identifier of the object the + key-value pair belongs to + + + jsonb_path_query_array('{"x": "20", "y": 32}', '$.keyvalue()') + [{"id": 0, "key": "x", "value": "20"}, {"id": 0, "key": "y", "value": 32}] + + + + +
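+
+   
+    The methods above can also be chained, with each method applied to the
+    result of the preceding step, as the
+    $.timestamp().string() example in the table already
+    hints. As a further illustration (the input document here is invented
+    for the example), the following query converts string readings to
+    numbers, takes their absolute values, and rounds them up:
+
+=> select jsonb_path_query_array('{"readings": ["-2.5", "3.1"]}', '$.readings[*].double().abs().ceiling()');
+ jsonb_path_query_array
+------------------------
+ [3, 4]
+
+   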
+ + + + The result type of the datetime() and + datetime(template) + methods can be date, timetz, time, + timestamptz, or timestamp. + Both methods determine their result type dynamically. + + + The datetime() method sequentially tries to + match its input string to the ISO formats + for date, timetz, time, + timestamptz, and timestamp. It stops on + the first matching format and emits the corresponding data type. + + + The datetime(template) + method determines the result type according to the fields used in the + provided template string. + + + The datetime() and + datetime(template) methods + use the same parsing rules as the to_timestamp SQL + function does (see ), with three + exceptions. First, these methods don't allow unmatched template + patterns. Second, only the following separators are allowed in the + template string: minus sign, period, solidus (slash), comma, apostrophe, + semicolon, colon and space. Third, separators in the template string + must exactly match the input string. + + + If different date/time types need to be compared, an implicit cast is + applied. A date value can be cast to timestamp + or timestamptz, timestamp can be cast to + timestamptz, and time to timetz. + However, all but the first of these conversions depend on the current + setting, and thus can only be performed + within timezone-aware jsonpath functions. Similarly, other + date/time-related methods that convert strings to date/time types + also do this casting, which may involve the current + setting. Therefore, these conversions can + also only be performed within timezone-aware jsonpath + functions. + + + + + shows the available + filter expression elements. + + + + <type>jsonpath</type> Filter Expression Elements + + + + + Predicate/Value + + + Description + + + Example(s) + + + + + + + + value == value + boolean + + + Equality comparison (this, and the other comparison operators, work on + all JSON scalar values) + + + jsonb_path_query_array('[1, "a", 1, 3]', '$[*] ? (@ == 1)') + [1, 1] + + + jsonb_path_query_array('[1, "a", 1, 3]', '$[*] ? (@ == "a")') + ["a"] + + + + + + value != value + boolean + + + value <> value + boolean + + + Non-equality comparison + + + jsonb_path_query_array('[1, 2, 1, 3]', '$[*] ? (@ != 1)') + [2, 3] + + + jsonb_path_query_array('["a", "b", "c"]', '$[*] ? (@ <> "b")') + ["a", "c"] + + + + + + value < value + boolean + + + Less-than comparison + + + jsonb_path_query_array('[1, 2, 3]', '$[*] ? (@ < 2)') + [1] + + + + + + value <= value + boolean + + + Less-than-or-equal-to comparison + + + jsonb_path_query_array('["a", "b", "c"]', '$[*] ? (@ <= "b")') + ["a", "b"] + + + + + + value > value + boolean + + + Greater-than comparison + + + jsonb_path_query_array('[1, 2, 3]', '$[*] ? (@ > 2)') + [3] + + + + + + value >= value + boolean + + + Greater-than-or-equal-to comparison + + + jsonb_path_query_array('[1, 2, 3]', '$[*] ? (@ >= 2)') + [2, 3] + + + + + + true + boolean + + + JSON constant true + + + jsonb_path_query('[{"name": "John", "parent": false}, {"name": "Chris", "parent": true}]', '$[*] ? (@.parent == true)') + {"name": "Chris", "parent": true} + + + + + + false + boolean + + + JSON constant false + + + jsonb_path_query('[{"name": "John", "parent": false}, {"name": "Chris", "parent": true}]', '$[*] ? 
(@.parent == false)') + {"name": "John", "parent": false} + + + + + + null + value + + + JSON constant null (note that, unlike in SQL, + comparison to null works normally) + + + jsonb_path_query('[{"name": "Mary", "job": null}, {"name": "Michael", "job": "driver"}]', '$[*] ? (@.job == null) .name') + "Mary" + + + + + + boolean && boolean + boolean + + + Boolean AND + + + jsonb_path_query('[1, 3, 7]', '$[*] ? (@ > 1 && @ < 5)') + 3 + + + + + + boolean || boolean + boolean + + + Boolean OR + + + jsonb_path_query('[1, 3, 7]', '$[*] ? (@ < 1 || @ > 5)') + 7 + + + + + + ! boolean + boolean + + + Boolean NOT + + + jsonb_path_query('[1, 3, 7]', '$[*] ? (!(@ < 5))') + 7 + + + + + + boolean is unknown + boolean + + + Tests whether a Boolean condition is unknown. + + + jsonb_path_query('[-1, 2, 7, "foo"]', '$[*] ? ((@ > 0) is unknown)') + "foo" + + + + + + string like_regex string flag string + boolean + + + Tests whether the first operand matches the regular expression + given by the second operand, optionally with modifications + described by a string of flag characters (see + ). + + + jsonb_path_query_array('["abc", "abd", "aBdC", "abdacb", "babc"]', '$[*] ? (@ like_regex "^ab.*c")') + ["abc", "abdacb"] + + + jsonb_path_query_array('["abc", "abd", "aBdC", "abdacb", "babc"]', '$[*] ? (@ like_regex "^ab.*c" flag "i")') + ["abc", "aBdC", "abdacb"] + + + + + + string starts with string + boolean + + + Tests whether the second operand is an initial substring of the first + operand. + + + jsonb_path_query('["John Smith", "Mary Stone", "Bob Johnson"]', '$[*] ? (@ starts with "John")') + "John Smith" + + + + + + exists ( path_expression ) + boolean + + + Tests whether a path expression matches at least one SQL/JSON item. + Returns unknown if the path expression would result + in an error; the second example uses this to avoid a no-such-key error + in strict mode. + + + jsonb_path_query('{"x": [1, 2], "y": [2, 4]}', 'strict $.* ? (exists (@ ? (@[*] > 2)))') + [2, 4] + + + jsonb_path_query_array('{"value": 41}', 'strict $ ? (exists (@.name)) .name') + [] + + + + +
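+
+   
+    The filter elements above can be combined within a single filter
+    expression. As an illustration (the input document is invented for the
+    example), the following query combines a comparison, the
+    && operator, and like_regex to
+    select the names of adults whose names begin with a,
+    ignoring case:
+
+=> select jsonb_path_query_array('[{"name": "Ann", "age": 20}, {"name": "Bob", "age": 17}, {"name": "amy", "age": 30}]', '$[*] ? (@.age >= 18 && @.name like_regex "^a" flag "i").name');
+ jsonb_path_query_array
+------------------------
+ ["Ann", "amy"]
+
+   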
+ +
+ + + SQL/JSON Regular Expressions + + + LIKE_REGEX + in SQL/JSON + + + + SQL/JSON path expressions allow matching text to a regular expression + with the like_regex filter. For example, the + following SQL/JSON path query would case-insensitively match all + strings in an array that start with an English vowel: + +$[*] ? (@ like_regex "^[aeiou]" flag "i") + + + + + The optional flag string may include one or more of + the characters + i for case-insensitive match, + m to allow ^ + and $ to match at newlines, + s to allow . to match a newline, + and q to quote the whole pattern (reducing the + behavior to a simple substring match). + + + + The SQL/JSON standard borrows its definition for regular expressions + from the LIKE_REGEX operator, which in turn uses the + XQuery standard. PostgreSQL does not currently support the + LIKE_REGEX operator. Therefore, + the like_regex filter is implemented using the + POSIX regular expression engine described in + . This leads to various minor + discrepancies from standard SQL/JSON behavior, which are cataloged in + . + Note, however, that the flag-letter incompatibilities described there + do not apply to SQL/JSON, as it translates the XQuery flag letters to + match what the POSIX engine expects. + + + + Keep in mind that the pattern argument of like_regex + is a JSON path string literal, written according to the rules given in + . This means in particular that any + backslashes you want to use in the regular expression must be doubled. + For example, to match string values of the root document that contain + only digits: + +$.* ? (@ like_regex "^\\d+$") + + + +
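+
+   
+    Putting this together in an SQL query (the array literal below is
+    purely illustrative), the vowel-matching pattern from the beginning of
+    this section can be run as:
+
+=> select jsonb_path_query_array('["apple", "Orange", "pear", "Umbrella"]', '$[*] ? (@ like_regex "^[aeiou]" flag "i")');
+     jsonb_path_query_array
+---------------------------------
+ ["apple", "Orange", "Umbrella"]
+
+   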
+ + + SQL/JSON Query Functions + + SQL/JSON functions JSON_EXISTS(), + JSON_QUERY(), and JSON_VALUE() + described in can be used + to query JSON documents. Each of these functions apply a + path_expression (an SQL/JSON path query) to a + context_item (the document). See + for more details on what + the path_expression can contain. The + path_expression can also reference variables, + whose values are specified with their respective names in the + PASSING clause that is supported by each function. + context_item can be a jsonb value + or a character string that can be successfully cast to jsonb. + + + + SQL/JSON Query Functions + + + + + Function signature + + + Description + + + Example(s) + + + + + + + json_exists + +JSON_EXISTS ( +context_item, path_expression + PASSING { value AS varname } , ... +{ TRUE | FALSE | UNKNOWN | ERROR } ON ERROR ) boolean + + + + + + Returns true if the SQL/JSON path_expression + applied to the context_item yields any + items, false otherwise. + + + + + The ON ERROR clause specifies the behavior if + an error occurs during path_expression + evaluation. Specifying ERROR will cause an error to + be thrown with the appropriate message. Other options include + returning boolean values FALSE or + TRUE or the value UNKNOWN which + is actually an SQL NULL. The default when no ON ERROR + clause is specified is to return the boolean value + FALSE. + + + + + Examples: + + + JSON_EXISTS(jsonb '{"key1": [1,2,3]}', 'strict $.key1[*] ? (@ > $x)' PASSING 2 AS x) + t + + + JSON_EXISTS(jsonb '{"a": [1,2,3]}', 'lax $.a[5]' ERROR ON ERROR) + f + + + JSON_EXISTS(jsonb '{"a": [1,2,3]}', 'strict $.a[5]' ERROR ON ERROR) + + +ERROR: jsonpath array subscript is out of bounds + + + + + + json_query + +JSON_QUERY ( +context_item, path_expression + PASSING { value AS varname } , ... + RETURNING data_type FORMAT JSON ENCODING UTF8 + { WITHOUT | WITH { CONDITIONAL | UNCONDITIONAL } } ARRAY WRAPPER + { KEEP | OMIT } QUOTES ON SCALAR STRING + { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON EMPTY + { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON ERROR ) jsonb + + + + + + Returns the result of applying the SQL/JSON + path_expression to the + context_item. + + + + + By default, the result is returned as a value of type jsonb, + though the RETURNING clause can be used to return + as some other type to which it can be successfully coerced. + + + + + If the path expression may return multiple values, it might be necessary + to wrap those values using the WITH WRAPPER clause to + make it a valid JSON string, because the default behavior is to not wrap + them, as if WITHOUT WRAPPER were specified. The + WITH WRAPPER clause is by default taken to mean + WITH UNCONDITIONAL WRAPPER, which means that even a + single result value will be wrapped. To apply the wrapper only when + multiple values are present, specify WITH CONDITIONAL WRAPPER. + Getting multiple values in result will be treated as an error if + WITHOUT WRAPPER is specified. + + + + + If the result is a scalar string, by default, the returned value will + be surrounded by quotes, making it a valid JSON value. It can be made + explicit by specifying KEEP QUOTES. Conversely, + quotes can be omitted by specifying OMIT QUOTES. + To ensure that the result is a valid JSON value, OMIT QUOTES + cannot be specified when WITH WRAPPER is also + specified. + + + + + The ON EMPTY clause specifies the behavior if + evaluating path_expression yields an empty + set. 
The ON ERROR clause specifies the behavior + if an error occurs when evaluating path_expression, + when coercing the result value to the RETURNING type, + or when evaluating the ON EMPTY expression if the + path_expression evaluation returns an empty + set. + + + + + For both ON EMPTY and ON ERROR, + specifying ERROR will cause an error to be thrown with + the appropriate message. Other options include returning an SQL NULL, an + empty array (EMPTY ARRAY), + an empty object (EMPTY OBJECT), or a user-specified + expression (DEFAULT expression) + that can be coerced to jsonb or the type specified in RETURNING. + The default when ON EMPTY or ON ERROR + is not specified is to return an SQL NULL value. + + + + + Examples: + + + JSON_QUERY(jsonb '[1,[2,3],null]', 'lax $[*][$off]' PASSING 1 AS off WITH CONDITIONAL WRAPPER) + 3 + + + JSON_QUERY(jsonb '{"a": "[1, 2]"}', 'lax $.a' OMIT QUOTES) + [1, 2] + + + JSON_QUERY(jsonb '{"a": "[1, 2]"}', 'lax $.a' RETURNING int[] OMIT QUOTES ERROR ON ERROR) + + +ERROR: malformed array literal: "[1, 2]" +DETAIL: Missing "]" after array dimensions. + + + + + + + json_value + +JSON_VALUE ( +context_item, path_expression + PASSING { value AS varname } , ... + RETURNING data_type + { ERROR | NULL | DEFAULT expression } ON EMPTY + { ERROR | NULL | DEFAULT expression } ON ERROR ) text + + + + + + Returns the result of applying the SQL/JSON + path_expression to the + context_item. + + + + + Only use JSON_VALUE() if the extracted value is + expected to be a single SQL/JSON scalar item; + getting multiple values will be treated as an error. If you expect that + extracted value might be an object or an array, use the + JSON_QUERY function instead. + + + + + By default, the result, which must be a single scalar value, is + returned as a value of type text, though the + RETURNING clause can be used to return as some + other type to which it can be successfully coerced. + + + + + The ON ERROR and ON EMPTY + clauses have similar semantics as mentioned in the description of + JSON_QUERY, except the set of values returned in + lieu of throwing an error is different. + + + + + Note that scalar strings returned by JSON_VALUE + always have their quotes removed, equivalent to specifying + OMIT QUOTES in JSON_QUERY. + + + + + Examples: + + + JSON_VALUE(jsonb '"123.45"', '$' RETURNING float) + 123.45 + + + JSON_VALUE(jsonb '"03:04 2015-02-01"', '$.datetime("HH24:MI YYYY-MM-DD")' RETURNING date) + 2015-02-01 + + + JSON_VALUE(jsonb '[1,2]', 'strict $[$off]' PASSING 1 as off) + 2 + + + JSON_VALUE(jsonb '[1,2]', 'strict $[*]' DEFAULT 9 ON ERROR) + 9 + + + + + +
+ + + The context_item expression is converted to + jsonb by an implicit cast if the expression is not already of + type jsonb. Note, however, that any parsing errors that occur + during that conversion are thrown unconditionally, that is, are not + handled according to the (specified or implicit) ON ERROR + clause. + + + + + JSON_VALUE() returns an SQL NULL if + path_expression returns a JSON + null, whereas JSON_QUERY() returns + the JSON null as is. + + +
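+
+  
+   As a sketch of how these functions are typically combined in one query
+   (the orders table and its js
+   column are hypothetical), one might extract scalar fields with
+   JSON_VALUE, larger fragments with
+   JSON_QUERY, and filter rows with
+   JSON_EXISTS:
+
+SELECT JSON_VALUE(js, '$.customer' RETURNING text) AS customer,
+       JSON_QUERY(js, '$.items[*].sku' WITH WRAPPER) AS skus
+FROM orders
+WHERE JSON_EXISTS(js, '$.items[*] ? (@.qty > 1)');
+
+  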
+ + + JSON_TABLE + + json_table + + + + JSON_TABLE is an SQL/JSON function which + queries JSON data + and presents the results as a relational view, which can be accessed as a + regular SQL table. You can use JSON_TABLE inside + the FROM clause of a SELECT, + UPDATE, or DELETE and as data source + in a MERGE statement. + + + + Taking JSON data as input, JSON_TABLE uses a JSON path + expression to extract a part of the provided data to use as a + row pattern for the constructed view. Each SQL/JSON + value given by the row pattern serves as source for a separate row in the + constructed view. + + + + To split the row pattern into columns, JSON_TABLE + provides the COLUMNS clause that defines the + schema of the created view. For each column, a separate JSON path expression + can be specified to be evaluated against the row pattern to get an SQL/JSON + value that will become the value for the specified column in a given output + row. + + + + JSON data stored at a nested level of the row pattern can be extracted using + the NESTED PATH clause. Each + NESTED PATH clause can be used to generate one or more + columns using the data from a nested level of the row pattern. Those + columns can be specified using a COLUMNS clause that + looks similar to the top-level COLUMNS clause. Rows constructed from + NESTED COLUMNS are called child rows and are joined + against the row constructed from the columns specified in the parent + COLUMNS clause to get the row in the final view. Child + columns themselves may contain a NESTED PATH + specification thus allowing to extract data located at arbitrary nesting + levels. Columns produced by multiple NESTED PATHs at the + same level are considered to be siblings of each + other and their rows after joining with the parent row are combined using + UNION. + + + + The rows produced by JSON_TABLE are laterally + joined to the row that generated them, so you do not have to explicitly join + the constructed view with the original table holding JSON + data. + + + + The syntax is: + + + +JSON_TABLE ( + context_item, path_expression AS json_path_name PASSING { value AS varname } , ... + COLUMNS ( json_table_column , ... ) + { ERROR | EMPTY ARRAY} ON ERROR +) + + +where json_table_column is: + + name FOR ORDINALITY + | name type + FORMAT JSON ENCODING UTF8 + PATH path_expression + { WITHOUT | WITH { CONDITIONAL | UNCONDITIONAL } } ARRAY WRAPPER + { KEEP | OMIT } QUOTES ON SCALAR STRING + { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON EMPTY + { ERROR | NULL | EMPTY { ARRAY | OBJECT } | DEFAULT expression } ON ERROR + | name type EXISTS PATH path_expression + { ERROR | TRUE | FALSE | UNKNOWN } ON ERROR + | NESTED PATH path_expression AS json_path_name COLUMNS ( json_table_column , ... ) + + + + Each syntax element is described below in more detail. + + + + + + context_item, path_expression AS json_path_name PASSING { value AS varname } , ... + + + + The context_item specifies the input document + to query, the path_expression is an SQL/JSON + path expression defining the query, and json_path_name + is an optional name for the path_expression. + The optional PASSING clause provides data values for + the variables mentioned in the path_expression. + The result of the input data evaluation using the aforementioned elements + is called the row pattern, which is used as the + source for row values in the constructed view. + + + + + + + COLUMNS ( json_table_column , ... ) + + + + + The COLUMNS clause defining the schema of the + constructed view. 
In this clause, you can specify each column to be + filled with an SQL/JSON value obtained by applying a JSON path expression + against the row pattern. json_table_column has + the following variants: + + + + + + name FOR ORDINALITY + + + + Adds an ordinality column that provides sequential row numbering starting + from 1. Each NESTED PATH (see below) gets its own + counter for any nested ordinality columns. + + + + + + + name type + FORMAT JSON ENCODING UTF8 + PATH path_expression + + + + Inserts an SQL/JSON value obtained by applying + path_expression against the row pattern into + the view's output row after coercing it to specified + type. + + + Specifying FORMAT JSON makes it explicit that you + expect the value to be a valid json object. It only + makes sense to specify FORMAT JSON if + type is one of bpchar, + bytea, character varying, name, + json, jsonb, text, or a domain over + these types. + + + Optionally, you can specify WRAPPER and + QUOTES clauses to format the output. Note that + specifying OMIT QUOTES overrides + FORMAT JSON if also specified, because unquoted + literals do not constitute valid json values. + + + Optionally, you can use ON EMPTY and + ON ERROR clauses to specify whether to throw the error + or return the specified value when the result of JSON path evaluation is + empty and when an error occurs during JSON path evaluation or when + coercing the SQL/JSON value to the specified type, respectively. The + default for both is to return a NULL value. + + + + This clause is internally turned into and has the same semantics as + JSON_VALUE or JSON_QUERY. + The latter if the specified type is not a scalar type or if either of + FORMAT JSON, WRAPPER, or + QUOTES clause is present. + + + + + + + + name type + EXISTS PATH path_expression + + + + Inserts a boolean value obtained by applying + path_expression against the row pattern + into the view's output row after coercing it to specified + type. + + + The value corresponds to whether applying the PATH + expression to the row pattern yields any values. + + + The specified type should have a cast from the + boolean type. + + + Optionally, you can use ON ERROR to specify whether to + throw the error or return the specified value when an error occurs during + JSON path evaluation or when coercing SQL/JSON value to the specified + type. The default is to return a boolean value + FALSE. + + + + This clause is internally turned into and has the same semantics as + JSON_EXISTS. + + + + + + + + NESTED PATH path_expression AS json_path_name + COLUMNS ( json_table_column , ... ) + + + + + Extracts SQL/JSON values from nested levels of the row pattern, + generates one or more columns as defined by the COLUMNS + subclause, and inserts the extracted SQL/JSON values into those + columns. The json_table_column + expression in the COLUMNS subclause uses the same + syntax as in the parent COLUMNS clause. + + + + The NESTED PATH syntax is recursive, + so you can go down multiple nested levels by specifying several + NESTED PATH subclauses within each other. + It allows to unnest the hierarchy of JSON objects and arrays + in a single function invocation rather than chaining several + JSON_TABLE expressions in an SQL statement. + + + + + + + + In each variant of json_table_column described + above, if the PATH clause is omitted, path expression + $.name is used, where + name is the provided column name. + + + + + + + + + AS json_path_name + + + + + The optional json_path_name serves as an + identifier of the provided path_expression. 
+ The name must be unique and distinct from the column names. + + + + + + + { ERROR | EMPTY } ON ERROR + + + + + The optional ON ERROR can be used to specify how to + handle errors when evaluating the top-level + path_expression. Use ERROR + if you want the errors to be thrown and EMPTY to + return an empty table, that is, a table containing 0 rows. Note that + this clause does not affect the errors that occur when evaluating + columns, for which the behavior depends on whether the + ON ERROR clause is specified against a given column. + + + + + + Examples + + + In the examples that follow, the following table containing JSON data + will be used: + + +CREATE TABLE my_films ( js jsonb ); + +INSERT INTO my_films VALUES ( +'{ "favorites" : [ + { "kind" : "comedy", "films" : [ + { "title" : "Bananas", + "director" : "Woody Allen"}, + { "title" : "The Dinner Game", + "director" : "Francis Veber" } ] }, + { "kind" : "horror", "films" : [ + { "title" : "Psycho", + "director" : "Alfred Hitchcock" } ] }, + { "kind" : "thriller", "films" : [ + { "title" : "Vertigo", + "director" : "Alfred Hitchcock" } ] }, + { "kind" : "drama", "films" : [ + { "title" : "Yojimbo", + "director" : "Akira Kurosawa" } ] } + ] }'); + + + + + The following query shows how to use JSON_TABLE to + turn the JSON objects in the my_films table + to a view containing columns for the keys kind, + title, and director contained in + the original JSON along with an ordinality column: + + +SELECT jt.* FROM + my_films, + JSON_TABLE (js, '$.favorites[*]' COLUMNS ( + id FOR ORDINALITY, + kind text PATH '$.kind', + title text PATH '$.films[*].title' WITH WRAPPER, + director text PATH '$.films[*].director' WITH WRAPPER)) AS jt; + + + + id | kind | title | director +----+----------+--------------------------------+---------------------------------- + 1 | comedy | ["Bananas", "The Dinner Game"] | ["Woody Allen", "Francis Veber"] + 2 | horror | ["Psycho"] | ["Alfred Hitchcock"] + 3 | thriller | ["Vertigo"] | ["Alfred Hitchcock"] + 4 | drama | ["Yojimbo"] | ["Akira Kurosawa"] +(4 rows) + + + + + The following is a modified version of the above query to show the + usage of PASSING arguments in the filter specified in + the top-level JSON path expression and the various options for the + individual columns: + + +SELECT jt.* FROM + my_films, + JSON_TABLE (js, '$.favorites[*] ? (@.films[*].director == $filter)' + PASSING 'Alfred Hitchcock' AS filter + COLUMNS ( + id FOR ORDINALITY, + kind text PATH '$.kind', + title text FORMAT JSON PATH '$.films[*].title' OMIT QUOTES, + director text PATH '$.films[*].director' KEEP QUOTES)) AS jt; + + + + id | kind | title | director +----+----------+---------+-------------------- + 1 | horror | Psycho | "Alfred Hitchcock" + 2 | thriller | Vertigo | "Alfred Hitchcock" +(2 rows) + + + + + The following is a modified version of the above query to show the usage + of NESTED PATH for populating title and director + columns, illustrating how they are joined to the parent columns id and + kind: + + +SELECT jt.* FROM + my_films, + JSON_TABLE ( js, '$.favorites[*] ? 
(@.films[*].director == $filter)' + PASSING 'Alfred Hitchcock' AS filter + COLUMNS ( + id FOR ORDINALITY, + kind text PATH '$.kind', + NESTED PATH '$.films[*]' COLUMNS ( + title text FORMAT JSON PATH '$.title' OMIT QUOTES, + director text PATH '$.director' KEEP QUOTES))) AS jt; + + + + id | kind | title | director +----+----------+---------+-------------------- + 1 | horror | Psycho | "Alfred Hitchcock" + 2 | thriller | Vertigo | "Alfred Hitchcock" +(2 rows) + + + + + + The following is the same query but without the filter in the root + path: + + +SELECT jt.* FROM + my_films, + JSON_TABLE ( js, '$.favorites[*]' + COLUMNS ( + id FOR ORDINALITY, + kind text PATH '$.kind', + NESTED PATH '$.films[*]' COLUMNS ( + title text FORMAT JSON PATH '$.title' OMIT QUOTES, + director text PATH '$.director' KEEP QUOTES))) AS jt; + + + + id | kind | title | director +----+----------+-----------------+-------------------- + 1 | comedy | Bananas | "Woody Allen" + 1 | comedy | The Dinner Game | "Francis Veber" + 2 | horror | Psycho | "Alfred Hitchcock" + 3 | thriller | Vertigo | "Alfred Hitchcock" + 4 | drama | Yojimbo | "Akira Kurosawa" +(5 rows) + + + + + + The following shows another query using a different JSON + object as input. It shows the UNION "sibling join" between + NESTED paths $.movies[*] and + $.books[*] and also the usage of + FOR ORDINALITY column at NESTED + levels (columns movie_id, book_id, + and author_id): + + +SELECT * FROM JSON_TABLE ( +'{"favorites": + [{"movies": + [{"name": "One", "director": "John Doe"}, + {"name": "Two", "director": "Don Joe"}], + "books": + [{"name": "Mystery", "authors": [{"name": "Brown Dan"}]}, + {"name": "Wonder", "authors": [{"name": "Jun Murakami"}, {"name":"Craig Doe"}]}] +}]}'::json, '$.favorites[*]' +COLUMNS ( + user_id FOR ORDINALITY, + NESTED '$.movies[*]' + COLUMNS ( + movie_id FOR ORDINALITY, + mname text PATH '$.name', + director text), + NESTED '$.books[*]' + COLUMNS ( + book_id FOR ORDINALITY, + bname text PATH '$.name', + NESTED '$.authors[*]' + COLUMNS ( + author_id FOR ORDINALITY, + author_name text PATH '$.name')))); + + + + user_id | movie_id | mname | director | book_id | bname | author_id | author_name +---------+----------+-------+----------+---------+---------+-----------+-------------- + 1 | 1 | One | John Doe | | | | + 1 | 2 | Two | Don Joe | | | | + 1 | | | | 1 | Mystery | 1 | Brown Dan + 1 | | | | 2 | Wonder | 1 | Jun Murakami + 1 | | | | 2 | Wonder | 2 | Craig Doe +(5 rows) + + + + +
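+
+  
+   As noted above, JSON_TABLE can also be used in
+   data-modifying statements. As a sketch (reusing the
+   my_films table from the examples above), the
+   following DELETE removes every row whose document
+   lists a horror film, by invoking JSON_TABLE in a
+   correlated subquery:
+
+DELETE FROM my_films
+WHERE EXISTS (
+  SELECT 1
+  FROM JSON_TABLE(js, '$.favorites[*]'
+         COLUMNS (kind text PATH '$.kind')) AS jt
+  WHERE jt.kind = 'horror');
+
+  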
diff --git a/doc/src/sgml/func/func-logical.sgml b/doc/src/sgml/func/func-logical.sgml new file mode 100644 index 0000000000000..65e50e65a8117 --- /dev/null +++ b/doc/src/sgml/func/func-logical.sgml @@ -0,0 +1,146 @@ + + Logical Operators + + + operator + logical + + + + Boolean + operators + operators, logical + + + + The usual logical operators are available: + + + AND (operator) + + + + OR (operator) + + + + NOT (operator) + + + + conjunction + + + + disjunction + + + + negation + + + +boolean AND boolean boolean +boolean OR boolean boolean +NOT boolean boolean + + + SQL uses a three-valued logic system with true, + false, and null, which represents unknown. + Observe the following truth tables: + + + + + + a + b + a AND b + a OR b + + + + + + TRUE + TRUE + TRUE + TRUE + + + + TRUE + FALSE + FALSE + TRUE + + + + TRUE + NULL + NULL + TRUE + + + + FALSE + FALSE + FALSE + FALSE + + + + FALSE + NULL + FALSE + NULL + + + + NULL + NULL + NULL + NULL + + + + + + + + + + a + NOT a + + + + + + TRUE + FALSE + + + + FALSE + TRUE + + + + NULL + NULL + + + + + + + + The operators AND and OR are + commutative, that is, you can switch the left and right operands + without affecting the result. (However, it is not guaranteed that + the left operand is evaluated before the right operand. See for more information about the + order of evaluation of subexpressions.) + + diff --git a/doc/src/sgml/func/func-matching.sgml b/doc/src/sgml/func/func-matching.sgml new file mode 100644 index 0000000000000..7d44e2a27bcca --- /dev/null +++ b/doc/src/sgml/func/func-matching.sgml @@ -0,0 +1,2471 @@ + + Pattern Matching + + + pattern matching + + + + There are three separate approaches to pattern matching provided + by PostgreSQL: the traditional + SQL LIKE operator, the + more recent SIMILAR TO operator (added in + SQL:1999), and POSIX-style regular + expressions. Aside from the basic does this string match + this pattern? operators, functions are available to extract + or replace matching substrings and to split a string at matching + locations. + + + + + If you have pattern matching needs that go beyond this, + consider writing a user-defined function in Perl or Tcl. + + + + + + While most regular-expression searches can be executed very quickly, + regular expressions can be contrived that take arbitrary amounts of + time and memory to process. Be wary of accepting regular-expression + search patterns from hostile sources. If you must do so, it is + advisable to impose a statement timeout. + + + + Searches using SIMILAR TO patterns have the same + security hazards, since SIMILAR TO provides many + of the same capabilities as POSIX-style regular + expressions. + + + + LIKE searches, being much simpler than the other + two options, are safer to use with possibly-hostile pattern sources. + + + + + SIMILAR TO and POSIX-style regular + expressions do not support nondeterministic collations. If required, use + LIKE or apply a different collation to the expression + to work around this limitation. + + + + <function>LIKE</function> + + + LIKE + + + +string LIKE pattern ESCAPE escape-character +string NOT LIKE pattern ESCAPE escape-character + + + + The LIKE expression returns true if the + string matches the supplied + pattern. (As + expected, the NOT LIKE expression returns + false if LIKE returns true, and vice versa. + An equivalent expression is + NOT (string LIKE + pattern).) 
+ + + + If pattern does not contain percent + signs or underscores, then the pattern only represents the string + itself; in that case LIKE acts like the + equals operator. An underscore (_) in + pattern stands for (matches) any single + character; a percent sign (%) matches any sequence + of zero or more characters. + + + + Some examples: + +'abc' LIKE 'abc' true +'abc' LIKE 'a%' true +'abc' LIKE '_b_' true +'abc' LIKE 'c' false + + + + + LIKE pattern matching supports nondeterministic + collations (see ), such as + case-insensitive collations or collations that, say, ignore punctuation. + So with a case-insensitive collation, one could have: + +'AbC' LIKE 'abc' COLLATE case_insensitive true +'AbC' LIKE 'a%' COLLATE case_insensitive true + + With collations that ignore certain characters or in general that consider + strings of different lengths equal, the semantics can become a bit more + complicated. Consider these examples: + +'.foo.' LIKE 'foo' COLLATE ign_punct true +'.foo.' LIKE 'f_o' COLLATE ign_punct true +'.foo.' LIKE '_oo' COLLATE ign_punct false + + The way the matching works is that the pattern is partitioned into + sequences of wildcards and non-wildcard strings (wildcards being + _ and %). For example, the pattern + f_o is partitioned into f, _, o, the + pattern _oo is partitioned into _, + oo. The input string matches the pattern if it can be + partitioned in such a way that the wildcards match one character or any + number of characters respectively and the non-wildcard partitions are + equal under the applicable collation. So for example, '.foo.' + LIKE 'f_o' COLLATE ign_punct is true because one can partition + .foo. into .f, o, o., and then + '.f' = 'f' COLLATE ign_punct, 'o' + matches the _ wildcard, and 'o.' = 'o' COLLATE + ign_punct. But '.foo.' LIKE '_oo' COLLATE + ign_punct is false because .foo. cannot be + partitioned in a way that the first character is any character and the + rest of the string compares equal to oo. (Note that + the single-character wildcard always matches exactly one character, + independent of the collation. So in this example, the + _ would match ., but then the rest + of the input string won't match the rest of the pattern.) + + + + LIKE pattern matching always covers the entire + string. Therefore, if it's desired to match a sequence anywhere within + a string, the pattern must start and end with a percent sign. + + + + To match a literal underscore or percent sign without matching + other characters, the respective character in + pattern must be + preceded by the escape character. The default escape + character is the backslash but a different one can be selected by + using the ESCAPE clause. To match the escape + character itself, write two escape characters. + + + + + If you have turned off, + any backslashes you write in literal string constants will need to be + doubled. See for more information. + + + + + It's also possible to select no escape character by writing + ESCAPE ''. This effectively disables the + escape mechanism, which makes it impossible to turn off the + special meaning of underscore and percent signs in the pattern. + + + + According to the SQL standard, omitting ESCAPE + means there is no escape character (rather than defaulting to a + backslash), and a zero-length ESCAPE value is + disallowed. PostgreSQL's behavior in + this regard is therefore slightly nonstandard. + + + + The key word ILIKE can be used instead of + LIKE to make the match case-insensitive according to the + active locale. 
(But this does not support nondeterministic collations.) + This is not in the SQL standard but is a + PostgreSQL extension. + + + + The operator ~~ is equivalent to + LIKE, and ~~* corresponds to + ILIKE. There are also + !~~ and !~~* operators that + represent NOT LIKE and NOT + ILIKE, respectively. All of these operators are + PostgreSQL-specific. You may see these + operator names in EXPLAIN output and similar + places, since the parser actually translates LIKE + et al. to these operators. + + + + The phrases LIKE, ILIKE, + NOT LIKE, and NOT ILIKE are + generally treated as operators + in PostgreSQL syntax; for example they can + be used in expression + operator ANY + (subquery) constructs, although + an ESCAPE clause cannot be included there. In some + obscure cases it may be necessary to use the underlying operator names + instead. + + + + Also see the starts-with operator ^@ and the + corresponding starts_with() function, which are + useful in cases where simply matching the beginning of a string is + needed. + + + + + + <function>SIMILAR TO</function> Regular Expressions + + + regular expression + + + + + SIMILAR TO + + + substring + + + +string SIMILAR TO pattern ESCAPE escape-character +string NOT SIMILAR TO pattern ESCAPE escape-character + + + + The SIMILAR TO operator returns true or + false depending on whether its pattern matches the given string. + It is similar to LIKE, except that it + interprets the pattern using the SQL standard's definition of a + regular expression. SQL regular expressions are a curious cross + between LIKE notation and common (POSIX) regular + expression notation. + + + + Like LIKE, the SIMILAR TO + operator succeeds only if its pattern matches the entire string; + this is unlike common regular expression behavior where the pattern + can match any part of the string. + Also like + LIKE, SIMILAR TO uses + _ and % as wildcard characters denoting + any single character and any string, respectively (these are + comparable to . and .* in POSIX regular + expressions). + + + + In addition to these facilities borrowed from LIKE, + SIMILAR TO supports these pattern-matching + metacharacters borrowed from POSIX regular expressions: + + + + + | denotes alternation (either of two alternatives). + + + + + * denotes repetition of the previous item zero + or more times. + + + + + + denotes repetition of the previous item one + or more times. + + + + + ? denotes repetition of the previous item zero + or one time. + + + + + {m} denotes repetition + of the previous item exactly m times. + + + + + {m,} denotes repetition + of the previous item m or more times. + + + + + {m,n} + denotes repetition of the previous item at least m and + not more than n times. + + + + + Parentheses () can be used to group items into + a single logical item. + + + + + A bracket expression [...] specifies a character + class, just as in POSIX regular expressions. + + + + + Notice that the period (.) is not a metacharacter + for SIMILAR TO. + + + + As with LIKE, a backslash disables the special + meaning of any of these metacharacters. A different escape character + can be specified with ESCAPE, or the escape + capability can be disabled by writing ESCAPE ''. + + + + According to the SQL standard, omitting ESCAPE + means there is no escape character (rather than defaulting to a + backslash), and a zero-length ESCAPE value is + disallowed. PostgreSQL's behavior in + this regard is therefore slightly nonstandard. 
+ + + + Another nonstandard extension is that following the escape character + with a letter or digit provides access to the escape sequences + defined for POSIX regular expressions; see + , + , and + below. + + + + Some examples: + +'abc' SIMILAR TO 'abc' true +'abc' SIMILAR TO 'a' false +'abc' SIMILAR TO '%(b|d)%' true +'abc' SIMILAR TO '(b|c)%' false +'-abc-' SIMILAR TO '%\mabc\M%' true +'xabcy' SIMILAR TO '%\mabc\M%' false + + + + + The substring function with three parameters + provides extraction of a substring that matches an SQL + regular expression pattern. The function can be written according + to standard SQL syntax: + +substring(string similar pattern escape escape-character) + + or using the now obsolete SQL:1999 syntax: + +substring(string from pattern for escape-character) + + or as a plain three-argument function: + +substring(string, pattern, escape-character) + + As with SIMILAR TO, the + specified pattern must match the entire data string, or else the + function fails and returns null. To indicate the part of the + pattern for which the matching data sub-string is of interest, + the pattern should contain + two occurrences of the escape character followed by a double quote + ("). + The text matching the portion of the pattern + between these separators is returned when the match is successful. + + + + The escape-double-quote separators actually + divide substring's pattern into three independent + regular expressions; for example, a vertical bar (|) + in any of the three sections affects only that section. Also, the first + and third of these regular expressions are defined to match the smallest + possible amount of text, not the largest, when there is any ambiguity + about how much of the data string matches which pattern. (In POSIX + parlance, the first and third regular expressions are forced to be + non-greedy.) + + + + As an extension to the SQL standard, PostgreSQL + allows there to be just one escape-double-quote separator, in which case + the third regular expression is taken as empty; or no separators, in which + case the first and third regular expressions are taken as empty. + + + + Some examples, with #" delimiting the return string: + +substring('foobar' similar '%#"o_b#"%' escape '#') oob +substring('foobar' similar '#"o_b#"%' escape '#') NULL + + + + + + <acronym>POSIX</acronym> Regular Expressions + + + regular expression + pattern matching + + + substring + + + regexp_count + + + regexp_instr + + + regexp_like + + + regexp_match + + + regexp_matches + + + regexp_replace + + + regexp_split_to_table + + + regexp_split_to_array + + + regexp_substr + + + + lists the available + operators for pattern matching using POSIX regular expressions. + + + + Regular Expression Match Operators + + + + + + Operator + + + Description + + + Example(s) + + + + + + + + text ~ text + boolean + + + String matches regular expression, case sensitively + + + 'thomas' ~ 't.*ma' + t + + + + + + text ~* text + boolean + + + String matches regular expression, case-insensitively + + + 'thomas' ~* 'T.*ma' + t + + + + + + text !~ text + boolean + + + String does not match regular expression, case sensitively + + + 'thomas' !~ 't.*max' + t + + + + + + text !~* text + boolean + + + String does not match regular expression, case-insensitively + + + 'thomas' !~* 'T.*ma' + f + + + + +
+ + + POSIX regular expressions provide a more + powerful means for pattern matching than the LIKE and + SIMILAR TO operators. + Many Unix tools such as egrep, + sed, or awk use a pattern + matching language that is similar to the one described here. + + + + A regular expression is a character sequence that is an + abbreviated definition of a set of strings (a regular + set). A string is said to match a regular expression + if it is a member of the regular set described by the regular + expression. As with LIKE, pattern characters + match string characters exactly unless they are special characters + in the regular expression language — but regular expressions use + different special characters than LIKE does. + Unlike LIKE patterns, a + regular expression is allowed to match anywhere within a string, unless + the regular expression is explicitly anchored to the beginning or + end of the string. + + + + Some examples: + +'abcd' ~ 'bc' true +'abcd' ~ 'a.c' true — dot matches any character +'abcd' ~ 'a.*d' true — * repeats the preceding pattern item +'abcd' ~ '(b|x)' true — | means OR, parentheses group +'abcd' ~ '^a' true — ^ anchors to start of string +'abcd' ~ '^(b|c)' false — would match except for anchoring + + + + + The POSIX pattern language is described in much + greater detail below. + + + + The substring function with two parameters, + substring(string from + pattern), provides extraction of a + substring + that matches a POSIX regular expression pattern. It returns null if + there is no match, otherwise the first portion of the text that matched the + pattern. But if the pattern contains any parentheses, the portion + of the text that matched the first parenthesized subexpression (the + one whose left parenthesis comes first) is + returned. You can put parentheses around the whole expression + if you want to use parentheses within it without triggering this + exception. If you need parentheses in the pattern before the + subexpression you want to extract, see the non-capturing parentheses + described below. + + + + Some examples: + +substring('foobar' from 'o.b') oob +substring('foobar' from 'o(.)b') o + + + + + The regexp_count function counts the number of + places where a POSIX regular expression pattern matches a string. + It has the syntax + regexp_count(string, + pattern + , start + , flags + ). + pattern is searched for + in string, normally from the beginning of + the string, but if the start parameter is + provided then beginning from that character index. + The flags parameter is an optional text + string containing zero or more single-letter flags that change the + function's behavior. For example, including i in + flags specifies case-insensitive matching. + Supported flags are described in + . + + + + Some examples: + +regexp_count('ABCABCAXYaxy', 'A.') 3 +regexp_count('ABCABCAXYaxy', 'A.', 1, 'i') 4 + + + + + The regexp_instr function returns the starting or + ending position of the N'th match of a + POSIX regular expression pattern to a string, or zero if there is no + such match. It has the syntax + regexp_instr(string, + pattern + , start + , N + , endoption + , flags + , subexpr + ). + pattern is searched for + in string, normally from the beginning of + the string, but if the start parameter is + provided then beginning from that character index. + If N is specified + then the N'th match of the pattern + is located, otherwise the first match is located. 
+ If the endoption parameter is omitted or + specified as zero, the function returns the position of the first + character of the match. Otherwise, endoption + must be one, and the function returns the position of the character + following the match. + The flags parameter is an optional text + string containing zero or more single-letter flags that change the + function's behavior. Supported flags are described + in . + For a pattern containing parenthesized + subexpressions, subexpr is an integer + indicating which subexpression is of interest: the result identifies + the position of the substring matching that subexpression. + Subexpressions are numbered in the order of their leading parentheses. + When subexpr is omitted or zero, the result + identifies the position of the whole match regardless of + parenthesized subexpressions. + + + + Some examples: + +regexp_instr('number of your street, town zip, FR', '[^,]+', 1, 2) + 23 +regexp_instr(string=>'ABCDEFGHI', pattern=>'(c..)(...)', start=>1, "N"=>1, endoption=>0, flags=>'i', subexpr=>2) + 6 + + + + + The regexp_like function checks whether a match + of a POSIX regular expression pattern occurs within a string, + returning boolean true or false. It has the syntax + regexp_like(string, + pattern + , flags ). + The flags parameter is an optional text + string containing zero or more single-letter flags that change the + function's behavior. Supported flags are described + in . + This function has the same results as the ~ + operator if no flags are specified. If only the i + flag is specified, it has the same results as + the ~* operator. + + + + Some examples: + +regexp_like('Hello World', 'world') false +regexp_like('Hello World', 'world', 'i') true + + + + + The regexp_match function returns a text array of + matching substring(s) within the first match of a POSIX + regular expression pattern to a string. It has the syntax + regexp_match(string, + pattern , flags ). + If there is no match, the result is NULL. + If a match is found, and the pattern contains no + parenthesized subexpressions, then the result is a single-element text + array containing the substring matching the whole pattern. + If a match is found, and the pattern contains + parenthesized subexpressions, then the result is a text array + whose n'th element is the substring matching + the n'th parenthesized subexpression of + the pattern (not counting non-capturing + parentheses; see below for details). + The flags parameter is an optional text string + containing zero or more single-letter flags that change the function's + behavior. Supported flags are described + in . + + + + Some examples: + +SELECT regexp_match('foobarbequebaz', 'bar.*que'); + regexp_match +-------------- + {barbeque} +(1 row) + +SELECT regexp_match('foobarbequebaz', '(bar)(beque)'); + regexp_match +-------------- + {bar,beque} +(1 row) + + + + + + In the common case where you just want the whole matching substring + or NULL for no match, the best solution is to + use regexp_substr(). + However, regexp_substr() only exists + in PostgreSQL version 15 and up. When + working in older versions, you can extract the first element + of regexp_match()'s result, for example: + +SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1]; + regexp_match +-------------- + barbeque +(1 row) + + + + + + The regexp_matches function returns a set of text arrays + of matching substring(s) within matches of a POSIX regular + expression pattern to a string. It has the same syntax as + regexp_match. 
+ This function returns no rows if there is no match, one row if there is + a match and the g flag is not given, or N + rows if there are N matches and the g flag + is given. Each returned row is a text array containing the whole + matched substring or the substrings matching parenthesized + subexpressions of the pattern, just as described above + for regexp_match. + regexp_matches accepts all the flags shown + in , plus + the g flag which commands it to return all matches, not + just the first one. + + + + Some examples: + +SELECT regexp_matches('foo', 'not there'); + regexp_matches +---------------- +(0 rows) + +SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g'); + regexp_matches +---------------- + {bar,beque} + {bazil,barf} +(2 rows) + + + + + + In most cases regexp_matches() should be used with + the g flag, since if you only want the first match, it's + easier and more efficient to use regexp_match(). + However, regexp_match() only exists + in PostgreSQL version 10 and up. When working in older + versions, a common trick is to place a regexp_matches() + call in a sub-select, for example: + +SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab; + + This produces a text array if there's a match, or NULL if + not, the same as regexp_match() would do. Without the + sub-select, this query would produce no output at all for table rows + without a match, which is typically not the desired behavior. + + + + + The regexp_replace function provides substitution of + new text for substrings that match POSIX regular expression patterns. + It has the syntax + regexp_replace(string, + pattern, replacement + , flags ) + or + regexp_replace(string, + pattern, replacement, + start + , N + , flags ). + The source string is returned unchanged if + there is no match to the pattern. If there is a + match, the string is returned with the + replacement string substituted for the matching + substring. The replacement string can contain + \n, where n is 1 + through 9, to indicate that the source substring matching the + n'th parenthesized subexpression of the pattern should be + inserted, and it can contain \& to indicate that the + substring matching the entire pattern should be inserted. Write + \\ if you need to put a literal backslash in the replacement + text. + pattern is searched for + in string, normally from the beginning of + the string, but if the start parameter is + provided then beginning from that character index. + By default, only the first match of the pattern is replaced. + If N is specified and is greater than zero, + then the N'th match of the pattern + is replaced. + If the g flag is given, or + if N is specified and is zero, then all + matches at or after the start position are + replaced. (The g flag is ignored + when N is specified.) + The flags parameter is an optional text + string containing zero or more single-letter flags that change the + function's behavior. Supported flags (though + not g) are + described in . 
+ + + + Some examples: + +regexp_replace('foobarbaz', 'b..', 'X') + fooXbaz +regexp_replace('foobarbaz', 'b..', 'X', 'g') + fooXX +regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g') + fooXarYXazY +regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i') + X PXstgrXSQL fXnctXXn +regexp_replace(string=>'A PostgreSQL function', pattern=>'a|e|i|o|u', replacement=>'X', start=>1, "N"=>3, flags=>'i') + A PostgrXSQL function + + + + + The regexp_split_to_table function splits a string using a POSIX + regular expression pattern as a delimiter. It has the syntax + regexp_split_to_table(string, pattern + , flags ). + If there is no match to the pattern, the function returns the + string. If there is at least one match, for each match it returns + the text from the end of the last match (or the beginning of the string) + to the beginning of the match. When there are no more matches, it + returns the text from the end of the last match to the end of the string. + The flags parameter is an optional text string containing + zero or more single-letter flags that change the function's behavior. + regexp_split_to_table supports the flags described in + . + + + + The regexp_split_to_array function behaves the same as + regexp_split_to_table, except that regexp_split_to_array + returns its result as an array of text. It has the syntax + regexp_split_to_array(string, pattern + , flags ). + The parameters are the same as for regexp_split_to_table. + + + + Some examples: + +SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo; + foo +------- + the + quick + brown + fox + jumps + over + the + lazy + dog +(9 rows) + +SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', '\s+'); + regexp_split_to_array +----------------------------------------------- + {the,quick,brown,fox,jumps,over,the,lazy,dog} +(1 row) + +SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; + foo +----- + t + h + e + q + u + i + c + k + b + r + o + w + n + f + o + x +(16 rows) + + + + + As the last example demonstrates, the regexp split functions ignore + zero-length matches that occur at the start or end of the string + or immediately after a previous match. This is contrary to the strict + definition of regexp matching that is implemented by + the other regexp functions, but is usually the most convenient behavior + in practice. Other software systems such as Perl use similar definitions. + + + + The regexp_substr function returns the substring + that matches a POSIX regular expression pattern, + or NULL if there is no match. It has the syntax + regexp_substr(string, + pattern + , start + , N + , flags + , subexpr + ). + pattern is searched for + in string, normally from the beginning of + the string, but if the start parameter is + provided then beginning from that character index. + If N is specified + then the N'th match of the pattern + is returned, otherwise the first match is returned. + The flags parameter is an optional text + string containing zero or more single-letter flags that change the + function's behavior. Supported flags are described + in . + For a pattern containing parenthesized + subexpressions, subexpr is an integer + indicating which subexpression is of interest: the result is the + substring matching that subexpression. + Subexpressions are numbered in the order of their leading parentheses. + When subexpr is omitted or zero, the result + is the whole match regardless of parenthesized subexpressions. 
+ + + + Some examples: + +regexp_substr('number of your street, town zip, FR', '[^,]+', 1, 2) + town zip +regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) + FGH + + + + + + + Regular Expression Details + + + PostgreSQL's regular expressions are implemented + using a software package written by Henry Spencer. Much of + the description of regular expressions below is copied verbatim from his + manual. + + + + Regular expressions (REs), as defined in + POSIX 1003.2, come in two forms: + extended REs or EREs + (roughly those of egrep), and + basic REs or BREs + (roughly those of ed). + PostgreSQL supports both forms, and + also implements some extensions + that are not in the POSIX standard, but have become widely used + due to their availability in programming languages such as Perl and Tcl. + REs using these non-POSIX extensions are called + advanced REs or AREs + in this documentation. AREs are almost an exact superset of EREs, + but BREs have several notational incompatibilities (as well as being + much more limited). + We first describe the ARE and ERE forms, noting features that apply + only to AREs, and then describe how BREs differ. + + + + + PostgreSQL always initially presumes that a regular + expression follows the ARE rules. However, the more limited ERE or + BRE rules can be chosen by prepending an embedded option + to the RE pattern, as described in . + This can be useful for compatibility with applications that expect + exactly the POSIX 1003.2 rules. + + + + + A regular expression is defined as one or more + branches, separated by + |. It matches anything that matches one of the + branches. + + + + A branch is zero or more quantified atoms or + constraints, concatenated. + It matches a match for the first, followed by a match for the second, etc.; + an empty branch matches the empty string. + + + + A quantified atom is an atom possibly followed + by a single quantifier. + Without a quantifier, it matches a match for the atom. + With a quantifier, it can match some number of matches of the atom. + An atom can be any of the possibilities + shown in . + The possible quantifiers and their meanings are shown in + . + + + + A constraint matches an empty string, but matches only when + specific conditions are met. A constraint can be used where an atom + could be used, except it cannot be followed by a quantifier. + The simple constraints are shown in + ; + some more constraints are described later. + + + + + Regular Expression Atoms + + + + + Atom + Description + + + + + + (re) + (where re is any regular expression) + matches a match for + re, with the match noted for possible reporting + + + + (?:re) + as above, but the match is not noted for reporting + (a non-capturing set of parentheses) + (AREs only) + + + + . + matches any single character + + + + [chars] + a bracket expression, + matching any one of the chars (see + for more detail) + + + + \k + (where k is a non-alphanumeric character) + matches that character taken as an ordinary character, + e.g., \\ matches a backslash character + + + + \c + where c is alphanumeric + (possibly followed by other characters) + is an escape, see + (AREs only; in EREs and BREs, this matches c) + + + + { + when followed by a character other than a digit, + matches the left-brace character {; + when followed by a digit, it is the beginning of a + bound (see below) + + + + x + where x is a single character with no other + significance, matches that character + + + +
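+
+   As an illustration of the difference between capturing and
+   non-capturing parentheses (an example query, not part of the table
+   above):
+
+SELECT regexp_match('abc123', '(?:[a-z]+)(\d+)');
+ regexp_match
+--------------
+ {123}
+(1 row)
+
+   Only the capturing group (\d+) is reported; the non-capturing
+   group merely serves to group the letters.
+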
+ + + An RE cannot end with a backslash (\). + + + + + If you have turned off, + any backslashes you write in literal string constants will need to be + doubled. See for more information. + + + + + Regular Expression Quantifiers + + + + + Quantifier + Matches + + + + + + * + a sequence of 0 or more matches of the atom + + + + + + a sequence of 1 or more matches of the atom + + + + ? + a sequence of 0 or 1 matches of the atom + + + + {m} + a sequence of exactly m matches of the atom + + + + {m,} + a sequence of m or more matches of the atom + + + + + {m,n} + a sequence of m through n + (inclusive) matches of the atom; m cannot exceed + n + + + + *? + non-greedy version of * + + + + +? + non-greedy version of + + + + + ?? + non-greedy version of ? + + + + {m}? + non-greedy version of {m} + + + + {m,}? + non-greedy version of {m,} + + + + + {m,n}? + non-greedy version of {m,n} + + + +
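+
+   A brief illustration of greedy versus non-greedy quantifiers
+   (example queries, not part of the table above):
+
+SELECT regexp_match('<a><b>', '<.*>');
+ regexp_match
+--------------
+ {<a><b>}
+(1 row)
+
+SELECT regexp_match('<a><b>', '<.*?>');
+ regexp_match
+--------------
+ {<a>}
+(1 row)
+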
+ + + The forms using {...} + are known as bounds. + The numbers m and n within a bound are + unsigned decimal integers with permissible values from 0 to 255 inclusive. + + + + Non-greedy quantifiers (available in AREs only) match the + same possibilities as their corresponding normal (greedy) + counterparts, but prefer the smallest number rather than the largest + number of matches. + See for more detail. + + + + + A quantifier cannot immediately follow another quantifier, e.g., + ** is invalid. + A quantifier cannot + begin an expression or subexpression or follow + ^ or |. + + + + + Regular Expression Constraints + + + + + Constraint + Description + + + + + + ^ + matches at the beginning of the string + + + + $ + matches at the end of the string + + + + (?=re) + positive lookahead matches at any point + where a substring matching re begins + (AREs only) + + + + (?!re) + negative lookahead matches at any point + where no substring matching re begins + (AREs only) + + + + (?<=re) + positive lookbehind matches at any point + where a substring matching re ends + (AREs only) + + + + (?<!re) + negative lookbehind matches at any point + where no substring matching re ends + (AREs only) + + + +
+ + + Lookahead and lookbehind constraints cannot contain back + references (see ), + and all parentheses within them are considered non-capturing. + +
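+
+   For instance, lookaround constraints can select text by its context
+   without consuming that context (illustrative queries):
+
+SELECT regexp_match('price: $42', '(?<=\$)\d+');
+ regexp_match
+--------------
+ {42}
+(1 row)
+
+SELECT regexp_count('foo food fool', 'foo(?!d)');
+ regexp_count
+--------------
+            2
+(1 row)
+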
+ + + Bracket Expressions + + + A bracket expression is a list of + characters enclosed in []. It normally matches + any single character from the list (but see below). If the list + begins with ^, it matches any single character + not from the rest of the list. + If two characters + in the list are separated by -, this is + shorthand for the full range of characters between those two + (inclusive) in the collating sequence, + e.g., [0-9] in ASCII matches + any decimal digit. It is illegal for two ranges to share an + endpoint, e.g., a-c-e. Ranges are very + collating-sequence-dependent, so portable programs should avoid + relying on them. + + + + To include a literal ] in the list, make it the + first character (after ^, if that is used). To + include a literal -, make it the first or last + character, or the second endpoint of a range. To use a literal + - as the first endpoint of a range, enclose it + in [. and .] to make it a + collating element (see below). With the exception of these characters, + some combinations using [ + (see next paragraphs), and escapes (AREs only), all other special + characters lose their special significance within a bracket expression. + In particular, \ is not special when following + ERE or BRE rules, though it is special (as introducing an escape) + in AREs. + + + + Within a bracket expression, a collating element (a character, a + multiple-character sequence that collates as if it were a single + character, or a collating-sequence name for either) enclosed in + [. and .] stands for the + sequence of characters of that collating element. The sequence is + treated as a single element of the bracket expression's list. This + allows a bracket + expression containing a multiple-character collating element to + match more than one character, e.g., if the collating sequence + includes a ch collating element, then the RE + [[.ch.]]*c matches the first five characters of + chchcc. + + + + + PostgreSQL currently does not support multi-character collating + elements. This information describes possible future behavior. + + + + + Within a bracket expression, a collating element enclosed in + [= and =] is an equivalence + class, standing for the sequences of characters of all collating + elements equivalent to that one, including itself. (If there are + no other equivalent collating elements, the treatment is as if the + enclosing delimiters were [. and + .].) For example, if o and + ^ are the members of an equivalence class, then + [[=o=]], [[=^=]], and + [o^] are all synonymous. An equivalence class + cannot be an endpoint of a range. + + + + Within a bracket expression, the name of a character class + enclosed in [: and :] stands + for the list of all characters belonging to that class. A character + class cannot be used as an endpoint of a range. + The POSIX standard defines these character class + names: + alnum (letters and numeric digits), + alpha (letters), + blank (space and tab), + cntrl (control characters), + digit (numeric digits), + graph (printable characters except space), + lower (lower-case letters), + print (printable characters including space), + punct (punctuation), + space (any white space), + upper (upper-case letters), + and xdigit (hexadecimal digits). + The behavior of these standard character classes is generally + consistent across platforms for characters in the 7-bit ASCII set. 
+ Whether a given non-ASCII character is considered to belong to one + of these classes depends on the collation + that is used for the regular-expression function or operator + (see ), or by default on the + database's LC_CTYPE locale setting (see + ). The classification of non-ASCII + characters can vary across platforms even in similarly-named + locales. (But the C locale never considers any + non-ASCII characters to belong to any of these classes.) + In addition to these standard character + classes, PostgreSQL defines + the word character class, which is the same as + alnum plus the underscore (_) + character, and + the ascii character class, which contains exactly + the 7-bit ASCII set. + + + + There are two special cases of bracket expressions: the bracket + expressions [[:<:]] and + [[:>:]] are constraints, + matching empty strings at the beginning + and end of a word respectively. A word is defined as a sequence + of word characters that is neither preceded nor followed by word + characters. A word character is any character belonging to the + word character class, that is, any letter, digit, + or underscore. This is an extension, compatible with but not + specified by POSIX 1003.2, and should be used with + caution in software intended to be portable to other systems. + The constraint escapes described below are usually preferable; they + are no more standard, but are easier to type. + + + + + Regular Expression Escapes + + + Escapes are special sequences beginning with \ + followed by an alphanumeric character. Escapes come in several varieties: + character entry, class shorthands, constraint escapes, and back references. + A \ followed by an alphanumeric character but not constituting + a valid escape is illegal in AREs. + In EREs, there are no escapes: outside a bracket expression, + a \ followed by an alphanumeric character merely stands for + that character as an ordinary character, and inside a bracket expression, + \ is an ordinary character. + (The latter is the one actual incompatibility between EREs and AREs.) + + + + Character-entry escapes exist to make it easier to specify + non-printing and other inconvenient characters in REs. They are + shown in . + + + + Class-shorthand escapes provide shorthands for certain + commonly-used character classes. They are + shown in . + + + + A constraint escape is a constraint, + matching the empty string if specific conditions are met, + written as an escape. They are + shown in . + + + + A back reference (\n) matches the + same string matched by the previous parenthesized subexpression specified + by the number n + (see ). For example, + ([bc])\1 matches bb or cc + but not bc or cb. + The subexpression must entirely precede the back reference in the RE. + Subexpressions are numbered in the order of their leading parentheses. + Non-capturing parentheses do not define subexpressions. + The back reference considers only the string characters matched by the + referenced subexpression, not any constraints contained in it. For + example, (^\d)\1 will match 22. 
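+
+   For example, a back reference can detect a repeated group
+   (an illustrative query):
+
+SELECT regexp_like('2525', '^(\d\d)\1$');
+ regexp_like
+-------------
+ t
+(1 row)
+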
+ + + + Regular Expression Character-Entry Escapes + + + + + Escape + Description + + + + + + \a + alert (bell) character, as in C + + + + \b + backspace, as in C + + + + \B + synonym for backslash (\) to help reduce the need for backslash + doubling + + + + \cX + (where X is any character) the character whose + low-order 5 bits are the same as those of + X, and whose other bits are all zero + + + + \e + the character whose collating-sequence name + is ESC, + or failing that, the character with octal value 033 + + + + \f + form feed, as in C + + + + \n + newline, as in C + + + + \r + carriage return, as in C + + + + \t + horizontal tab, as in C + + + + \uwxyz + (where wxyz is exactly four hexadecimal digits) + the character whose hexadecimal value is + 0xwxyz + + + + + \Ustuvwxyz + (where stuvwxyz is exactly eight hexadecimal + digits) + the character whose hexadecimal value is + 0xstuvwxyz + + + + + \v + vertical tab, as in C + + + + \xhhh + (where hhh is any sequence of hexadecimal + digits) + the character whose hexadecimal value is + 0xhhh + (a single character no matter how many hexadecimal digits are used) + + + + + \0 + the character whose value is 0 (the null byte) + + + + \xy + (where xy is exactly two octal digits, + and is not a back reference) + the character whose octal value is + 0xy + + + + \xyz + (where xyz is exactly three octal digits, + and is not a back reference) + the character whose octal value is + 0xyz + + + +
+ + + Hexadecimal digits are 0-9, + a-f, and A-F. + Octal digits are 0-7. + + + + Numeric character-entry escapes specifying values outside the ASCII range + (0–127) have meanings dependent on the database encoding. When the + encoding is UTF-8, escape values are equivalent to Unicode code points, + for example \u1234 means the character U+1234. + For other multibyte encodings, character-entry escapes usually just + specify the concatenation of the byte values for the character. If the + escape value does not correspond to any legal character in the database + encoding, no error will be raised, but it will never match any data. + + + + The character-entry escapes are always taken as ordinary characters. + For example, \135 is ] in ASCII, but + \135 does not terminate a bracket expression. + + + + Regular Expression Class-Shorthand Escapes + + + + + Escape + Description + + + + + + \d + matches any digit, like + [[:digit:]] + + + + \s + matches any whitespace character, like + [[:space:]] + + + + \w + matches any word character, like + [[:word:]] + + + + \D + matches any non-digit, like + [^[:digit:]] + + + + \S + matches any non-whitespace character, like + [^[:space:]] + + + + \W + matches any non-word character, like + [^[:word:]] + + + +
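+
+   For example, the \D shorthand together with the g flag
+   can strip all non-digits from a string (an illustrative query):
+
+SELECT regexp_replace('phone: 555-1234', '\D', '', 'g');
+ regexp_replace
+----------------
+ 5551234
+(1 row)
+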
+ + + The class-shorthand escapes also work within bracket expressions, + although the definitions shown above are not quite syntactically + valid in that context. + For example, [a-c\d] is equivalent to + [a-c[:digit:]]. + + + + Regular Expression Constraint Escapes + + + + + Escape + Description + + + + + + \A + matches only at the beginning of the string + (see for how this differs from + ^) + + + + \m + matches only at the beginning of a word + + + + \M + matches only at the end of a word + + + + \y + matches only at the beginning or end of a word + + + + \Y + matches only at a point that is not the beginning or end of a + word + + + + \Z + matches only at the end of the string + (see for how this differs from + $) + + + +
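+
+   For example, \m and \M restrict a match to a whole word
+   (an illustrative query):
+
+SELECT regexp_count('cat concatenate cat', '\mcat\M');
+ regexp_count
+--------------
+            2
+(1 row)
+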
+ + + A word is defined as in the specification of + [[:<:]] and [[:>:]] above. + Constraint escapes are illegal within bracket expressions. + + + + Regular Expression Back References + + + + + Escape + Description + + + + + + \m + (where m is a nonzero digit) + a back reference to the m'th subexpression + + + + \mnn + (where m is a nonzero digit, and + nn is some more digits, and the decimal value + mnn is not greater than the number of closing capturing + parentheses seen so far) + a back reference to the mnn'th subexpression + + + +
+ + + + There is an inherent ambiguity between octal character-entry + escapes and back references, which is resolved by the following heuristics, + as hinted at above. + A leading zero always indicates an octal escape. + A single non-zero digit, not followed by another digit, + is always taken as a back reference. + A multi-digit sequence not starting with a zero is taken as a back + reference if it comes after a suitable subexpression + (i.e., the number is in the legal range for a back reference), + and otherwise is taken as octal. + + +
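+
+   A short illustration of these rules (example queries; chr(10)
+   supplies a newline character):
+
+SELECT regexp_like('xyxy', '(xy)\1');        -- \1 is a back reference
+ regexp_like
+-------------
+ t
+(1 row)
+
+SELECT regexp_like('x' || chr(10), 'x\12');  -- no 12th subexpression, so \12 is octal for newline
+ regexp_like
+-------------
+ t
+(1 row)
+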
+ + + Regular Expression Metasyntax + + + In addition to the main syntax described above, there are some special + forms and miscellaneous syntactic facilities available. + + + + An RE can begin with one of two special director prefixes. + If an RE begins with ***:, + the rest of the RE is taken as an ARE. (This normally has no effect in + PostgreSQL, since REs are assumed to be AREs; + but it does have an effect if ERE or BRE mode had been specified by + the flags parameter to a regex function.) + If an RE begins with ***=, + the rest of the RE is taken to be a literal string, + with all characters considered ordinary characters. + + + + An ARE can begin with embedded options: + a sequence (?xyz) + (where xyz is one or more alphabetic characters) + specifies options affecting the rest of the RE. + These options override any previously determined options — + in particular, they can override the case-sensitivity behavior implied by + a regex operator, or the flags parameter to a regex + function. + The available option letters are + shown in . + Note that these same option letters are used in the flags + parameters of regex functions. + + + + ARE Embedded-Option Letters + + + + + Option + Description + + + + + + b + rest of RE is a BRE + + + + c + case-sensitive matching (overrides operator type) + + + + e + rest of RE is an ERE + + + + i + case-insensitive matching (see + ) (overrides operator type) + + + + m + historical synonym for n + + + + n + newline-sensitive matching (see + ) + + + + p + partial newline-sensitive matching (see + ) + + + + q + rest of RE is a literal (quoted) string, all ordinary + characters + + + + s + non-newline-sensitive matching (default) + + + + t + tight syntax (default; see below) + + + + w + inverse partial newline-sensitive (weird) matching + (see ) + + + + x + expanded syntax (see below) + + + +
+ + + Embedded options take effect at the ) terminating the sequence. + They can appear only at the start of an ARE (after the + ***: director if any). + + + + In addition to the usual (tight) RE syntax, in which all + characters are significant, there is an expanded syntax, + available by specifying the embedded x option. + In the expanded syntax, + white-space characters in the RE are ignored, as are + all characters between a # + and the following newline (or the end of the RE). This + permits paragraphing and commenting a complex RE. + There are three exceptions to that basic rule: + + + + + a white-space character or # preceded by \ is + retained + + + + + white space or # within a bracket expression is retained + + + + + white space and comments cannot appear within multi-character symbols, + such as (?: + + + + + For this purpose, white-space characters are blank, tab, newline, and + any character that belongs to the space character class. + + + + Finally, in an ARE, outside bracket expressions, the sequence + (?#ttt) + (where ttt is any text not containing a )) + is a comment, completely ignored. + Again, this is not allowed between the characters of + multi-character symbols, like (?:. + Such comments are more a historical artifact than a useful facility, + and their use is deprecated; use the expanded syntax instead. + + + + None of these metasyntax extensions is available if + an initial ***= director + has specified that the user's input be treated as a literal string + rather than as an RE. + +
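+
+   For example, the expanded syntax lets a complex RE be laid out with
+   whitespace and comments (an illustrative query):
+
+SELECT regexp_match('2024-01-15', '(?x)
+    (\d{4})   # year
+  - (\d{2})   # month
+  - (\d{2})   # day
+');
+ regexp_match
+--------------
+ {2024,01,15}
+(1 row)
+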
+ + + Regular Expression Matching Rules + + + In the event that an RE could match more than one substring of a given + string, the RE matches the one starting earliest in the string. + If the RE could match more than one substring starting at that point, + either the longest possible match or the shortest possible match will + be taken, depending on whether the RE is greedy or + non-greedy. + + + + Whether an RE is greedy or not is determined by the following rules: + + + + Most atoms, and all constraints, have no greediness attribute (because + they cannot match variable amounts of text anyway). + + + + + Adding parentheses around an RE does not change its greediness. + + + + + A quantified atom with a fixed-repetition quantifier + ({m} + or + {m}?) + has the same greediness (possibly none) as the atom itself. + + + + + A quantified atom with other normal quantifiers (including + {m,n} + with m equal to n) + is greedy (prefers longest match). + + + + + A quantified atom with a non-greedy quantifier (including + {m,n}? + with m equal to n) + is non-greedy (prefers shortest match). + + + + + A branch — that is, an RE that has no top-level + | operator — has the same greediness as the first + quantified atom in it that has a greediness attribute. + + + + + An RE consisting of two or more branches connected by the + | operator is always greedy. + + + + + + + The above rules associate greediness attributes not only with individual + quantified atoms, but with branches and entire REs that contain quantified + atoms. What that means is that the matching is done in such a way that + the branch, or whole RE, matches the longest or shortest possible + substring as a whole. Once the length of the entire match + is determined, the part of it that matches any particular subexpression + is determined on the basis of the greediness attribute of that + subexpression, with subexpressions starting earlier in the RE taking + priority over ones starting later. + + + + An example of what this means: + +SELECT SUBSTRING('XY1234Z', 'Y*([0-9]{1,3})'); +Result: 123 +SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); +Result: 1 + + In the first case, the RE as a whole is greedy because Y* + is greedy. It can match beginning at the Y, and it matches + the longest possible string starting there, i.e., Y123. + The output is the parenthesized part of that, or 123. + In the second case, the RE as a whole is non-greedy because Y*? + is non-greedy. It can match beginning at the Y, and it matches + the shortest possible string starting there, i.e., Y1. + The subexpression [0-9]{1,3} is greedy but it cannot change + the decision as to the overall match length; so it is forced to match + just 1. + + + + In short, when an RE contains both greedy and non-greedy subexpressions, + the total match length is either as long as possible or as short as + possible, according to the attribute assigned to the whole RE. The + attributes assigned to the subexpressions only affect how much of that + match they are allowed to eat relative to each other. + + + + The quantifiers {1,1} and {1,1}? + can be used to force greediness or non-greediness, respectively, + on a subexpression or a whole RE. + This is useful when you need the whole RE to have a greediness attribute + different from what's deduced from its elements. As an example, + suppose that we are trying to separate a string containing some digits + into the digits and the parts before and after them. 
We might try to + do that like this: + +SELECT regexp_match('abc01234xyz', '(.*)(\d+)(.*)'); +Result: {abc0123,4,xyz} + + That didn't work: the first .* is greedy so + it eats as much as it can, leaving the \d+ to + match at the last possible place, the last digit. We might try to fix + that by making it non-greedy: + +SELECT regexp_match('abc01234xyz', '(.*?)(\d+)(.*)'); +Result: {abc,0,""} + + That didn't work either, because now the RE as a whole is non-greedy + and so it ends the overall match as soon as possible. We can get what + we want by forcing the RE as a whole to be greedy: + +SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); +Result: {abc,01234,xyz} + + Controlling the RE's overall greediness separately from its components' + greediness allows great flexibility in handling variable-length patterns. + + + + When deciding what is a longer or shorter match, + match lengths are measured in characters, not collating elements. + An empty string is considered longer than no match at all. + For example: + bb* + matches the three middle characters of abbbc; + (week|wee)(night|knights) + matches all ten characters of weeknights; + when (.*).* + is matched against abc the parenthesized subexpression + matches all three characters; and when + (a*)* is matched against bc + both the whole RE and the parenthesized + subexpression match an empty string. + + + + If case-independent matching is specified, + the effect is much as if all case distinctions had vanished from the + alphabet. + When an alphabetic that exists in multiple cases appears as an + ordinary character outside a bracket expression, it is effectively + transformed into a bracket expression containing both cases, + e.g., x becomes [xX]. + When it appears inside a bracket expression, all case counterparts + of it are added to the bracket expression, e.g., + [x] becomes [xX] + and [^x] becomes [^xX]. + + + + If newline-sensitive matching is specified, . + and bracket expressions using ^ + will never match the newline character + (so that matches will not cross lines unless the RE + explicitly includes a newline) + and ^ and $ + will match the empty string after and before a newline + respectively, in addition to matching at beginning and end of string + respectively. + But the ARE escapes \A and \Z + continue to match beginning or end of string only. + Also, the character class shorthands \D + and \W will match a newline regardless of this mode. + (Before PostgreSQL 14, they did not match + newlines when in newline-sensitive mode. + Write [^[:digit:]] + or [^[:word:]] to get the old behavior.) + + + + If partial newline-sensitive matching is specified, + this affects . and bracket expressions + as with newline-sensitive matching, but not ^ + and $. + + + + If inverse partial newline-sensitive matching is specified, + this affects ^ and $ + as with newline-sensitive matching, but not . + and bracket expressions. + This isn't very useful but is provided for symmetry. + + + + + Limits and Compatibility + + + No particular limit is imposed on the length of REs in this + implementation. However, + programs intended to be highly portable should not employ REs longer + than 256 bytes, + as a POSIX-compliant implementation can refuse to accept such REs. + + + + The only feature of AREs that is actually incompatible with + POSIX EREs is that \ does not lose its special + significance inside bracket expressions. 
+ All other ARE features use syntax which is illegal or has + undefined or unspecified effects in POSIX EREs; + the *** syntax of directors likewise is outside the POSIX + syntax for both BREs and EREs. + + + + Many of the ARE extensions are borrowed from Perl, but some have + been changed to clean them up, and a few Perl extensions are not present. + Incompatibilities of note include \b, \B, + the lack of special treatment for a trailing newline, + the addition of complemented bracket expressions to the things + affected by newline-sensitive matching, + the restrictions on parentheses and back references in lookahead/lookbehind + constraints, and the longest/shortest-match (rather than first-match) + matching semantics. + + + + + Basic Regular Expressions + + + BREs differ from EREs in several respects. + In BREs, |, +, and ? + are ordinary characters and there is no equivalent + for their functionality. + The delimiters for bounds are + \{ and \}, + with { and } + by themselves ordinary characters. + The parentheses for nested subexpressions are + \( and \), + with ( and ) by themselves ordinary characters. + ^ is an ordinary character except at the beginning of the + RE or the beginning of a parenthesized subexpression, + $ is an ordinary character except at the end of the + RE or the end of a parenthesized subexpression, + and * is an ordinary character if it appears at the beginning + of the RE or the beginning of a parenthesized subexpression + (after a possible leading ^). + Finally, single-digit back references are available, and + \< and \> + are synonyms for + [[:<:]] and [[:>:]] + respectively; no other escapes are available in BREs. + + + + + + + Differences from SQL Standard and XQuery + + + LIKE_REGEX + + + + OCCURRENCES_REGEX + + + + POSITION_REGEX + + + + SUBSTRING_REGEX + + + + TRANSLATE_REGEX + + + + XQuery regular expressions + + + + Since SQL:2008, the SQL standard includes regular expression operators + and functions that performs pattern + matching according to the XQuery regular expression + standard: + + LIKE_REGEX + OCCURRENCES_REGEX + POSITION_REGEX + SUBSTRING_REGEX + TRANSLATE_REGEX + + PostgreSQL does not currently implement these + operators and functions. You can get approximately equivalent + functionality in each case as shown in . (Various optional clauses on + both sides have been omitted in this table.) + + + + Regular Expression Functions Equivalencies + + + + + SQL standard + PostgreSQL + + + + + + string LIKE_REGEX pattern + regexp_like(string, pattern) or string ~ pattern + + + + OCCURRENCES_REGEX(pattern IN string) + regexp_count(string, pattern) + + + + POSITION_REGEX(pattern IN string) + regexp_instr(string, pattern) + + + + SUBSTRING_REGEX(pattern IN string) + regexp_substr(string, pattern) + + + + TRANSLATE_REGEX(pattern IN string WITH replacement) + regexp_replace(string, pattern, replacement) + + + +
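+
+   For example, where the SQL standard would use
+   OCCURRENCES_REGEX and POSITION_REGEX, the equivalent
+   PostgreSQL calls are (an illustrative query):
+
+SELECT regexp_count('cats and hats', 'at') AS occurrences,
+       regexp_instr('cats and hats', 'at') AS position;
+ occurrences | position
+-------------+----------
+           2 |        2
+(1 row)
+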
+ + + Regular expression functions similar to those provided by PostgreSQL are + also available in a number of other SQL implementations, whereas the + SQL-standard functions are not as widely implemented. Some of the + details of the regular expression syntax will likely differ in each + implementation. + + + + The SQL-standard operators and functions use XQuery regular expressions, + which are quite close to the ARE syntax described above. + Notable differences between the existing POSIX-based + regular-expression feature and XQuery regular expressions include: + + + + + XQuery character class subtraction is not supported. An example of + this feature is using the following to match only English + consonants: [a-z-[aeiou]]. + + + + + XQuery character class shorthands \c, + \C, \i, + and \I are not supported. + + + + + XQuery character class elements + using \p{UnicodeProperty} or the + inverse \P{UnicodeProperty} are not supported. + + + + + POSIX interprets character classes such as \w + (see ) + according to the prevailing locale (which you can control by + attaching a COLLATE clause to the operator or + function). XQuery specifies these classes by reference to Unicode + character properties, so equivalent behavior is obtained only with + a locale that follows the Unicode rules. + + + + + The SQL standard (not XQuery itself) attempts to cater for more + variants of newline than POSIX does. The + newline-sensitive matching options described above consider only + ASCII NL (\n) to be a newline, but SQL would have + us treat CR (\r), CRLF (\r\n) + (a Windows-style newline), and some Unicode-only characters like + LINE SEPARATOR (U+2028) as newlines as well. + Notably, . and \s should + count \r\n as one character not two according to + SQL. + + + + + Of the character-entry escapes described in + , + XQuery supports only \n, \r, + and \t. + + + + + XQuery does not support + the [:name:] syntax + for character classes within bracket expressions. + + + + + XQuery does not have lookahead or lookbehind constraints, + nor any of the constraint escapes described in + . + + + + + The metasyntax forms described in + do not exist in XQuery. + + + + + The regular expression flag letters defined by XQuery are + related to but not the same as the option letters for POSIX + (). While the + i and q options behave the + same, others do not: + + + + XQuery's s (allow dot to match newline) + and m (allow ^ + and $ to match at newlines) flags provide + access to the same behaviors as + POSIX's n, p + and w flags, but they + do not match the behavior of + POSIX's s and m flags. + Note in particular that dot-matches-newline is the default + behavior in POSIX but not XQuery. + + + + + XQuery's x (ignore whitespace in pattern) flag + is noticeably different from POSIX's expanded-mode flag. + POSIX's x flag also + allows # to begin a comment in the pattern, + and POSIX will not ignore a whitespace character after a + backslash. + + + + + + + + +
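+
+   The difference in newline handling can be seen directly
+   (an illustrative query; E'a\nb' contains an embedded newline):
+
+SELECT regexp_like(E'a\nb', 'a.b') AS default_mode,
+       regexp_like(E'a\nb', 'a.b', 'n') AS newline_sensitive;
+ default_mode | newline_sensitive
+--------------+-------------------
+ t            | f
+(1 row)
+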
+
+
diff --git a/doc/src/sgml/func/func-math.sgml b/doc/src/sgml/func/func-math.sgml new file mode 100644 index 0000000000000..7528dc4cea4b9 --- /dev/null +++ b/doc/src/sgml/func/func-math.sgml @@ -0,0 +1,1615 @@ + + Mathematical Functions and Operators + + + Mathematical operators are provided for many + PostgreSQL types. For types without + standard mathematical conventions + (e.g., date/time types) we + describe the actual behavior in subsequent sections. + + + + shows the mathematical + operators that are available for the standard numeric types. + Unless otherwise noted, operators shown as + accepting numeric_type are available for all + the types smallint, integer, + bigint, numeric, real, + and double precision. + Operators shown as accepting integral_type + are available for the types smallint, integer, + and bigint. + Except where noted, each form of an operator returns the same data type + as its argument(s). Calls involving multiple argument data types, such + as integer + numeric, + are resolved by using the type appearing later in these lists. + + + + Mathematical Operators + + + + + + Operator + + + Description + + + Example(s) + + + + + + + + numeric_type + numeric_type + numeric_type + + + Addition + + + 2 + 3 + 5 + + + + + + + numeric_type + numeric_type + + + Unary plus (no operation) + + + + 3.5 + 3.5 + + + + + + numeric_type - numeric_type + numeric_type + + + Subtraction + + + 2 - 3 + -1 + + + + + + - numeric_type + numeric_type + + + Negation + + + - (-4) + 4 + + + + + + numeric_type * numeric_type + numeric_type + + + Multiplication + + + 2 * 3 + 6 + + + + + + numeric_type / numeric_type + numeric_type + + + Division (for integral types, division truncates the result towards + zero) + + + 5.0 / 2 + 2.5000000000000000 + + + 5 / 2 + 2 + + + (-5) / 2 + -2 + + + + + + numeric_type % numeric_type + numeric_type + + + Modulo (remainder); available for smallint, + integer, bigint, and numeric + + + 5 % 4 + 1 + + + + + + numeric ^ numeric + numeric + + + double precision ^ double precision + double precision + + + Exponentiation + + + 2 ^ 3 + 8 + + + Unlike typical mathematical practice, multiple uses of + ^ will associate left to right by default: + + + 2 ^ 3 ^ 3 + 512 + + + 2 ^ (3 ^ 3) + 134217728 + + + + + + |/ double precision + double precision + + + Square root + + + |/ 25.0 + 5 + + + + + + ||/ double precision + double precision + + + Cube root + + + ||/ 64.0 + 4 + + + + + + @ numeric_type + numeric_type + + + Absolute value + + + @ -5.0 + 5.0 + + + + + + integral_type & integral_type + integral_type + + + Bitwise AND + + + 91 & 15 + 11 + + + + + + integral_type | integral_type + integral_type + + + Bitwise OR + + + 32 | 3 + 35 + + + + + + integral_type # integral_type + integral_type + + + Bitwise exclusive OR + + + 17 # 5 + 20 + + + + + + ~ integral_type + integral_type + + + Bitwise NOT + + + ~1 + -2 + + + + + + integral_type << integer + integral_type + + + Bitwise shift left + + + 1 << 4 + 16 + + + + + + integral_type >> integer + integral_type + + + Bitwise shift right + + + 8 >> 2 + 2 + + + + + +
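+
+   Because division of integral types truncates toward zero, / and %
+   satisfy the usual identity (y/x)*x + y%x = y, and the remainder
+   takes the sign of the dividend.  An illustrative query:
+
+SELECT 5 / 2 AS quotient, 5 % 2 AS remainder,
+       (-5) / 2 AS neg_quotient, (-5) % 2 AS neg_remainder;
+ quotient | remainder | neg_quotient | neg_remainder
+----------+-----------+--------------+---------------
+        2 |         1 |           -2 |            -1
+(1 row)
+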
+ + + shows the available + mathematical functions. + Many of these functions are provided in multiple forms with different + argument types. + Except where noted, any given form of a function returns the same + data type as its argument(s); cross-type cases are resolved in the + same way as explained above for operators. + The functions working with double precision data are mostly + implemented on top of the host system's C library; accuracy and behavior in + boundary cases can therefore vary depending on the host system. + + + + Mathematical Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + abs + + abs ( numeric_type ) + numeric_type + + + Absolute value + + + abs(-17.4) + 17.4 + + + + + + + cbrt + + cbrt ( double precision ) + double precision + + + Cube root + + + cbrt(64.0) + 4 + + + + + + + ceil + + ceil ( numeric ) + numeric + + + ceil ( double precision ) + double precision + + + Nearest integer greater than or equal to argument + + + ceil(42.2) + 43 + + + ceil(-42.8) + -42 + + + + + + + ceiling + + ceiling ( numeric ) + numeric + + + ceiling ( double precision ) + double precision + + + Nearest integer greater than or equal to argument (same + as ceil) + + + ceiling(95.3) + 96 + + + + + + + degrees + + degrees ( double precision ) + double precision + + + Converts radians to degrees + + + degrees(0.5) + 28.64788975654116 + + + + + + + div + + div ( y numeric, + x numeric ) + numeric + + + Integer quotient of y/x + (truncates towards zero) + + + div(9, 4) + 2 + + + + + + + erf + + erf ( double precision ) + double precision + + + Error function + + + erf(1.0) + 0.8427007929497149 + + + + + + + erfc + + erfc ( double precision ) + double precision + + + Complementary error function (1 - erf(x), without + loss of precision for large inputs) + + + erfc(1.0) + 0.15729920705028513 + + + + + + + exp + + exp ( numeric ) + numeric + + + exp ( double precision ) + double precision + + + Exponential (e raised to the given power) + + + exp(1.0) + 2.7182818284590452 + + + + + + + factorial + + factorial ( bigint ) + numeric + + + Factorial + + + factorial(5) + 120 + + + + + + + floor + + floor ( numeric ) + numeric + + + floor ( double precision ) + double precision + + + Nearest integer less than or equal to argument + + + floor(42.8) + 42 + + + floor(-42.8) + -43 + + + + + + + gamma + + gamma ( double precision ) + double precision + + + Gamma function + + + gamma(0.5) + 1.772453850905516 + + + gamma(6) + 120 + + + + + + + gcd + + gcd ( numeric_type, numeric_type ) + numeric_type + + + Greatest common divisor (the largest positive number that divides both + inputs with no remainder); returns 0 if both inputs + are zero; available for integer, bigint, + and numeric + + + gcd(1071, 462) + 21 + + + + + + + lcm + + lcm ( numeric_type, numeric_type ) + numeric_type + + + Least common multiple (the smallest strictly positive number that is + an integral multiple of both inputs); returns 0 if + either input is zero; available for integer, + bigint, and numeric + + + lcm(1071, 462) + 23562 + + + + + + + lgamma + + lgamma ( double precision ) + double precision + + + Natural logarithm of the absolute value of the gamma function + + + lgamma(1000) + 5905.220423209181 + + + + + + + ln + + ln ( numeric ) + numeric + + + ln ( double precision ) + double precision + + + Natural logarithm + + + ln(2.0) + 0.6931471805599453 + + + + + + + log + + log ( numeric ) + numeric + + + log ( double precision ) + double precision + + + Base 10 logarithm + + + log(100) + 2 + + + + + + 
+ log10 + + log10 ( numeric ) + numeric + + + log10 ( double precision ) + double precision + + + Base 10 logarithm (same as log) + + + log10(1000) + 3 + + + + + + log ( b numeric, + x numeric ) + numeric + + + Logarithm of x to base b + + + log(2.0, 64.0) + 6.0000000000000000 + + + + + + + min_scale + + min_scale ( numeric ) + integer + + + Minimum scale (number of fractional decimal digits) needed + to represent the supplied value precisely + + + min_scale(8.4100) + 2 + + + + + + + mod + + mod ( y numeric_type, + x numeric_type ) + numeric_type + + + Remainder of y/x; + available for smallint, integer, + bigint, and numeric + + + mod(9, 4) + 1 + + + + + + + pi + + pi ( ) + double precision + + + Approximate value of π + + + pi() + 3.141592653589793 + + + + + + + power + + power ( a numeric, + b numeric ) + numeric + + + power ( a double precision, + b double precision ) + double precision + + + a raised to the power of b + + + power(9, 3) + 729 + + + + + + + radians + + radians ( double precision ) + double precision + + + Converts degrees to radians + + + radians(45.0) + 0.7853981633974483 + + + + + + + round + + round ( numeric ) + numeric + + + round ( double precision ) + double precision + + + Rounds to nearest integer. For numeric, ties are + broken by rounding away from zero. For double precision, + the tie-breaking behavior is platform dependent, but + round to nearest even is the most common rule. + + + round(42.4) + 42 + + + + + + round ( v numeric, s integer ) + numeric + + + Rounds v to s decimal + places. Ties are broken by rounding away from zero. + + + round(42.4382, 2) + 42.44 + + + round(1234.56, -1) + 1230 + + + + + + + scale + + scale ( numeric ) + integer + + + Scale of the argument (the number of decimal digits in the fractional part) + + + scale(8.4100) + 4 + + + + + + + sign + + sign ( numeric ) + numeric + + + sign ( double precision ) + double precision + + + Sign of the argument (-1, 0, or +1) + + + sign(-8.4) + -1 + + + + + + + sqrt + + sqrt ( numeric ) + numeric + + + sqrt ( double precision ) + double precision + + + Square root + + + sqrt(2) + 1.4142135623730951 + + + + + + + trim_scale + + trim_scale ( numeric ) + numeric + + + Reduces the value's scale (number of fractional decimal digits) by + removing trailing zeroes + + + trim_scale(8.4100) + 8.41 + + + + + + + trunc + + trunc ( numeric ) + numeric + + + trunc ( double precision ) + double precision + + + Truncates to integer (towards zero) + + + trunc(42.8) + 42 + + + trunc(-42.8) + -42 + + + + + + trunc ( v numeric, s integer ) + numeric + + + Truncates v to s + decimal places + + + trunc(42.4382, 2) + 42.43 + + + + + + + width_bucket + + width_bucket ( operand numeric, low numeric, high numeric, count integer ) + integer + + + width_bucket ( operand double precision, low double precision, high double precision, count integer ) + integer + + + Returns the number of the bucket in + which operand falls in a histogram + having count equal-width buckets spanning the + range low to high. + The buckets have inclusive lower bounds and exclusive upper bounds. + Returns 0 for an input less + than low, + or count+1 for an input + greater than or equal to high. + If low > high, + the behavior is mirror-reversed, with bucket 1 + now being the one just below low, and the + inclusive bounds now being on the upper side. 
+ + + width_bucket(5.35, 0.024, 10.06, 5) + 3 + + + width_bucket(9, 10, 0, 10) + 2 + + + + + + width_bucket ( operand anycompatible, thresholds anycompatiblearray ) + integer + + + Returns the number of the bucket in + which operand falls given an array listing the + inclusive lower bounds of the buckets. + Returns 0 for an input less than the first lower + bound. operand and the array elements can be + of any type having standard comparison operators. + The thresholds array must be + sorted, smallest first, or unexpected results will be + obtained. + + + width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[]) + 2 + + + + +
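+
+   A typical use of width_bucket is building histograms; for example
+   (an illustrative query; the inline VALUES list and alias t are
+   arbitrary):
+
+SELECT width_bucket(x, 0, 10, 5) AS bucket, count(*)
+FROM (VALUES (1.5), (3.2), (7.8), (9.9)) AS t(x)
+GROUP BY bucket
+ORDER BY bucket;
+ bucket | count
+--------+-------
+      1 |     1
+      2 |     1
+      4 |     1
+      5 |     1
+(4 rows)
+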
+ + + shows functions for + generating random numbers. + + + + Random Functions + + + + + + Function + + + Description + + + Example(s) + + + + + + + + + random + + random ( ) + double precision + + + Returns a random value in the range 0.0 <= x < 1.0 + + + random() + 0.897124072839091 + + + + + + + random + + random ( min integer, max integer ) + integer + + + random ( min bigint, max bigint ) + bigint + + + random ( min numeric, max numeric ) + numeric + + + Returns a random value in the range + min <= x <= max. + For type numeric, the result will have the same number of + fractional decimal digits as min or + max, whichever has more. + + + random(1, 10) + 7 + + + random(-0.499, 0.499) + 0.347 + + + + + + + random_normal + + + random_normal ( + mean double precision + , stddev double precision ) + double precision + + + Returns a random value from the normal distribution with the given + parameters; mean defaults to 0.0 + and stddev defaults to 1.0 + + + random_normal(0.0, 1.0) + 0.051285419 + + + + + + + setseed + + setseed ( double precision ) + void + + + Sets the seed for subsequent random() and + random_normal() calls; + argument must be between -1.0 and 1.0, inclusive + + + setseed(0.12345) + + + + +
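+
+   For example, the two-argument form of random() can simulate dice
+   rolls, and seeding the generator makes the sequence repeatable
+   (a sketch; the values drawn are omitted here because they depend on
+   the generator state):
+
+SELECT setseed(0.42);
+SELECT random(1, 6) AS roll FROM generate_series(1, 3);
+-- Re-issuing setseed(0.42) and repeating the query yields the same three rolls.
+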
+ + + The random() and random_normal() + functions listed in use a + deterministic pseudo-random number generator. + It is fast but not suitable for cryptographic + applications; see the module for a more + secure alternative. + If setseed() is called, the series of results of + subsequent calls to these functions in the current session + can be repeated by re-issuing setseed() with the same + argument. + Without any prior setseed() call in the same + session, the first call to any of these functions obtains a seed + from a platform-dependent source of random bits. + + + + shows the + available trigonometric functions. Each of these functions comes in + two variants, one that measures angles in radians and one that + measures angles in degrees. + + + + Trigonometric Functions + + + + + + Function + + + Description + + + Example(s) + + + + + + + + + acos + + acos ( double precision ) + double precision + + + Inverse cosine, result in radians + + + acos(1) + 0 + + + + + + + acosd + + acosd ( double precision ) + double precision + + + Inverse cosine, result in degrees + + + acosd(0.5) + 60 + + + + + + + asin + + asin ( double precision ) + double precision + + + Inverse sine, result in radians + + + asin(1) + 1.5707963267948966 + + + + + + + asind + + asind ( double precision ) + double precision + + + Inverse sine, result in degrees + + + asind(0.5) + 30 + + + + + + + atan + + atan ( double precision ) + double precision + + + Inverse tangent, result in radians + + + atan(1) + 0.7853981633974483 + + + + + + + atand + + atand ( double precision ) + double precision + + + Inverse tangent, result in degrees + + + atand(1) + 45 + + + + + + + atan2 + + atan2 ( y double precision, + x double precision ) + double precision + + + Inverse tangent of + y/x, + result in radians + + + atan2(1, 0) + 1.5707963267948966 + + + + + + + atan2d + + atan2d ( y double precision, + x double precision ) + double precision + + + Inverse tangent of + y/x, + result in degrees + + + atan2d(1, 0) + 90 + + + + + + + cos + + cos ( double precision ) + double precision + + + Cosine, argument in radians + + + cos(0) + 1 + + + + + + + cosd + + cosd ( double precision ) + double precision + + + Cosine, argument in degrees + + + cosd(60) + 0.5 + + + + + + + cot + + cot ( double precision ) + double precision + + + Cotangent, argument in radians + + + cot(0.5) + 1.830487721712452 + + + + + + + cotd + + cotd ( double precision ) + double precision + + + Cotangent, argument in degrees + + + cotd(45) + 1 + + + + + + + sin + + sin ( double precision ) + double precision + + + Sine, argument in radians + + + sin(1) + 0.8414709848078965 + + + + + + + sind + + sind ( double precision ) + double precision + + + Sine, argument in degrees + + + sind(30) + 0.5 + + + + + + + tan + + tan ( double precision ) + double precision + + + Tangent, argument in radians + + + tan(1) + 1.5574077246549023 + + + + + + + tand + + tand ( double precision ) + double precision + + + Tangent, argument in degrees + + + tand(45) + 1 + + + + +
+ + + + Another way to work with angles measured in degrees is to use the unit + transformation functions radians() + and degrees() shown earlier. + However, using the degree-based trigonometric functions is preferred, + as that way avoids round-off error for special cases such + as sind(30). + + + + + shows the + available hyperbolic functions. + + + + Hyperbolic Functions + + + + + + Function + + + Description + + + Example(s) + + + + + + + + + sinh + + sinh ( double precision ) + double precision + + + Hyperbolic sine + + + sinh(1) + 1.1752011936438014 + + + + + + + cosh + + cosh ( double precision ) + double precision + + + Hyperbolic cosine + + + cosh(0) + 1 + + + + + + + tanh + + tanh ( double precision ) + double precision + + + Hyperbolic tangent + + + tanh(1) + 0.7615941559557649 + + + + + + + asinh + + asinh ( double precision ) + double precision + + + Inverse hyperbolic sine + + + asinh(1) + 0.881373587019543 + + + + + + + acosh + + acosh ( double precision ) + double precision + + + Inverse hyperbolic cosine + + + acosh(1) + 0 + + + + + + + atanh + + atanh ( double precision ) + double precision + + + Inverse hyperbolic tangent + + + atanh(0.5) + 0.5493061443340548 + + + + +
+ +
diff --git a/doc/src/sgml/func/func-merge-support.sgml b/doc/src/sgml/func/func-merge-support.sgml new file mode 100644 index 0000000000000..7f084271c13ae --- /dev/null +++ b/doc/src/sgml/func/func-merge-support.sgml @@ -0,0 +1,78 @@ + + Merge Support Functions + + + MERGE + RETURNING + + + + PostgreSQL includes one merge support function + that may be used in the RETURNING list of a + command to identify the action taken for each + row; see . + + + + Merge Support Functions + + + + + + Function + + + Description + + + + + + + + + merge_action + + merge_action ( ) + text + + + Returns the merge action command executed for the current row. This + will be 'INSERT', 'UPDATE', or + 'DELETE'. + + + + +
+
+
+  Example:
+
+MERGE INTO products p
+  USING stock s ON p.product_id = s.product_id
+  WHEN MATCHED AND s.quantity > 0 THEN
+    UPDATE SET in_stock = true, quantity = s.quantity
+  WHEN MATCHED THEN
+    UPDATE SET in_stock = false, quantity = 0
+  WHEN NOT MATCHED THEN
+    INSERT (product_id, in_stock, quantity)
+      VALUES (s.product_id, true, s.quantity)
+  RETURNING merge_action(), p.*;
+
+ merge_action | product_id | in_stock | quantity
+--------------+------------+----------+----------
+ UPDATE       |       1001 | t        |       50
+ UPDATE       |       1002 | f        |        0
+ INSERT       |       1003 | t        |       10
+
+
+  Note that this function can only be used in the RETURNING
+  list of a MERGE command. It is an error to use it in any
+  other part of a query.
+
diff --git a/doc/src/sgml/func/func-net.sgml b/doc/src/sgml/func/func-net.sgml new file mode 100644 index 0000000000000..1361a44c19767 --- /dev/null +++ b/doc/src/sgml/func/func-net.sgml @@ -0,0 +1,592 @@ + + Network Address Functions and Operators + + + The IP network address types, cidr and inet, + support the usual comparison operators shown in + + as well as the specialized operators and functions shown in + and + . + + + + Any cidr value can be cast to inet implicitly; + therefore, the operators and functions shown below as operating on + inet also work on cidr values. (Where there are + separate functions for inet and cidr, it is + because the behavior should be different for the two cases.) + Also, it is permitted to cast an inet value + to cidr. When this is done, any bits to the right of the + netmask are silently zeroed to create a valid cidr value. + + + + IP Address Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + inet << inet + boolean + + + Is subnet strictly contained by subnet? + This operator, and the next four, test for subnet inclusion. They + consider only the network parts of the two addresses (ignoring any + bits to the right of the netmasks) and determine whether one network + is identical to or a subnet of the other. + + + inet '192.168.1.5' << inet '192.168.1/24' + t + + + inet '192.168.0.5' << inet '192.168.1/24' + f + + + inet '192.168.1/24' << inet '192.168.1/24' + f + + + + + + inet <<= inet + boolean + + + Is subnet contained by or equal to subnet? + + + inet '192.168.1/24' <<= inet '192.168.1/24' + t + + + + + + inet >> inet + boolean + + + Does subnet strictly contain subnet? + + + inet '192.168.1/24' >> inet '192.168.1.5' + t + + + + + + inet >>= inet + boolean + + + Does subnet contain or equal subnet? + + + inet '192.168.1/24' >>= inet '192.168.1/24' + t + + + + + + inet && inet + boolean + + + Does either subnet contain or equal the other? + + + inet '192.168.1/24' && inet '192.168.1.80/28' + t + + + inet '192.168.1/24' && inet '192.168.2.0/28' + f + + + + + + ~ inet + inet + + + Computes bitwise NOT. + + + ~ inet '192.168.1.6' + 63.87.254.249 + + + + + + inet & inet + inet + + + Computes bitwise AND. + + + inet '192.168.1.6' & inet '0.0.0.255' + 0.0.0.6 + + + + + + inet | inet + inet + + + Computes bitwise OR. + + + inet '192.168.1.6' | inet '0.0.0.255' + 192.168.1.255 + + + + + + inet + bigint + inet + + + Adds an offset to an address. + + + inet '192.168.1.6' + 25 + 192.168.1.31 + + + + + + bigint + inet + inet + + + Adds an offset to an address. + + + 200 + inet '::ffff:fff0:1' + ::ffff:255.240.0.201 + + + + + + inet - bigint + inet + + + Subtracts an offset from an address. + + + inet '192.168.1.43' - 36 + 192.168.1.7 + + + + + + inet - inet + bigint + + + Computes the difference of two addresses. + + + inet '192.168.1.43' - inet '192.168.1.19' + 24 + + + inet '::1' - inet '::ffff:1' + -4294901760 + + + + +
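To sketch how these operators compose in an ordinary query, the following enumerates a few consecutive addresses with + and tests each for strict containment in a subnet (values chosen purely for illustration):

SELECT inet '192.168.1.0' + g AS addr,
       (inet '192.168.1.0' + g) << inet '192.168.1.0/30' AS in_subnet
FROM generate_series(2, 4) AS g;

    addr     | in_subnet
-------------+-----------
 192.168.1.2 | t
 192.168.1.3 | t
 192.168.1.4 | f
(3 rows)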
+ + + IP Address Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + abbrev + + abbrev ( inet ) + text + + + Creates an abbreviated display format as text. + (The result is the same as the inet output function + produces; it is abbreviated only in comparison to the + result of an explicit cast to text, which for historical + reasons will never suppress the netmask part.) + + + abbrev(inet '10.1.0.0/32') + 10.1.0.0 + + + + + + abbrev ( cidr ) + text + + + Creates an abbreviated display format as text. + (The abbreviation consists of dropping all-zero octets to the right + of the netmask; more examples are in + .) + + + abbrev(cidr '10.1.0.0/16') + 10.1/16 + + + + + + + broadcast + + broadcast ( inet ) + inet + + + Computes the broadcast address for the address's network. + + + broadcast(inet '192.168.1.5/24') + 192.168.1.255/24 + + + + + + + family + + family ( inet ) + integer + + + Returns the address's family: 4 for IPv4, + 6 for IPv6. + + + family(inet '::1') + 6 + + + + + + + host + + host ( inet ) + text + + + Returns the IP address as text, ignoring the netmask. + + + host(inet '192.168.1.0/24') + 192.168.1.0 + + + + + + + hostmask + + hostmask ( inet ) + inet + + + Computes the host mask for the address's network. + + + hostmask(inet '192.168.23.20/30') + 0.0.0.3 + + + + + + + inet_merge + + inet_merge ( inet, inet ) + cidr + + + Computes the smallest network that includes both of the given networks. + + + inet_merge(inet '192.168.1.5/24', inet '192.168.2.5/24') + 192.168.0.0/22 + + + + + + + inet_same_family + + inet_same_family ( inet, inet ) + boolean + + + Tests whether the addresses belong to the same IP family. + + + inet_same_family(inet '192.168.1.5/24', inet '::1') + f + + + + + + + masklen + + masklen ( inet ) + integer + + + Returns the netmask length in bits. + + + masklen(inet '192.168.1.5/24') + 24 + + + + + + + netmask + + netmask ( inet ) + inet + + + Computes the network mask for the address's network. + + + netmask(inet '192.168.1.5/24') + 255.255.255.0 + + + + + + + network + + network ( inet ) + cidr + + + Returns the network part of the address, zeroing out + whatever is to the right of the netmask. + (This is equivalent to casting the value to cidr.) + + + network(inet '192.168.1.5/24') + 192.168.1.0/24 + + + + + + + set_masklen + + set_masklen ( inet, integer ) + inet + + + Sets the netmask length for an inet value. + The address part does not change. + + + set_masklen(inet '192.168.1.5/24', 16) + 192.168.1.5/16 + + + + + + set_masklen ( cidr, integer ) + cidr + + + Sets the netmask length for a cidr value. + Address bits to the right of the new netmask are set to zero. + + + set_masklen(cidr '192.168.1.0/24', 16) + 192.168.0.0/16 + + + + + + + text + + text ( inet ) + text + + + Returns the unabbreviated IP address and netmask length as text. + (This has the same result as an explicit cast to text.) + + + text(inet '192.168.1.5') + 192.168.1.5/32 + + + + +
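These functions combine naturally; for example, one plausible way to derive the network a bare host address belongs to, given a prefix length (output shown approximately):

SELECT host(inet '10.1.2.3')                     AS bare_address,
       network(set_masklen(inet '10.1.2.3', 16)) AS containing_net,
       masklen(set_masklen(inet '10.1.2.3', 16)) AS prefix_bits;

 bare_address | containing_net | prefix_bits
--------------+----------------+-------------
 10.1.2.3     | 10.1.0.0/16    |          16
(1 row)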
+ + + + The abbrev, host, + and text functions are primarily intended to offer + alternative display formats for IP addresses. + + + + + The MAC address types, macaddr and macaddr8, + support the usual comparison operators shown in + + as well as the specialized functions shown in + . + In addition, they support the bitwise logical operators + ~, & and | + (NOT, AND and OR), just as shown above for IP addresses. + + + + MAC Address Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + trunc + + trunc ( macaddr ) + macaddr + + + Sets the last 3 bytes of the address to zero. The remaining prefix + can be associated with a particular manufacturer (using data not + included in PostgreSQL). + + + trunc(macaddr '12:34:56:78:90:ab') + 12:34:56:00:00:00 + + + + + + trunc ( macaddr8 ) + macaddr8 + + + Sets the last 5 bytes of the address to zero. The remaining prefix + can be associated with a particular manufacturer (using data not + included in PostgreSQL). + + + trunc(macaddr8 '12:34:56:78:90:ab:cd:ef') + 12:34:56:00:00:00:00:00 + + + + + + + macaddr8_set7bit + + macaddr8_set7bit ( macaddr8 ) + macaddr8 + + + Sets the 7th bit of the address to one, creating what is known as + modified EUI-64, for inclusion in an IPv6 address. + + + macaddr8_set7bit(macaddr8 '00:34:56:ab:cd:ef') + 02:34:56:ff:fe:ab:cd:ef + + + + +
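The bitwise operators mentioned above can be applied to MAC addresses as well; for instance, masking out the low bytes by hand gives the same result as trunc:

SELECT macaddr '12:34:56:78:90:ab' & macaddr 'ff:ff:ff:00:00:00' AS oui_part;

     oui_part
-------------------
 12:34:56:00:00:00
(1 row)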
+ +
diff --git a/doc/src/sgml/func/func-range.sgml b/doc/src/sgml/func/func-range.sgml new file mode 100644 index 0000000000000..2dc40348a57f4 --- /dev/null +++ b/doc/src/sgml/func/func-range.sgml @@ -0,0 +1,1053 @@ + + Range/Multirange Functions and Operators + + + See for an overview of range types. + + + + shows the specialized operators + available for range types. + shows the specialized operators + available for multirange types. + In addition to those, the usual comparison operators shown in + are available for range + and multirange types. The comparison operators order first by the range lower + bounds, and only if those are equal do they compare the upper bounds. The + multirange operators compare each range until one is unequal. This + does not usually result in a useful overall ordering, but the operators are + provided to allow unique indexes to be constructed on ranges. + + + + Range Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + anyrange @> anyrange + boolean + + + Does the first range contain the second? + + + int4range(2,4) @> int4range(2,3) + t + + + + + + anyrange @> anyelement + boolean + + + Does the range contain the element? + + + '[2011-01-01,2011-03-01)'::tsrange @> '2011-01-10'::timestamp + t + + + + + + anyrange <@ anyrange + boolean + + + Is the first range contained by the second? + + + int4range(2,4) <@ int4range(1,7) + t + + + + + + anyelement <@ anyrange + boolean + + + Is the element contained in the range? + + + 42 <@ int4range(1,7) + f + + + + + + anyrange && anyrange + boolean + + + Do the ranges overlap, that is, have any elements in common? + + + int8range(3,7) && int8range(4,12) + t + + + + + + anyrange << anyrange + boolean + + + Is the first range strictly left of the second? + + + int8range(1,10) << int8range(100,110) + t + + + + + + anyrange >> anyrange + boolean + + + Is the first range strictly right of the second? + + + int8range(50,60) >> int8range(20,30) + t + + + + + + anyrange &< anyrange + boolean + + + Does the first range not extend to the right of the second? + + + int8range(1,20) &< int8range(18,20) + t + + + + + + anyrange &> anyrange + boolean + + + Does the first range not extend to the left of the second? + + + int8range(7,20) &> int8range(5,10) + t + + + + + + anyrange -|- anyrange + boolean + + + Are the ranges adjacent? + + + numrange(1.1,2.2) -|- numrange(2.2,3.3) + t + + + + + + anyrange + anyrange + anyrange + + + Computes the union of the ranges. The ranges must overlap or be + adjacent, so that the union is a single range (but + see range_merge()). + + + numrange(5,15) + numrange(10,20) + [5,20) + + + + + + anyrange * anyrange + anyrange + + + Computes the intersection of the ranges. + + + int8range(5,15) * int8range(10,20) + [10,15) + + + + + + anyrange - anyrange + anyrange + + + Computes the difference of the ranges. The second range must not be + contained in the first in such a way that the difference would not be + a single range. + + + int8range(5,15) - int8range(10,20) + [5,10) + + + + +
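In practice these operators often appear in scheduling-style queries; a quick illustration with made-up reservation times, using the overlap operator:

SELECT tsrange('2024-01-01 10:00', '2024-01-01 12:00') &&
       tsrange('2024-01-01 11:30', '2024-01-01 13:00') AS clash;

 clash
-------
 t
(1 row)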
+ + + Multirange Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + anymultirange @> anymultirange + boolean + + + Does the first multirange contain the second? + + + '{[2,4)}'::int4multirange @> '{[2,3)}'::int4multirange + t + + + + + + anymultirange @> anyrange + boolean + + + Does the multirange contain the range? + + + '{[2,4)}'::int4multirange @> int4range(2,3) + t + + + + + + anymultirange @> anyelement + boolean + + + Does the multirange contain the element? + + + '{[2011-01-01,2011-03-01)}'::tsmultirange @> '2011-01-10'::timestamp + t + + + + + + anyrange @> anymultirange + boolean + + + Does the range contain the multirange? + + + '[2,4)'::int4range @> '{[2,3)}'::int4multirange + t + + + + + + anymultirange <@ anymultirange + boolean + + + Is the first multirange contained by the second? + + + '{[2,4)}'::int4multirange <@ '{[1,7)}'::int4multirange + t + + + + + + anymultirange <@ anyrange + boolean + + + Is the multirange contained by the range? + + + '{[2,4)}'::int4multirange <@ int4range(1,7) + t + + + + + + anyrange <@ anymultirange + boolean + + + Is the range contained by the multirange? + + + int4range(2,4) <@ '{[1,7)}'::int4multirange + t + + + + + + anyelement <@ anymultirange + boolean + + + Is the element contained by the multirange? + + + 4 <@ '{[1,7)}'::int4multirange + t + + + + + + anymultirange && anymultirange + boolean + + + Do the multiranges overlap, that is, have any elements in common? + + + '{[3,7)}'::int8multirange && '{[4,12)}'::int8multirange + t + + + + + + anymultirange && anyrange + boolean + + + Does the multirange overlap the range? + + + '{[3,7)}'::int8multirange && int8range(4,12) + t + + + + + + anyrange && anymultirange + boolean + + + Does the range overlap the multirange? + + + int8range(3,7) && '{[4,12)}'::int8multirange + t + + + + + + anymultirange << anymultirange + boolean + + + Is the first multirange strictly left of the second? + + + '{[1,10)}'::int8multirange << '{[100,110)}'::int8multirange + t + + + + + + anymultirange << anyrange + boolean + + + Is the multirange strictly left of the range? + + + '{[1,10)}'::int8multirange << int8range(100,110) + t + + + + + + anyrange << anymultirange + boolean + + + Is the range strictly left of the multirange? + + + int8range(1,10) << '{[100,110)}'::int8multirange + t + + + + + + anymultirange >> anymultirange + boolean + + + Is the first multirange strictly right of the second? + + + '{[50,60)}'::int8multirange >> '{[20,30)}'::int8multirange + t + + + + + + anymultirange >> anyrange + boolean + + + Is the multirange strictly right of the range? + + + '{[50,60)}'::int8multirange >> int8range(20,30) + t + + + + + + anyrange >> anymultirange + boolean + + + Is the range strictly right of the multirange? + + + int8range(50,60) >> '{[20,30)}'::int8multirange + t + + + + + + anymultirange &< anymultirange + boolean + + + Does the first multirange not extend to the right of the second? + + + '{[1,20)}'::int8multirange &< '{[18,20)}'::int8multirange + t + + + + + + anymultirange &< anyrange + boolean + + + Does the multirange not extend to the right of the range? + + + '{[1,20)}'::int8multirange &< int8range(18,20) + t + + + + + + anyrange &< anymultirange + boolean + + + Does the range not extend to the right of the multirange? + + + int8range(1,20) &< '{[18,20)}'::int8multirange + t + + + + + + anymultirange &> anymultirange + boolean + + + Does the first multirange not extend to the left of the second? 
+ + + '{[7,20)}'::int8multirange &> '{[5,10)}'::int8multirange + t + + + + + + anymultirange &> anyrange + boolean + + + Does the multirange not extend to the left of the range? + + + '{[7,20)}'::int8multirange &> int8range(5,10) + t + + + + + + anyrange &> anymultirange + boolean + + + Does the range not extend to the left of the multirange? + + + int8range(7,20) &> '{[5,10)}'::int8multirange + t + + + + + + anymultirange -|- anymultirange + boolean + + + Are the multiranges adjacent? + + + '{[1.1,2.2)}'::nummultirange -|- '{[2.2,3.3)}'::nummultirange + t + + + + + + anymultirange -|- anyrange + boolean + + + Is the multirange adjacent to the range? + + + '{[1.1,2.2)}'::nummultirange -|- numrange(2.2,3.3) + t + + + + + + anyrange -|- anymultirange + boolean + + + Is the range adjacent to the multirange? + + + numrange(1.1,2.2) -|- '{[2.2,3.3)}'::nummultirange + t + + + + + + anymultirange + anymultirange + anymultirange + + + Computes the union of the multiranges. The multiranges need not overlap + or be adjacent. + + + '{[5,10)}'::nummultirange + '{[15,20)}'::nummultirange + {[5,10), [15,20)} + + + + + + anymultirange * anymultirange + anymultirange + + + Computes the intersection of the multiranges. + + + '{[5,15)}'::int8multirange * '{[10,20)}'::int8multirange + {[10,15)} + + + + + + anymultirange - anymultirange + anymultirange + + + Computes the difference of the multiranges. + + + '{[5,20)}'::int8multirange - '{[10,15)}'::int8multirange + {[5,10), [15,20)} + + + + +
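One practical consequence, anticipating the notes below: a difference that would split a plain range into two pieces raises an error for ranges, but succeeds once the operands are cast to multiranges. For example:

-- SELECT int4range(1,10) - int4range(3,5);   -- fails: result would be two ranges
SELECT int4range(1,10)::int4multirange - int4range(3,5)::int4multirange AS diff;

       diff
------------------
 {[1,3), [5,10)}
(1 row)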
+ + + The left-of/right-of/adjacent operators always return false when an empty + range or multirange is involved; that is, an empty range is not considered to + be either before or after any other range. + + + + Elsewhere empty ranges and multiranges are treated as the additive identity: + anything unioned with an empty value is itself. Anything minus an empty + value is itself. An empty multirange has exactly the same points as an empty + range. Every range contains the empty range. Every multirange contains as many + empty ranges as you like. + + + + The range union and difference operators will fail if the resulting range would + need to contain two disjoint sub-ranges, as such a range cannot be + represented. There are separate operators for union and difference that take + multirange parameters and return a multirange, and they do not fail even if + their arguments are disjoint. So if you need a union or difference operation + for ranges that may be disjoint, you can avoid errors by first casting your + ranges to multiranges. + + + + shows the functions + available for use with range types. + shows the functions + available for use with multirange types. + + + + Range Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + lower + + lower ( anyrange ) + anyelement + + + Extracts the lower bound of the range (NULL if the + range is empty or has no lower bound). + + + lower(numrange(1.1,2.2)) + 1.1 + + + + + + + upper + + upper ( anyrange ) + anyelement + + + Extracts the upper bound of the range (NULL if the + range is empty or has no upper bound). + + + upper(numrange(1.1,2.2)) + 2.2 + + + + + + + isempty + + isempty ( anyrange ) + boolean + + + Is the range empty? + + + isempty(numrange(1.1,2.2)) + f + + + + + + + lower_inc + + lower_inc ( anyrange ) + boolean + + + Is the range's lower bound inclusive? + + + lower_inc(numrange(1.1,2.2)) + t + + + + + + + upper_inc + + upper_inc ( anyrange ) + boolean + + + Is the range's upper bound inclusive? + + + upper_inc(numrange(1.1,2.2)) + f + + + + + + + lower_inf + + lower_inf ( anyrange ) + boolean + + + Does the range have no lower bound? (A lower bound of + -Infinity returns false.) + + + lower_inf('(,)'::daterange) + t + + + + + + + upper_inf + + upper_inf ( anyrange ) + boolean + + + Does the range have no upper bound? (An upper bound of + Infinity returns false.) + + + upper_inf('(,)'::daterange) + t + + + + + + + range_merge + + range_merge ( anyrange, anyrange ) + anyrange + + + Computes the smallest range that includes both of the given ranges. + + + range_merge('[1,2)'::int4range, '[3,4)'::int4range) + [1,4) + + + + +
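A short usage sketch of the bound-inspection functions, applied to a range with an exclusive lower and inclusive upper bound:

SELECT lower(r) AS lo, upper(r) AS hi, lower_inc(r), upper_inc(r)
FROM (VALUES (numrange(1.5, 2.5, '(]'))) AS t(r);

 lo  | hi  | lower_inc | upper_inc
-----+-----+-----------+-----------
 1.5 | 2.5 | f         | t
(1 row)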
+ + + Multirange Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + lower + + lower ( anymultirange ) + anyelement + + + Extracts the lower bound of the multirange (NULL if the + multirange is empty or has no lower bound). + + + lower('{[1.1,2.2)}'::nummultirange) + 1.1 + + + + + + + upper + + upper ( anymultirange ) + anyelement + + + Extracts the upper bound of the multirange (NULL if the + multirange is empty or has no upper bound). + + + upper('{[1.1,2.2)}'::nummultirange) + 2.2 + + + + + + + isempty + + isempty ( anymultirange ) + boolean + + + Is the multirange empty? + + + isempty('{[1.1,2.2)}'::nummultirange) + f + + + + + + + lower_inc + + lower_inc ( anymultirange ) + boolean + + + Is the multirange's lower bound inclusive? + + + lower_inc('{[1.1,2.2)}'::nummultirange) + t + + + + + + + upper_inc + + upper_inc ( anymultirange ) + boolean + + + Is the multirange's upper bound inclusive? + + + upper_inc('{[1.1,2.2)}'::nummultirange) + f + + + + + + + lower_inf + + lower_inf ( anymultirange ) + boolean + + + Does the multirange have no lower bound? (A lower bound of + -Infinity returns false.) + + + lower_inf('{(,)}'::datemultirange) + t + + + + + + + upper_inf + + upper_inf ( anymultirange ) + boolean + + + Does the multirange have no upper bound? (An upper bound of + Infinity returns false.) + + + upper_inf('{(,)}'::datemultirange) + t + + + + + + + range_merge + + range_merge ( anymultirange ) + anyrange + + + Computes the smallest range that includes the entire multirange. + + + range_merge('{[1,2), [3,4)}'::int4multirange) + [1,4) + + + + + + + multirange (function) + + multirange ( anyrange ) + anymultirange + + + Returns a multirange containing just the given range. + + + multirange('[1,2)'::int4range) + {[1,2)} + + + + + + + unnest + for multirange + + unnest ( anymultirange ) + setof anyrange + + + Expands a multirange into a set of ranges in ascending order. + + + unnest('{[1,2), [3,4)}'::int4multirange) + + + [1,2) + [3,4) + + + + + +
+ + + The lower_inc, upper_inc, + lower_inf, and upper_inf + functions all return false for an empty range or multirange. + +
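A brief demonstration of that empty-range behavior (lower returns NULL, shown blank by psql, and lower_inc returns false):

SELECT isempty('empty'::int4range)   AS is_empty,
       lower('empty'::int4range)     AS lo,
       lower_inc('empty'::int4range) AS lo_inc;

 is_empty | lo | lo_inc
----------+----+--------
 t        |    | f
(1 row)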
diff --git a/doc/src/sgml/func/func-sequence.sgml b/doc/src/sgml/func/func-sequence.sgml new file mode 100644 index 0000000000000..e9f5b4e8e6b27 --- /dev/null +++ b/doc/src/sgml/func/func-sequence.sgml @@ -0,0 +1,195 @@ + + Sequence Manipulation Functions + + + sequence + + + + This section describes functions for operating on sequence + objects, also called sequence generators or just sequences. + Sequence objects are special single-row tables created with . + Sequence objects are commonly used to generate unique identifiers + for rows of a table. The sequence functions, listed in , provide simple, multiuser-safe + methods for obtaining successive sequence values from sequence + objects. + + + + Sequence Functions + + + + + Function + + + Description + + + + + + + + + nextval + + nextval ( regclass ) + bigint + + + Advances the sequence object to its next value and returns that value. + This is done atomically: even if multiple sessions + execute nextval concurrently, each will safely + receive a distinct sequence value. + If the sequence object has been created with default parameters, + successive nextval calls will return successive + values beginning with 1. Other behaviors can be obtained by using + appropriate parameters in the + command. + + + This function requires USAGE + or UPDATE privilege on the sequence. + + + + + + + setval + + setval ( regclass, bigint , boolean ) + bigint + + + Sets the sequence object's current value, and optionally + its is_called flag. The two-parameter + form sets the sequence's last_value field to the + specified value and sets its is_called field to + true, meaning that the next + nextval will advance the sequence before + returning a value. The value that will be reported + by currval is also set to the specified value. + In the three-parameter form, is_called can be set + to either true + or false. true has the same + effect as the two-parameter form. If it is set + to false, the next nextval + will return exactly the specified value, and sequence advancement + commences with the following nextval. + Furthermore, the value reported by currval is not + changed in this case. For example, + +SELECT setval('myseq', 42); Next nextval will return 43 +SELECT setval('myseq', 42, true); Same as above +SELECT setval('myseq', 42, false); Next nextval will return 42 + + The result returned by setval is just the value of its + second argument. + + + This function requires UPDATE privilege on the + sequence. + + + + + + + currval + + currval ( regclass ) + bigint + + + Returns the value most recently obtained + by nextval for this sequence in the current + session. (An error is reported if nextval has + never been called for this sequence in this session.) Because this is + returning a session-local value, it gives a predictable answer whether + or not other sessions have executed nextval since + the current session did. + + + This function requires USAGE + or SELECT privilege on the sequence. + + + + + + + lastval + + lastval () + bigint + + + Returns the value most recently returned by + nextval in the current session. This function is + identical to currval, except that instead + of taking the sequence name as an argument it refers to whichever + sequence nextval was most recently applied to + in the current session. It is an error to call + lastval if nextval + has not yet been called in the current session. + + + This function requires USAGE + or SELECT privilege on the last used sequence. + + + + +
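A minimal end-to-end sketch, assuming a scratch sequence named demo_seq created only for this demonstration:

CREATE SEQUENCE demo_seq;

SELECT nextval('demo_seq');            -- 1
SELECT nextval('demo_seq');            -- 2
SELECT currval('demo_seq');            -- 2, session-local
SELECT setval('demo_seq', 100, false); -- returns 100
SELECT nextval('demo_seq');            -- exactly 100; advancement resumes afterwards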
+ + + + To avoid blocking concurrent transactions that obtain numbers from + the same sequence, the value obtained by nextval + is not reclaimed for re-use if the calling transaction later aborts. + This means that transaction aborts or database crashes can result in + gaps in the sequence of assigned values. That can happen without a + transaction abort, too. For example an INSERT with + an ON CONFLICT clause will compute the to-be-inserted + tuple, including doing any required nextval + calls, before detecting any conflict that would cause it to follow + the ON CONFLICT rule instead. + Thus, PostgreSQL sequence + objects cannot be used to obtain gapless + sequences. + + + + Likewise, sequence state changes made by setval + are immediately visible to other transactions, and are not undone if + the calling transaction rolls back. + + + + If the database cluster crashes before committing a transaction + containing a nextval + or setval call, the sequence state change might + not have made its way to persistent storage, so that it is uncertain + whether the sequence will have its original or updated state after the + cluster restarts. This is harmless for usage of the sequence within + the database, since other effects of uncommitted transactions will not + be visible either. However, if you wish to use a sequence value for + persistent outside-the-database purposes, make sure that the + nextval call has been committed before doing so. + + + + + The sequence to be operated on by a sequence function is specified by + a regclass argument, which is simply the OID of the sequence in the + pg_class system catalog. You do not have to look up the + OID by hand, however, since the regclass data type's input + converter will do the work for you. See + for details. + +
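The non-transactional behavior described above can be observed directly; continuing with the hypothetical demo_seq from earlier:

BEGIN;
SELECT nextval('demo_seq');   -- say it returns 101
ROLLBACK;
SELECT nextval('demo_seq');   -- returns 102: the rolled-back value is not reused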
diff --git a/doc/src/sgml/func/func-srf.sgml b/doc/src/sgml/func/func-srf.sgml new file mode 100644 index 0000000000000..eafc961c9f909 --- /dev/null +++ b/doc/src/sgml/func/func-srf.sgml @@ -0,0 +1,306 @@ + + Set Returning Functions + + + set returning functions + functions + + + + This section describes functions that possibly return more than one row. + The most widely used functions in this class are series generating + functions, as detailed in and + . Other, more specialized + set-returning functions are described elsewhere in this manual. + See for ways to combine multiple + set-returning functions. + + + + Series Generating Functions + + + + + Function + + + Description + + + + + + + + + generate_series + + generate_series ( start integer, stop integer , step integer ) + setof integer + + + generate_series ( start bigint, stop bigint , step bigint ) + setof bigint + + + generate_series ( start numeric, stop numeric , step numeric ) + setof numeric + + + Generates a series of values from start + to stop, with a step size + of step. step + defaults to 1. + + + + + + generate_series ( start timestamp, stop timestamp, step interval ) + setof timestamp + + + generate_series ( start timestamp with time zone, stop timestamp with time zone, step interval , timezone text ) + setof timestamp with time zone + + + Generates a series of values from start + to stop, with a step size + of step. + In the timezone-aware form, times of day and daylight-savings + adjustments are computed according to the time zone named by + the timezone argument, or the current + setting if that is omitted. + + + + +
+ + + When step is positive, zero rows are returned if + start is greater than stop. + Conversely, when step is negative, zero rows are + returned if start is less than stop. + Zero rows are also returned if any input is NULL. + It is an error + for step to be zero. Some examples follow: + +SELECT * FROM generate_series(2,4); + generate_series +----------------- + 2 + 3 + 4 +(3 rows) + +SELECT * FROM generate_series(5,1,-2); + generate_series +----------------- + 5 + 3 + 1 +(3 rows) + +SELECT * FROM generate_series(4,3); + generate_series +----------------- +(0 rows) + +SELECT generate_series(1.1, 4, 1.3); + generate_series +----------------- + 1.1 + 2.4 + 3.7 +(3 rows) + +-- this example relies on the date-plus-integer operator: +SELECT current_date + s.a AS dates FROM generate_series(0,14,7) AS s(a); + dates +------------ + 2004-02-05 + 2004-02-12 + 2004-02-19 +(3 rows) + +SELECT * FROM generate_series('2008-03-01 00:00'::timestamp, + '2008-03-04 12:00', '10 hours'); + generate_series +--------------------- + 2008-03-01 00:00:00 + 2008-03-01 10:00:00 + 2008-03-01 20:00:00 + 2008-03-02 06:00:00 + 2008-03-02 16:00:00 + 2008-03-03 02:00:00 + 2008-03-03 12:00:00 + 2008-03-03 22:00:00 + 2008-03-04 08:00:00 +(9 rows) + +-- this example assumes that TimeZone is set to UTC; note the DST transition: +SELECT * FROM generate_series('2001-10-22 00:00 -04:00'::timestamptz, + '2001-11-01 00:00 -05:00'::timestamptz, + '1 day'::interval, 'America/New_York'); + generate_series +------------------------ + 2001-10-22 04:00:00+00 + 2001-10-23 04:00:00+00 + 2001-10-24 04:00:00+00 + 2001-10-25 04:00:00+00 + 2001-10-26 04:00:00+00 + 2001-10-27 04:00:00+00 + 2001-10-28 04:00:00+00 + 2001-10-29 05:00:00+00 + 2001-10-30 05:00:00+00 + 2001-10-31 05:00:00+00 + 2001-11-01 05:00:00+00 +(11 rows) + + + + + Subscript Generating Functions + + + + + Function + + + Description + + + + + + + + + generate_subscripts + + generate_subscripts ( array anyarray, dim integer ) + setof integer + + + Generates a series comprising the valid subscripts of + the dim'th dimension of the given array. + + + + + + generate_subscripts ( array anyarray, dim integer, reverse boolean ) + setof integer + + + Generates a series comprising the valid subscripts of + the dim'th dimension of the given array. + When reverse is true, returns the series in + reverse order. + + + + +
+ + + generate_subscripts is a convenience function that generates + the set of valid subscripts for the specified dimension of the given + array. + Zero rows are returned for arrays that do not have the requested dimension, + or if any input is NULL. + Some examples follow: + +-- basic usage: +SELECT generate_subscripts('{NULL,1,NULL,2}'::int[], 1) AS s; + s +--- + 1 + 2 + 3 + 4 +(4 rows) + +-- presenting an array, the subscript and the subscripted +-- value requires a subquery: +SELECT * FROM arrays; + a +-------------------- + {-1,-2} + {100,200,300} +(2 rows) + +SELECT a AS array, s AS subscript, a[s] AS value +FROM (SELECT generate_subscripts(a, 1) AS s, a FROM arrays) foo; + array | subscript | value +---------------+-----------+------- + {-1,-2} | 1 | -1 + {-1,-2} | 2 | -2 + {100,200,300} | 1 | 100 + {100,200,300} | 2 | 200 + {100,200,300} | 3 | 300 +(5 rows) + +-- unnest a 2D array: +CREATE OR REPLACE FUNCTION unnest2(anyarray) +RETURNS SETOF anyelement AS $$ +select $1[i][j] + from generate_subscripts($1,1) g1(i), + generate_subscripts($1,2) g2(j); +$$ LANGUAGE sql IMMUTABLE; +CREATE FUNCTION +SELECT * FROM unnest2(ARRAY[[1,2],[3,4]]); + unnest2 +--------- + 1 + 2 + 3 + 4 +(4 rows) + + + + + ordinality + + + + When a function in the FROM clause is suffixed + by WITH ORDINALITY, a bigint column is + appended to the function's output column(s), which starts from 1 and + increments by 1 for each row of the function's output. + This is most useful in the case of set returning + functions such as unnest(). + + +-- set returning function WITH ORDINALITY: +SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); + ls | n +-----------------+---- + pg_serial | 1 + pg_twophase | 2 + postmaster.opts | 3 + pg_notify | 4 + postgresql.conf | 5 + pg_tblspc | 6 + logfile | 7 + base | 8 + postmaster.pid | 9 + pg_ident.conf | 10 + global | 11 + pg_xact | 12 + pg_snapshots | 13 + pg_multixact | 14 + PG_VERSION | 15 + pg_wal | 16 + pg_hba.conf | 17 + pg_stat_tmp | 18 + pg_subtrans | 19 +(19 rows) + + + +
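Since unnest() is the most common companion of WITH ORDINALITY, here is a small sketch of that pairing, with column names assigned via a column alias list:

SELECT * FROM unnest(ARRAY['a','b','c']) WITH ORDINALITY AS t(elem, idx);

 elem | idx
------+-----
 a    |   1
 b    |   2
 c    |   3
(3 rows)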
diff --git a/doc/src/sgml/func/func-statistics.sgml b/doc/src/sgml/func/func-statistics.sgml new file mode 100644 index 0000000000000..22dee263cc2a0 --- /dev/null +++ b/doc/src/sgml/func/func-statistics.sgml @@ -0,0 +1,85 @@ + + Statistics Information Functions + + + function + statistics + + + + PostgreSQL provides a function to inspect complex + statistics defined using the CREATE STATISTICS command. + + + + Inspecting MCV Lists + + + pg_mcv_list_items + + + +pg_mcv_list_items ( pg_mcv_list ) setof record + + + + pg_mcv_list_items returns a set of records describing + all items stored in a multi-column MCV list. It + returns the following columns: + + + + + + Name + Type + Description + + + + + + index + integer + index of the item in the MCV list + + + values + text[] + values stored in the MCV item + + + nulls + boolean[] + flags identifying NULL values + + + frequency + double precision + frequency of this MCV item + + + base_frequency + double precision + base frequency of this MCV item + + + + + + + + The pg_mcv_list_items function can be used like this: + + +SELECT m.* FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid), + pg_mcv_list_items(stxdmcv) m WHERE stxname = 'stts'; + + + Values of the pg_mcv_list type can be obtained only from the + pg_statistic_ext_data.stxdmcv + column. + + + + diff --git a/doc/src/sgml/func/func-string.sgml b/doc/src/sgml/func/func-string.sgml new file mode 100644 index 0000000000000..3eec93eb3395b --- /dev/null +++ b/doc/src/sgml/func/func-string.sgml @@ -0,0 +1,1818 @@ + + String Functions and Operators + + + This section describes functions and operators for examining and + manipulating string values. Strings in this context include values + of the types character, character varying, + and text. Except where noted, these functions and operators + are declared to accept and return type text. They will + interchangeably accept character varying arguments. + Values of type character will be converted + to text before the function or operator is applied, resulting + in stripping any trailing spaces in the character value. + + + + SQL defines some string functions that use + key words, rather than commas, to separate + arguments. Details are in + . + PostgreSQL also provides versions of these functions + that use the regular function invocation syntax + (see ). + + + + + The string concatenation operator (||) will accept + non-string input, so long as at least one input is of string type, as shown + in . For other cases, inserting an + explicit coercion to text can be used to have non-string input + accepted. + + + + + <acronym>SQL</acronym> String Functions and Operators + + + + + Function/Operator + + + Description + + + Example(s) + + + + + + + + + character string + concatenation + + text || text + text + + + Concatenates the two strings. + + + 'Post' || 'greSQL' + PostgreSQL + + + + + + text || anynonarray + text + + + anynonarray || text + text + + + Converts the non-string input to text, then concatenates the two + strings. (The non-string input cannot be of an array type, because + that would create ambiguity with the array || + operators. If you want to concatenate an array's text equivalent, + cast it to text explicitly.) + + + 'Value: ' || 42 + Value: 42 + + + + + + + btrim + + btrim ( string text + , characters text ) + text + + + Removes the longest string containing only characters + in characters (a space by default) + from the start and end of string. 
+ + + btrim('xyxtrimyyx', 'xyz') + trim + + + + + + + normalized + + + Unicode normalization + + text IS NOT form NORMALIZED + boolean + + + Checks whether the string is in the specified Unicode normalization + form. The optional form key word specifies the + form: NFC (the default), NFD, + NFKC, or NFKD. This expression can + only be used when the server encoding is UTF8. Note + that checking for normalization using this expression is often faster + than normalizing possibly already normalized strings. + + + U&'\0061\0308bc' IS NFD NORMALIZED + t + + + + + + + bit_length + + bit_length ( text ) + integer + + + Returns number of bits in the string (8 + times the octet_length). + + + bit_length('jose') + 32 + + + + + + + char_length + + + character string + length + + + length + of a character string + character string, length + + char_length ( text ) + integer + + + + character_length + + character_length ( text ) + integer + + + Returns number of characters in the string. + + + char_length('josé') + 4 + + + + + + + lower + + lower ( text ) + text + + + Converts the string to all lower case, according to the rules of the + database's locale. + + + lower('TOM') + tom + + + + + + + lpad + + lpad ( string text, + length integer + , fill text ) + text + + + Extends the string to length + length by prepending the characters + fill (a space by default). If the + string is already longer than + length then it is truncated (on the right). + + + lpad('hi', 5, 'xy') + xyxhi + + + + + + + ltrim + + ltrim ( string text + , characters text ) + text + + + Removes the longest string containing only characters in + characters (a space by default) from the start of + string. + + + ltrim('zzzytest', 'xyz') + test + + + + + + + normalize + + + Unicode normalization + + normalize ( text + , form ) + text + + + Converts the string to the specified Unicode + normalization form. The optional form key word + specifies the form: NFC (the default), + NFD, NFKC, or + NFKD. This function can only be used when the + server encoding is UTF8. + + + normalize(U&'\0061\0308bc', NFC) + U&'\00E4bc' + + + + + + + octet_length + + octet_length ( text ) + integer + + + Returns number of bytes in the string. + + + octet_length('josé') + 5 (if server encoding is UTF8) + + + + + + + octet_length + + octet_length ( character ) + integer + + + Returns number of bytes in the string. Since this version of the + function accepts type character directly, it will not + strip trailing spaces. + + + octet_length('abc '::character(4)) + 4 + + + + + + + overlay + + overlay ( string text PLACING newsubstring text FROM start integer FOR count integer ) + text + + + Replaces the substring of string that starts at + the start'th character and extends + for count characters + with newsubstring. + If count is omitted, it defaults to the length + of newsubstring. + + + overlay('Txxxxas' placing 'hom' from 2 for 4) + Thomas + + + + + + + position + + position ( substring text IN string text ) + integer + + + Returns first starting index of the specified + substring within + string, or zero if it's not present. + + + position('om' in 'Thomas') + 3 + + + + + + + rpad + + rpad ( string text, + length integer + , fill text ) + text + + + Extends the string to length + length by appending the characters + fill (a space by default). If the + string is already longer than + length then it is truncated. 
+ + + rpad('hi', 5, 'xy') + hixyx + + + + + + + rtrim + + rtrim ( string text + , characters text ) + text + + + Removes the longest string containing only characters in + characters (a space by default) from the end of + string. + + + rtrim('testxxzx', 'xyz') + test + + + + + + + substring + + substring ( string text FROM start integer FOR count integer ) + text + + + Extracts the substring of string starting at + the start'th character if that is specified, + and stopping after count characters if that is + specified. Provide at least one of start + and count. + + + substring('Thomas' from 2 for 3) + hom + + + substring('Thomas' from 3) + omas + + + substring('Thomas' for 2) + Th + + + + + + substring ( string text FROM pattern text ) + text + + + Extracts the first substring matching POSIX regular expression; see + . + + + substring('Thomas' from '...$') + mas + + + + + + substring ( string text SIMILAR pattern text ESCAPE escape text ) + text + + + substring ( string text FROM pattern text FOR escape text ) + text + + + Extracts the first substring matching SQL regular expression; + see . The first form has + been specified since SQL:2003; the second form was only in SQL:1999 + and should be considered obsolete. + + + substring('Thomas' similar '%#"o_a#"_' escape '#') + oma + + + + + + + trim + + trim ( LEADING | TRAILING | BOTH + characters text FROM + string text ) + text + + + Removes the longest string containing only characters in + characters (a space by default) from the + start, end, or both ends (BOTH is the default) + of string. + + + trim(both 'xyz' from 'yxTomxx') + Tom + + + + + + trim ( LEADING | TRAILING | BOTH FROM + string text , + characters text ) + text + + + This is a non-standard syntax for trim(). + + + trim(both from 'yxTomxx', 'xyz') + Tom + + + + + + + unicode_assigned + + unicode_assigned ( text ) + boolean + + + Returns true if all characters in the string are + assigned Unicode codepoints; false otherwise. This + function can only be used when the server encoding is + UTF8. + + + + + + + upper + + upper ( text ) + text + + + Converts the string to all upper case, according to the rules of the + database's locale. + + + upper('tom') + TOM + + + + +
+ + + Additional string manipulation functions and operators are available + and are listed in . (Some of + these are used internally to implement + the SQL-standard string functions listed in + .) + There are also pattern-matching operators, which are described in + , and operators for full-text + search, which are described in . + + + + Other String Functions and Operators + + + + + Function/Operator + + + Description + + + Example(s) + + + + + + + + + character string + prefix test + + text ^@ text + boolean + + + Returns true if the first string starts with the second string + (equivalent to the starts_with() function). + + + 'alphabet' ^@ 'alph' + t + + + + + + + ascii + + ascii ( text ) + integer + + + Returns the numeric code of the first character of the argument. + In UTF8 encoding, returns the Unicode code point + of the character. In other multibyte encodings, the argument must + be an ASCII character. + + + ascii('x') + 120 + + + + + + + chr + + chr ( integer ) + text + + + Returns the character with the given code. In UTF8 + encoding the argument is treated as a Unicode code point. In other + multibyte encodings the argument must designate + an ASCII character. chr(0) is + disallowed because text data types cannot store that character. + + + chr(65) + A + + + + + + + concat + + concat ( val1 "any" + , val2 "any" , ... ) + text + + + Concatenates the text representations of all the arguments. + NULL arguments are ignored. + + + concat('abcde', 2, NULL, 22) + abcde222 + + + + + + + concat_ws + + concat_ws ( sep text, + val1 "any" + , val2 "any" , ... ) + text + + + Concatenates all but the first argument, with separators. The first + argument is used as the separator string, and should not be NULL. + Other NULL arguments are ignored. + + + concat_ws(',', 'abcde', 2, NULL, 22) + abcde,2,22 + + + + + + + format + + format ( formatstr text + , formatarg "any" , ... ) + text + + + Formats arguments according to a format string; + see . + This function is similar to the C function sprintf. + + + format('Hello %s, %1$s', 'World') + Hello World, World + + + + + + + initcap + + initcap ( text ) + text + + + Converts the first letter of each word to upper case and the + rest to lower case. When using the libc locale + provider, words are sequences of alphanumeric characters separated + by non-alphanumeric characters; when using the ICU locale provider, + words are separated according to + Unicode Standard Annex #29. + + + initcap('hi THOMAS') + Hi Thomas + + + + + + + casefold + + casefold ( text ) + text + + + Performs case folding of the input string according to the collation. + Case folding is similar to case conversion, but the purpose of case + folding is to facilitate case-insensitive matching of strings, + whereas the purpose of case conversion is to convert to a particular + cased form. This function can only be used when the server encoding + is UTF8. + + + Ordinarily, case folding simply converts to lowercase, but there may + be exceptions depending on the collation. For instance, some + characters have more than two lowercase variants, or fold to uppercase. + + + Case folding may change the length of the string. For instance, in + the PG_UNICODE_FAST collation, ß + (U+00DF) folds to ss. + + + casefold can be used for Unicode Default Caseless + Matching. It does not always preserve the normalized form of the + input string (see ). + + + The libc provider doesn't support case folding, so + casefold is identical to . 
+ + + + + + + left + + left ( string text, + n integer ) + text + + + Returns first n characters in the + string, or when n is negative, returns + all but last |n| characters. + + + left('abcde', 2) + ab + + + + + + + length + + length ( text ) + integer + + + Returns the number of characters in the string. + + + length('jose') + 4 + + + + + + + md5 + + md5 ( text ) + text + + + Computes the MD5 hash of + the argument, with the result written in hexadecimal. + + + md5('abc') + 900150983cd24fb0&zwsp;d6963f7d28e17f72 + + + + + + + parse_ident + + parse_ident ( qualified_identifier text + , strict_mode boolean DEFAULT true ) + text[] + + + Splits qualified_identifier into an array of + identifiers, removing any quoting of individual identifiers. By + default, extra characters after the last identifier are considered an + error; but if the second parameter is false, then such + extra characters are ignored. (This behavior is useful for parsing + names for objects like functions.) Note that this function does not + truncate over-length identifiers. If you want truncation you can cast + the result to name[]. + + + parse_ident('"SomeSchema".someTable') + {SomeSchema,sometable} + + + + + + + pg_client_encoding + + pg_client_encoding ( ) + name + + + Returns current client encoding name. + + + pg_client_encoding() + UTF8 + + + + + + + quote_ident + + quote_ident ( text ) + text + + + Returns the given string suitably quoted to be used as an identifier + in an SQL statement string. + Quotes are added only if necessary (i.e., if the string contains + non-identifier characters or would be case-folded). + Embedded quotes are properly doubled. + See also . + + + quote_ident('Foo bar') + "Foo bar" + + + + + + + quote_literal + + quote_literal ( text ) + text + + + Returns the given string suitably quoted to be used as a string literal + in an SQL statement string. + Embedded single-quotes and backslashes are properly doubled. + Note that quote_literal returns null on null + input; if the argument might be null, + quote_nullable is often more suitable. + See also . + + + quote_literal(E'O\'Reilly') + 'O''Reilly' + + + + + + quote_literal ( anyelement ) + text + + + Converts the given value to text and then quotes it as a literal. + Embedded single-quotes and backslashes are properly doubled. + + + quote_literal(42.5) + '42.5' + + + + + + + quote_nullable + + quote_nullable ( text ) + text + + + Returns the given string suitably quoted to be used as a string literal + in an SQL statement string; or, if the argument + is null, returns NULL. + Embedded single-quotes and backslashes are properly doubled. + See also . + + + quote_nullable(NULL) + NULL + + + + + + quote_nullable ( anyelement ) + text + + + Converts the given value to text and then quotes it as a literal; + or, if the argument is null, returns NULL. + Embedded single-quotes and backslashes are properly doubled. + + + quote_nullable(42.5) + '42.5' + + + + + + + regexp_count + + regexp_count ( string text, pattern text + , start integer + , flags text ) + integer + + + Returns the number of times the POSIX regular + expression pattern matches in + the string; see + . + + + regexp_count('123456789012', '\d\d\d', 2) + 3 + + + + + + + regexp_instr + + regexp_instr ( string text, pattern text + , start integer + , N integer + , endoption integer + , flags text + , subexpr integer ) + integer + + + Returns the position within string where + the N'th match of the POSIX regular + expression pattern occurs, or zero if there is + no such match; see . 
+ + + regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i') + 3 + + + regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i', 2) + 5 + + + + + + + regexp_like + + regexp_like ( string text, pattern text + , flags text ) + boolean + + + Checks whether a match of the POSIX regular + expression pattern occurs + within string; see + . + + + regexp_like('Hello World', 'world$', 'i') + t + + + + + + + regexp_match + + regexp_match ( string text, pattern text , flags text ) + text[] + + + Returns substrings within the first match of the POSIX regular + expression pattern to + the string; see + . + + + regexp_match('foobarbequebaz', '(bar)(beque)') + {bar,beque} + + + + + + + regexp_matches + + regexp_matches ( string text, pattern text , flags text ) + setof text[] + + + Returns substrings within the first match of the POSIX regular + expression pattern to + the string, or substrings within all + such matches if the g flag is used; + see . + + + regexp_matches('foobarbequebaz', 'ba.', 'g') + + + {bar} + {baz} + + + + + + + + regexp_replace + + regexp_replace ( string text, pattern text, replacement text + , flags text ) + text + + + Replaces the substring that is the first match to the POSIX + regular expression pattern, or all such + matches if the g flag is used; see + . + + + regexp_replace('Thomas', '.[mN]a.', 'M') + ThM + + + + + + regexp_replace ( string text, pattern text, replacement text, + start integer + , N integer + , flags text ) + text + + + Replaces the substring that is the N'th + match to the POSIX regular expression pattern, + or all such matches if N is zero, with the + search beginning at the start'th character + of string. If N is + omitted, it defaults to 1. See + . + + + regexp_replace('Thomas', '.', 'X', 3, 2) + ThoXas + + + regexp_replace(string=>'hello world', pattern=>'l', replacement=>'XX', start=>1, "N"=>2) + helXXo world + + + + + + + regexp_split_to_array + + regexp_split_to_array ( string text, pattern text , flags text ) + text[] + + + Splits string using a POSIX regular + expression as the delimiter, producing an array of results; see + . + + + regexp_split_to_array('hello world', '\s+') + {hello,world} + + + + + + + regexp_split_to_table + + regexp_split_to_table ( string text, pattern text , flags text ) + setof text + + + Splits string using a POSIX regular + expression as the delimiter, producing a set of results; see + . + + + regexp_split_to_table('hello world', '\s+') + + + hello + world + + + + + + + + regexp_substr + + regexp_substr ( string text, pattern text + , start integer + , N integer + , flags text + , subexpr integer ) + text + + + Returns the substring within string that + matches the N'th occurrence of the POSIX + regular expression pattern, + or NULL if there is no such match; see + . + + + regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i') + CDEF + + + regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i', 2) + EF + + + + + + + repeat + + repeat ( string text, number integer ) + text + + + Repeats string the specified + number of times. + + + repeat('Pg', 4) + PgPgPgPg + + + + + + + replace + + replace ( string text, + from text, + to text ) + text + + + Replaces all occurrences in string of + substring from with + substring to. + + + replace('abcdefabcdef', 'cd', 'XX') + abXXefabXXef + + + + + + + reverse + + reverse ( text ) + text + + + Reverses the order of the characters in the string. 
+ + + reverse('abcde') + edcba + + + + + + + right + + right ( string text, + n integer ) + text + + + Returns last n characters in the string, + or when n is negative, returns all but + first |n| characters. + + + right('abcde', 2) + de + + + + + + + split_part + + split_part ( string text, + delimiter text, + n integer ) + text + + + Splits string at occurrences + of delimiter and returns + the n'th field (counting from one), + or when n is negative, returns + the |n|'th-from-last field. + + + split_part('abc~@~def~@~ghi', '~@~', 2) + def + + + split_part('abc,def,ghi,jkl', ',', -2) + ghi + + + + + + + starts_with + + starts_with ( string text, prefix text ) + boolean + + + Returns true if string starts + with prefix. + + + starts_with('alphabet', 'alph') + t + + + + + + + string_to_array + + string_to_array ( string text, delimiter text , null_string text ) + text[] + + + Splits the string at occurrences + of delimiter and forms the resulting fields + into a text array. + If delimiter is NULL, + each character in the string will become a + separate element in the array. + If delimiter is an empty string, then + the string is treated as a single field. + If null_string is supplied and is + not NULL, fields matching that string are + replaced by NULL. + See also array_to_string. + + + string_to_array('xx~~yy~~zz', '~~', 'yy') + {xx,NULL,zz} + + + + + + + string_to_table + + string_to_table ( string text, delimiter text , null_string text ) + setof text + + + Splits the string at occurrences + of delimiter and returns the resulting fields + as a set of text rows. + If delimiter is NULL, + each character in the string will become a + separate row of the result. + If delimiter is an empty string, then + the string is treated as a single field. + If null_string is supplied and is + not NULL, fields matching that string are + replaced by NULL. + + + string_to_table('xx~^~yy~^~zz', '~^~', 'yy') + + + xx + NULL + zz + + + + + + + + strpos + + strpos ( string text, substring text ) + integer + + + Returns first starting index of the specified substring + within string, or zero if it's not present. + (Same as position(substring in + string), but note the reversed + argument order.) + + + strpos('high', 'ig') + 2 + + + + + + + substr + + substr ( string text, start integer , count integer ) + text + + + Extracts the substring of string starting at + the start'th character, + and extending for count characters if that is + specified. (Same + as substring(string + from start + for count).) + + + substr('alphabet', 3) + phabet + + + substr('alphabet', 3, 2) + ph + + + + + + + to_ascii + + to_ascii ( string text ) + text + + + to_ascii ( string text, + encoding name ) + text + + + to_ascii ( string text, + encoding integer ) + text + + + Converts string to ASCII + from another encoding, which may be identified by name or number. + If encoding is omitted the database encoding + is assumed (which in practice is the only useful case). + The conversion consists primarily of dropping accents. + Conversion is only supported + from LATIN1, LATIN2, + LATIN9, and WIN1250 encodings. + (See the module for another, more flexible + solution.) + + + to_ascii('Karél') + Karel + + + + + + + to_bin + + to_bin ( integer ) + text + + + to_bin ( bigint ) + text + + + Converts the number to its equivalent two's complement binary + representation. 
+ + + to_bin(2147483647) + 1111111111111111111111111111111 + + + to_bin(-1234) + 11111111111111111111101100101110 + + + + + + + to_hex + + to_hex ( integer ) + text + + + to_hex ( bigint ) + text + + + Converts the number to its equivalent two's complement hexadecimal + representation. + + + to_hex(2147483647) + 7fffffff + + + to_hex(-1234) + fffffb2e + + + + + + + to_oct + + to_oct ( integer ) + text + + + to_oct ( bigint ) + text + + + Converts the number to its equivalent two's complement octal + representation. + + + to_oct(2147483647) + 17777777777 + + + to_oct(-1234) + 37777775456 + + + + + + + translate + + translate ( string text, + from text, + to text ) + text + + + Replaces each character in string that + matches a character in the from set with the + corresponding character in the to + set. If from is longer than + to, occurrences of the extra characters in + from are deleted. + + + translate('12345', '143', 'ax') + a2x5 + + + + + + + unistr + + unistr ( text ) + text + + + Evaluate escaped Unicode characters in the argument. Unicode characters + can be specified as + \XXXX (4 hexadecimal + digits), \+XXXXXX (6 + hexadecimal digits), + \uXXXX (4 hexadecimal + digits), or \UXXXXXXXX + (8 hexadecimal digits). To specify a backslash, write two + backslashes. All other characters are taken literally. + + + + If the server encoding is not UTF-8, the Unicode code point identified + by one of these escape sequences is converted to the actual server + encoding; an error is reported if that's not possible. + + + + This function provides a (non-standard) alternative to string + constants with Unicode escapes (see ). + + + + unistr('d\0061t\+000061') + data + + + unistr('d\u0061t\U00000061') + data + + + + + +
+ + + The concat, concat_ws and + format functions are variadic, so it is possible to + pass the values to be concatenated or formatted as an array marked with + the VARIADIC keyword (see ). The array's elements are + treated as if they were separate ordinary arguments to the function. + If the variadic array argument is NULL, concat + and concat_ws return NULL, but + format treats a NULL as a zero-element array. + + + + See also the aggregate function string_agg in + , and the functions for + converting between strings and the bytea type in + . + + + + <function>format</function> + + + format + + + + The function format produces output formatted according to + a format string, in a style similar to the C function + sprintf. + + + + +format(formatstr text , formatarg "any" , ... ) + + formatstr is a format string that specifies how the + result should be formatted. Text in the format string is copied + directly to the result, except where format specifiers are + used. Format specifiers act as placeholders in the string, defining how + subsequent function arguments should be formatted and inserted into the + result. Each formatarg argument is converted to text + according to the usual output rules for its data type, and then formatted + and inserted into the result string according to the format specifier(s). + + + + Format specifiers are introduced by a % character and have + the form + +%[position][flags][width]type + + where the component fields are: + + + + position (optional) + + + A string of the form n$ where + n is the index of the argument to print. + Index 1 means the first argument after + formatstr. If the position is + omitted, the default is to use the next argument in sequence. + + + + + + flags (optional) + + + Additional options controlling how the format specifier's output is + formatted. Currently the only supported flag is a minus sign + (-) which will cause the format specifier's output to be + left-justified. This has no effect unless the width + field is also specified. + + + + + + width (optional) + + + Specifies the minimum number of characters to use to + display the format specifier's output. The output is padded on the + left or right (depending on the - flag) with spaces as + needed to fill the width. A too-small width does not cause + truncation of the output, but is simply ignored. The width may be + specified using any of the following: a positive integer; an + asterisk (*) to use the next function argument as the + width; or a string of the form *n$ to + use the nth function argument as the width. + + + + If the width comes from a function argument, that argument is + consumed before the argument that is used for the format specifier's + value. If the width argument is negative, the result is left + aligned (as if the - flag had been specified) within a + field of length abs(width). + + + + + + type (required) + + + The type of format conversion to use to produce the format + specifier's output. The following types are supported: + + + + s formats the argument value as a simple + string. A null value is treated as an empty string. + + + + + I treats the argument value as an SQL + identifier, double-quoting it if necessary. + It is an error for the value to be null (equivalent to + quote_ident). + + + + + L quotes the argument value as an SQL literal. + A null value is displayed as the string NULL, without + quotes (equivalent to quote_nullable). 
+ + + + + + + + + + + In addition to the format specifiers described above, the special sequence + %% may be used to output a literal % character. + + + + Here are some examples of the basic format conversions: + + +SELECT format('Hello %s', 'World'); +Result: Hello World + +SELECT format('Testing %s, %s, %s, %%', 'one', 'two', 'three'); +Result: Testing one, two, three, % + +SELECT format('INSERT INTO %I VALUES(%L)', 'Foo bar', E'O\'Reilly'); +Result: INSERT INTO "Foo bar" VALUES('O''Reilly') + +SELECT format('INSERT INTO %I VALUES(%L)', 'locations', 'C:\Program Files'); +Result: INSERT INTO locations VALUES('C:\Program Files') + + + + + Here are examples using width fields + and the - flag: + + +SELECT format('|%10s|', 'foo'); +Result: | foo| + +SELECT format('|%-10s|', 'foo'); +Result: |foo | + +SELECT format('|%*s|', 10, 'foo'); +Result: | foo| + +SELECT format('|%*s|', -10, 'foo'); +Result: |foo | + +SELECT format('|%-*s|', 10, 'foo'); +Result: |foo | + +SELECT format('|%-*s|', -10, 'foo'); +Result: |foo | + + + + + These examples show use of position fields: + + +SELECT format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three'); +Result: Testing three, two, one + +SELECT format('|%*2$s|', 'foo', 10, 'bar'); +Result: | bar| + +SELECT format('|%1$*2$s|', 'foo', 10, 'bar'); +Result: | foo| + + + + + Unlike the standard C function sprintf, + PostgreSQL's format function allows format + specifiers with and without position fields to be mixed + in the same format string. A format specifier without a + position field always uses the next argument after the + last argument consumed. + In addition, the format function does not require all + function arguments to be used in the format string. + For example: + + +SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); +Result: Testing three, two, three + + + + + The %I and %L format specifiers are particularly + useful for safely constructing dynamic SQL statements. See + . + + + +
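+
+   As an illustration of the variadic behavior noted at the start of this
+   section, the arguments to format may also be supplied as a single
+   array marked with the VARIADIC keyword:
+
+SELECT format('Testing %s, %s, %s', VARIADIC ARRAY['one', 'two', 'three']);
+Result: Testing one, two, three
+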
diff --git a/doc/src/sgml/func/func-subquery.sgml b/doc/src/sgml/func/func-subquery.sgml new file mode 100644 index 0000000000000..a9f2b12e48c66 --- /dev/null +++ b/doc/src/sgml/func/func-subquery.sgml @@ -0,0 +1,349 @@ + + Subquery Expressions + + + EXISTS + + + + IN + + + + NOT IN + + + + ANY + + + + ALL + + + + SOME + + + + subquery + + + + This section describes the SQL-compliant subquery + expressions available in PostgreSQL. + All of the expression forms documented in this section return + Boolean (true/false) results. + + + + <literal>EXISTS</literal> + + +EXISTS (subquery) + + + + The argument of EXISTS is an arbitrary SELECT statement, + or subquery. The + subquery is evaluated to determine whether it returns any rows. + If it returns at least one row, the result of EXISTS is + true; if the subquery returns no rows, the result of EXISTS + is false. + + + + The subquery can refer to variables from the surrounding query, + which will act as constants during any one evaluation of the subquery. + + + + The subquery will generally only be executed long enough to determine + whether at least one row is returned, not all the way to completion. + It is unwise to write a subquery that has side effects (such as + calling sequence functions); whether the side effects occur + might be unpredictable. + + + + Since the result depends only on whether any rows are returned, + and not on the contents of those rows, the output list of the + subquery is normally unimportant. A common coding convention is + to write all EXISTS tests in the form + EXISTS(SELECT 1 WHERE ...). There are exceptions to + this rule however, such as subqueries that use INTERSECT. + + + + This simple example is like an inner join on col2, but + it produces at most one output row for each tab1 row, + even if there are several matching tab2 rows: + +SELECT col1 +FROM tab1 +WHERE EXISTS (SELECT 1 FROM tab2 WHERE col2 = tab1.col2); + + + + + + <literal>IN</literal> + + +expression IN (subquery) + + + + The right-hand side is a parenthesized + subquery, which must return exactly one column. The left-hand expression + is evaluated and compared to each row of the subquery result. + The result of IN is true if any equal subquery row is found. + The result is false if no equal row is found (including the + case where the subquery returns no rows). + + + + Note that if the left-hand expression yields null, or if there are + no equal right-hand values and at least one right-hand row yields + null, the result of the IN construct will be null, not false. + This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + As with EXISTS, it's unwise to assume that the subquery will + be evaluated completely. + + + +row_constructor IN (subquery) + + + + The left-hand side of this form of IN is a row constructor, + as described in . + The right-hand side is a parenthesized + subquery, which must return exactly as many columns as there are + expressions in the left-hand row. The left-hand expressions are + evaluated and compared row-wise to each row of the subquery result. + The result of IN is true if any equal subquery row is found. + The result is false if no equal row is found (including the + case where the subquery returns no rows). + + + + As usual, null values in the rows are combined per + the normal rules of SQL Boolean expressions. 
Two rows are considered + equal if all their corresponding members are non-null and equal; the rows + are unequal if any corresponding members are non-null and unequal; + otherwise the result of that row comparison is unknown (null). + If all the per-row results are either unequal or null, with at least one + null, then the result of IN is null. + + + + + <literal>NOT IN</literal> + + +expression NOT IN (subquery) + + + + The right-hand side is a parenthesized + subquery, which must return exactly one column. The left-hand expression + is evaluated and compared to each row of the subquery result. + The result of NOT IN is true if only unequal subquery rows + are found (including the case where the subquery returns no rows). + The result is false if any equal row is found. + + + + Note that if the left-hand expression yields null, or if there are + no equal right-hand values and at least one right-hand row yields + null, the result of the NOT IN construct will be null, not true. + This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + As with EXISTS, it's unwise to assume that the subquery will + be evaluated completely. + + + +row_constructor NOT IN (subquery) + + + + The left-hand side of this form of NOT IN is a row constructor, + as described in . + The right-hand side is a parenthesized + subquery, which must return exactly as many columns as there are + expressions in the left-hand row. The left-hand expressions are + evaluated and compared row-wise to each row of the subquery result. + The result of NOT IN is true if only unequal subquery rows + are found (including the case where the subquery returns no rows). + The result is false if any equal row is found. + + + + As usual, null values in the rows are combined per + the normal rules of SQL Boolean expressions. Two rows are considered + equal if all their corresponding members are non-null and equal; the rows + are unequal if any corresponding members are non-null and unequal; + otherwise the result of that row comparison is unknown (null). + If all the per-row results are either unequal or null, with at least one + null, then the result of NOT IN is null. + + + + + <literal>ANY</literal>/<literal>SOME</literal> + + +expression operator ANY (subquery) +expression operator SOME (subquery) + + + + The right-hand side is a parenthesized + subquery, which must return exactly one column. The left-hand expression + is evaluated and compared to each row of the subquery result using the + given operator, which must yield a Boolean + result. + The result of ANY is true if any true result is obtained. + The result is false if no true result is found (including the + case where the subquery returns no rows). + + + + SOME is a synonym for ANY. + IN is equivalent to = ANY. + + + + Note that if there are no successes and at least one right-hand row yields + null for the operator's result, the result of the ANY construct + will be null, not false. + This is in accordance with SQL's normal rules for Boolean combinations + of null values. + + + + As with EXISTS, it's unwise to assume that the subquery will + be evaluated completely. + + + +row_constructor operator ANY (subquery) +row_constructor operator SOME (subquery) + + + + The left-hand side of this form of ANY is a row constructor, + as described in . + The right-hand side is a parenthesized + subquery, which must return exactly as many columns as there are + expressions in the left-hand row. 
The left-hand expressions are + evaluated and compared row-wise to each row of the subquery result, + using the given operator. + The result of ANY is true if the comparison + returns true for any subquery row. + The result is false if the comparison returns false for every + subquery row (including the case where the subquery returns no + rows). + The result is NULL if no comparison with a subquery row returns true, + and at least one comparison returns NULL. + + + + See for details about the meaning + of a row constructor comparison. + + + + + <literal>ALL</literal> + + +expression operator ALL (subquery) + + + + The right-hand side is a parenthesized + subquery, which must return exactly one column. The left-hand expression + is evaluated and compared to each row of the subquery result using the + given operator, which must yield a Boolean + result. + The result of ALL is true if all rows yield true + (including the case where the subquery returns no rows). + The result is false if any false result is found. + The result is NULL if no comparison with a subquery row returns false, + and at least one comparison returns NULL. + + + + NOT IN is equivalent to <> ALL. + + + + As with EXISTS, it's unwise to assume that the subquery will + be evaluated completely. + + + +row_constructor operator ALL (subquery) + + + + The left-hand side of this form of ALL is a row constructor, + as described in . + The right-hand side is a parenthesized + subquery, which must return exactly as many columns as there are + expressions in the left-hand row. The left-hand expressions are + evaluated and compared row-wise to each row of the subquery result, + using the given operator. + The result of ALL is true if the comparison + returns true for all subquery rows (including the + case where the subquery returns no rows). + The result is false if the comparison returns false for any + subquery row. + The result is NULL if no comparison with a subquery row returns false, + and at least one comparison returns NULL. + + + + See for details about the meaning + of a row constructor comparison. + + + + + Single-Row Comparison + + + comparison + subquery result row + + + +row_constructor operator (subquery) + + + + The left-hand side is a row constructor, + as described in . + The right-hand side is a parenthesized subquery, which must return exactly + as many columns as there are expressions in the left-hand row. Furthermore, + the subquery cannot return more than one row. (If it returns zero rows, + the result is taken to be null.) The left-hand side is evaluated and + compared row-wise to the single subquery result row. + + + + See for details about the meaning + of a row constructor comparison. + + + diff --git a/doc/src/sgml/func/func-textsearch.sgml b/doc/src/sgml/func/func-textsearch.sgml new file mode 100644 index 0000000000000..a06a58f14983a --- /dev/null +++ b/doc/src/sgml/func/func-textsearch.sgml @@ -0,0 +1,1046 @@ + + Text Search Functions and Operators + + + full text search + functions and operators + + + + text search + functions and operators + + + + , + and + + summarize the functions and operators that are provided + for full text searching. See for a detailed + explanation of PostgreSQL's text search + facility. + + + + Text Search Operators + + + + + Operator + + + Description + + + Example(s) + + + + + + + + tsvector @@ tsquery + boolean + + + tsquery @@ tsvector + boolean + + + Does tsvector match tsquery? + (The arguments can be given in either order.) 
+ + + to_tsvector('fat cats ate rats') @@ to_tsquery('cat & rat') + t + + + + + + text @@ tsquery + boolean + + + Does text string, after implicit invocation + of to_tsvector(), match tsquery? + + + 'fat cats ate rats' @@ to_tsquery('cat & rat') + t + + + + + + tsvector || tsvector + tsvector + + + Concatenates two tsvectors. If both inputs contain + lexeme positions, the second input's positions are adjusted + accordingly. + + + 'a:1 b:2'::tsvector || 'c:1 d:2 b:3'::tsvector + 'a':1 'b':2,5 'c':3 'd':4 + + + + + + tsquery && tsquery + tsquery + + + ANDs two tsquerys together, producing a query that + matches documents that match both input queries. + + + 'fat | rat'::tsquery && 'cat'::tsquery + ( 'fat' | 'rat' ) & 'cat' + + + + + + tsquery || tsquery + tsquery + + + ORs two tsquerys together, producing a query that + matches documents that match either input query. + + + 'fat | rat'::tsquery || 'cat'::tsquery + 'fat' | 'rat' | 'cat' + + + + + + !! tsquery + tsquery + + + Negates a tsquery, producing a query that matches + documents that do not match the input query. + + + !! 'cat'::tsquery + !'cat' + + + + + + tsquery <-> tsquery + tsquery + + + Constructs a phrase query, which matches if the two input queries + match at successive lexemes. + + + to_tsquery('fat') <-> to_tsquery('rat') + 'fat' <-> 'rat' + + + + + + tsquery @> tsquery + boolean + + + Does first tsquery contain the second? (This considers + only whether all the lexemes appearing in one query appear in the + other, ignoring the combining operators.) + + + 'cat'::tsquery @> 'cat & rat'::tsquery + f + + + + + + tsquery <@ tsquery + boolean + + + Is first tsquery contained in the second? (This + considers only whether all the lexemes appearing in one query appear + in the other, ignoring the combining operators.) + + + 'cat'::tsquery <@ 'cat & rat'::tsquery + t + + + 'cat'::tsquery <@ '!cat & rat'::tsquery + t + + + + +
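+
+   Because these operators produce ordinary tsquery values, they compose
+   freely; for example, a phrase query built with <-> can be fed
+   directly into a @@ match:
+
+SELECT to_tsvector('english', 'The fat cats ate the rats') @@
+       (to_tsquery('english', 'fat') <-> to_tsquery('english', 'cat'));
+Result: t
+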
+ + + In addition to these specialized operators, the usual comparison + operators shown in are + available for types tsvector and tsquery. + These are not very + useful for text searching but allow, for example, unique indexes to be + built on columns of these types. + + + + Text Search Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + array_to_tsvector + + array_to_tsvector ( text[] ) + tsvector + + + Converts an array of text strings to a tsvector. + The given strings are used as lexemes as-is, without further + processing. Array elements must not be empty strings + or NULL. + + + array_to_tsvector('{fat,cat,rat}'::text[]) + 'cat' 'fat' 'rat' + + + + + + + get_current_ts_config + + get_current_ts_config ( ) + regconfig + + + Returns the OID of the current default text search configuration + (as set by ). + + + get_current_ts_config() + english + + + + + + + length + + length ( tsvector ) + integer + + + Returns the number of lexemes in the tsvector. + + + length('fat:2,4 cat:3 rat:5A'::tsvector) + 3 + + + + + + + numnode + + numnode ( tsquery ) + integer + + + Returns the number of lexemes plus operators in + the tsquery. + + + numnode('(fat & rat) | cat'::tsquery) + 5 + + + + + + + plainto_tsquery + + plainto_tsquery ( + config regconfig, + query text ) + tsquery + + + Converts text to a tsquery, normalizing words according to + the specified or default configuration. Any punctuation in the string + is ignored (it does not determine query operators). The resulting + query matches documents containing all non-stopwords in the text. + + + plainto_tsquery('english', 'The Fat Rats') + 'fat' & 'rat' + + + + + + + phraseto_tsquery + + phraseto_tsquery ( + config regconfig, + query text ) + tsquery + + + Converts text to a tsquery, normalizing words according to + the specified or default configuration. Any punctuation in the string + is ignored (it does not determine query operators). The resulting + query matches phrases containing all non-stopwords in the text. + + + phraseto_tsquery('english', 'The Fat Rats') + 'fat' <-> 'rat' + + + phraseto_tsquery('english', 'The Cat and Rats') + 'cat' <2> 'rat' + + + + + + + websearch_to_tsquery + + websearch_to_tsquery ( + config regconfig, + query text ) + tsquery + + + Converts text to a tsquery, normalizing words according + to the specified or default configuration. Quoted word sequences are + converted to phrase tests. The word or is understood + as producing an OR operator, and a dash produces a NOT operator; + other punctuation is ignored. + This approximates the behavior of some common web search tools. + + + websearch_to_tsquery('english', '"fat rat" or cat dog') + 'fat' <-> 'rat' | 'cat' & 'dog' + + + + + + + querytree + + querytree ( tsquery ) + text + + + Produces a representation of the indexable portion of + a tsquery. A result that is empty or + just T indicates a non-indexable query. + + + querytree('foo & ! bar'::tsquery) + 'foo' + + + + + + + setweight + + setweight ( vector tsvector, weight "char" ) + tsvector + + + Assigns the specified weight to each element + of the vector. + + + setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A') + 'cat':3A 'fat':2A,4A 'rat':5A + + + + + + + setweight + setweight for specific lexeme(s) + + setweight ( vector tsvector, weight "char", lexemes text[] ) + tsvector + + + Assigns the specified weight to elements + of the vector that are listed + in lexemes. + The strings in lexemes are taken as lexemes + as-is, without further processing. 
Strings that do not match any + lexeme in vector are ignored. + + + setweight('fat:2,4 cat:3 rat:5,6B'::tsvector, 'A', '{cat,rat}') + 'cat':3A 'fat':2,4 'rat':5A,6A + + + + + + + strip + + strip ( tsvector ) + tsvector + + + Removes positions and weights from the tsvector. + + + strip('fat:2,4 cat:3 rat:5A'::tsvector) + 'cat' 'fat' 'rat' + + + + + + + to_tsquery + + to_tsquery ( + config regconfig, + query text ) + tsquery + + + Converts text to a tsquery, normalizing words according to + the specified or default configuration. The words must be combined + by valid tsquery operators. + + + to_tsquery('english', 'The & Fat & Rats') + 'fat' & 'rat' + + + + + + + to_tsvector + + to_tsvector ( + config regconfig, + document text ) + tsvector + + + Converts text to a tsvector, normalizing words according + to the specified or default configuration. Position information is + included in the result. + + + to_tsvector('english', 'The Fat Rats') + 'fat':2 'rat':3 + + + + + + to_tsvector ( + config regconfig, + document json ) + tsvector + + + to_tsvector ( + config regconfig, + document jsonb ) + tsvector + + + Converts each string value in the JSON document to + a tsvector, normalizing words according to the specified + or default configuration. The results are then concatenated in + document order to produce the output. Position information is + generated as though one stopword exists between each pair of string + values. (Beware that document order of the fields of a + JSON object is implementation-dependent when the input + is jsonb; observe the difference in the examples.) + + + to_tsvector('english', '{"aa": "The Fat Rats", "b": "dog"}'::json) + 'dog':5 'fat':2 'rat':3 + + + to_tsvector('english', '{"aa": "The Fat Rats", "b": "dog"}'::jsonb) + 'dog':1 'fat':4 'rat':5 + + + + + + + json_to_tsvector + + json_to_tsvector ( + config regconfig, + document json, + filter jsonb ) + tsvector + + + + jsonb_to_tsvector + + jsonb_to_tsvector ( + config regconfig, + document jsonb, + filter jsonb ) + tsvector + + + Selects each item in the JSON document that is requested by + the filter and converts each one to + a tsvector, normalizing words according to the specified + or default configuration. The results are then concatenated in + document order to produce the output. Position information is + generated as though one stopword exists between each pair of selected + items. (Beware that document order of the fields of a + JSON object is implementation-dependent when the input + is jsonb.) + The filter must be a jsonb + array containing zero or more of these keywords: + "string" (to include all string values), + "numeric" (to include all numeric values), + "boolean" (to include all boolean values), + "key" (to include all keys), or + "all" (to include all the above). + As a special case, the filter can also be a + simple JSON value that is one of these keywords. + + + json_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json, '["string", "numeric"]') + '123':5 'fat':2 'rat':3 + + + json_to_tsvector('english', '{"cat": "The Fat Rats", "dog": 123}'::json, '"all"') + '123':9 'cat':1 'dog':7 'fat':4 'rat':5 + + + + + + + ts_delete + + ts_delete ( vector tsvector, lexeme text ) + tsvector + + + Removes any occurrence of the given lexeme + from the vector. + The lexeme string is treated as a lexeme as-is, + without further processing. 
+ + + ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat') + 'cat':3 'rat':5A + + + + + + ts_delete ( vector tsvector, lexemes text[] ) + tsvector + + + Removes any occurrences of the lexemes + in lexemes + from the vector. + The strings in lexemes are taken as lexemes + as-is, without further processing. Strings that do not match any + lexeme in vector are ignored. + + + ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat']) + 'cat':3 + + + + + + + ts_filter + + ts_filter ( vector tsvector, weights "char"[] ) + tsvector + + + Selects only elements with the given weights + from the vector. + + + ts_filter('fat:2,4 cat:3b,7c rat:5A'::tsvector, '{a,b}') + 'cat':3B 'rat':5A + + + + + + + ts_headline + + ts_headline ( + config regconfig, + document text, + query tsquery + , options text ) + text + + + Displays, in an abbreviated form, the match(es) for + the query in + the document, which must be raw text not + a tsvector. Words in the document are normalized + according to the specified or default configuration before matching to + the query. Use of this function is discussed in + , which also describes the + available options. + + + ts_headline('The fat cat ate the rat.', 'cat') + The fat <b>cat</b> ate the rat. + + + + + + ts_headline ( + config regconfig, + document json, + query tsquery + , options text ) + text + + + ts_headline ( + config regconfig, + document jsonb, + query tsquery + , options text ) + text + + + Displays, in an abbreviated form, match(es) for + the query that occur in string values + within the JSON document. + See for more details. + + + ts_headline('{"cat":"raining cats and dogs"}'::jsonb, 'cat') + {"cat": "raining <b>cats</b> and dogs"} + + + + + + + ts_rank + + ts_rank ( + weights real[], + vector tsvector, + query tsquery + , normalization integer ) + real + + + Computes a score showing how well + the vector matches + the query. See + for details. + + + ts_rank(to_tsvector('raining cats and dogs'), 'cat') + 0.06079271 + + + + + + + ts_rank_cd + + ts_rank_cd ( + weights real[], + vector tsvector, + query tsquery + , normalization integer ) + real + + + Computes a score showing how well + the vector matches + the query, using a cover density + algorithm. See for details. + + + ts_rank_cd(to_tsvector('raining cats and dogs'), 'cat') + 0.1 + + + + + + + ts_rewrite + + ts_rewrite ( query tsquery, + target tsquery, + substitute tsquery ) + tsquery + + + Replaces occurrences of target + with substitute + within the query. + See for details. + + + ts_rewrite('a & b'::tsquery, 'a'::tsquery, 'foo|bar'::tsquery) + 'b' & ( 'foo' | 'bar' ) + + + + + + ts_rewrite ( query tsquery, + select text ) + tsquery + + + Replaces portions of the query according to + target(s) and substitute(s) obtained by executing + a SELECT command. + See for details. + + + SELECT ts_rewrite('a & b'::tsquery, 'SELECT t,s FROM aliases') + 'b' & ( 'foo' | 'bar' ) + + + + + + + tsquery_phrase + + tsquery_phrase ( query1 tsquery, query2 tsquery ) + tsquery + + + Constructs a phrase query that searches + for matches of query1 + and query2 at successive lexemes (same + as <-> operator). + + + tsquery_phrase(to_tsquery('fat'), to_tsquery('cat')) + 'fat' <-> 'cat' + + + + + + tsquery_phrase ( query1 tsquery, query2 tsquery, distance integer ) + tsquery + + + Constructs a phrase query that searches + for matches of query1 and + query2 that occur exactly + distance lexemes apart. 
+ + + tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10) + 'fat' <10> 'cat' + + + + + + + tsvector_to_array + + tsvector_to_array ( tsvector ) + text[] + + + Converts a tsvector to an array of lexemes. + + + tsvector_to_array('fat:2,4 cat:3 rat:5A'::tsvector) + {cat,fat,rat} + + + + + + + unnest + for tsvector + + unnest ( tsvector ) + setof record + ( lexeme text, + positions smallint[], + weights text ) + + + Expands a tsvector into a set of rows, one per lexeme. + + + select * from unnest('cat:3 fat:2,4 rat:5A'::tsvector) + + + lexeme | positions | weights +--------+-----------+--------- + cat | {3} | {D} + fat | {2,4} | {D,D} + rat | {5} | {A} + + + + + +
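+
+   A common pattern combines several of these functions: building a single
+   document vector whose parts carry different weights, for later use with
+   ts_rank.  A minimal sketch, using made-up title and body strings:
+
+SELECT setweight(to_tsvector('english', 'The Quick Title'), 'A') ||
+       setweight(to_tsvector('english', 'some body text'), 'D');
+Result: 'bodi':5 'quick':2A 'text':6 'titl':3A
+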
+ + + + All the text search functions that accept an optional regconfig + argument will use the configuration specified by + + when that argument is omitted. + + + + + The functions in + + are listed separately because they are not usually used in everyday text + searching operations. They are primarily helpful for development and + debugging of new text search configurations. + + + + Text Search Debugging Functions + + + + + Function + + + Description + + + Example(s) + + + + + + + + + ts_debug + + ts_debug ( + config regconfig, + document text ) + setof record + ( alias text, + description text, + token text, + dictionaries regdictionary[], + dictionary regdictionary, + lexemes text[] ) + + + Extracts and normalizes tokens from + the document according to the specified or + default text search configuration, and returns information about how + each token was processed. + See for details. + + + ts_debug('english', 'The Brightest supernovaes') + (asciiword,"Word, all ASCII",The,{english_stem},english_stem,{}) ... + + + + + + + ts_lexize + + ts_lexize ( dict regdictionary, token text ) + text[] + + + Returns an array of replacement lexemes if the input token is known to + the dictionary, or an empty array if the token is known to the + dictionary but it is a stop word, or NULL if it is not a known word. + See for details. + + + ts_lexize('english_stem', 'stars') + {star} + + + + + + + ts_parse + + ts_parse ( parser_name text, + document text ) + setof record + ( tokid integer, + token text ) + + + Extracts tokens from the document using the + named parser. + See for details. + + + ts_parse('default', 'foo - bar') + (1,foo) ... + + + + + + ts_parse ( parser_oid oid, + document text ) + setof record + ( tokid integer, + token text ) + + + Extracts tokens from the document using a + parser specified by OID. + See for details. + + + ts_parse(3722, 'foo - bar') + (1,foo) ... + + + + + + + ts_token_type + + ts_token_type ( parser_name text ) + setof record + ( tokid integer, + alias text, + description text ) + + + Returns a table that describes each type of token the named parser can + recognize. + See for details. + + + ts_token_type('default') + (1,asciiword,"Word, all ASCII") ... + + + + + + ts_token_type ( parser_oid oid ) + setof record + ( tokid integer, + alias text, + description text ) + + + Returns a table that describes each type of token a parser specified + by OID can recognize. + See for details. + + + ts_token_type(3722) + (1,asciiword,"Word, all ASCII") ... + + + + + + + ts_stat + + ts_stat ( sqlquery text + , weights text ) + setof record + ( word text, + ndoc integer, + nentry integer ) + + + Executes the sqlquery, which must return a + single tsvector column, and returns statistics about each + distinct lexeme contained in the data. + See for details. + + + ts_stat('SELECT vector FROM apod') + (foo,10,15) ... + + + + +
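+
+   As with the ts_stat entry above (which assumes a table apod with a
+   tsvector column vector), a typical use is to list the most frequent
+   lexemes first:
+
+SELECT word, ndoc, nentry
+FROM ts_stat('SELECT vector FROM apod')
+ORDER BY nentry DESC, ndoc DESC, word
+LIMIT 10;
+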
+ +
diff --git a/doc/src/sgml/func/func-trigger.sgml b/doc/src/sgml/func/func-trigger.sgml new file mode 100644 index 0000000000000..94b40adbdb84a --- /dev/null +++ b/doc/src/sgml/func/func-trigger.sgml @@ -0,0 +1,135 @@ + + Trigger Functions + + + While many uses of triggers involve user-written trigger functions, + PostgreSQL provides a few built-in trigger + functions that can be used directly in user-defined triggers. These + are summarized in . + (Additional built-in trigger functions exist, which implement foreign + key constraints and deferred index constraints. Those are not documented + here since users need not use them directly.) + + + + For more information about creating triggers, see + . + + + + Built-In Trigger Functions + + + + + Function + + + Description + + + Example Usage + + + + + + + + + suppress_redundant_updates_trigger + + suppress_redundant_updates_trigger ( ) + trigger + + + Suppresses do-nothing update operations. See below for details. + + + CREATE TRIGGER ... suppress_redundant_updates_trigger() + + + + + + + tsvector_update_trigger + + tsvector_update_trigger ( ) + trigger + + + Automatically updates a tsvector column from associated + plain-text document column(s). The text search configuration to use + is specified by name as a trigger argument. See + for details. + + + CREATE TRIGGER ... tsvector_update_trigger(tsvcol, 'pg_catalog.swedish', title, body) + + + + + + + tsvector_update_trigger_column + + tsvector_update_trigger_column ( ) + trigger + + + Automatically updates a tsvector column from associated + plain-text document column(s). The text search configuration to use + is taken from a regconfig column of the table. See + for details. + + + CREATE TRIGGER ... tsvector_update_trigger_column(tsvcol, tsconfigcol, title, body) + + + + +
+ + + The suppress_redundant_updates_trigger function, + when applied as a row-level BEFORE UPDATE trigger, + will prevent any update that does not actually change the data in the + row from taking place. This overrides the normal behavior which always + performs a physical row update + regardless of whether or not the data has changed. (This normal behavior + makes updates run faster, since no checking is required, and is also + useful in certain cases.) + + + + Ideally, you should avoid running updates that don't actually + change the data in the record. Redundant updates can cost considerable + unnecessary time, especially if there are lots of indexes to alter, + and space in dead rows that will eventually have to be vacuumed. + However, detecting such situations in client code is not + always easy, or even possible, and writing expressions to detect + them can be error-prone. An alternative is to use + suppress_redundant_updates_trigger, which will skip + updates that don't change the data. You should use this with care, + however. The trigger takes a small but non-trivial time for each record, + so if most of the records affected by updates do actually change, + use of this trigger will make updates run slower on average. + + + + The suppress_redundant_updates_trigger function can be + added to a table like this: + +CREATE TRIGGER z_min_update +BEFORE UPDATE ON tablename +FOR EACH ROW EXECUTE FUNCTION suppress_redundant_updates_trigger(); + + In most cases, you need to fire this trigger last for each row, so that + it does not override other triggers that might wish to alter the row. + Bearing in mind that triggers fire in name order, you would therefore + choose a trigger name that comes after the name of any other trigger + you might have on the table. (Hence the z prefix in the + example.) + +
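+
+   The tsvector_update_trigger function shown in the table above can be
+   attached in a similar way.  A minimal sketch, assuming a hypothetical
+   messages table with title and body columns:
+
+CREATE TABLE messages (title text, body text, tsv tsvector);
+
+CREATE TRIGGER tsvectorupdate
+BEFORE INSERT OR UPDATE ON messages
+FOR EACH ROW EXECUTE FUNCTION
+    tsvector_update_trigger(tsv, 'pg_catalog.english', title, body);
+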
diff --git a/doc/src/sgml/func/func-uuid.sgml b/doc/src/sgml/func/func-uuid.sgml new file mode 100644 index 0000000000000..65c5ddec6b7dc --- /dev/null +++ b/doc/src/sgml/func/func-uuid.sgml @@ -0,0 +1,188 @@ + + UUID Functions + + + UUID + generating + + + + gen_random_uuid + + + + uuidv4 + + + + uuidv7 + + + + uuid_extract_timestamp + + + + uuid_extract_version + + + + shows the PostgreSQL + functions that can be used to generate UUIDs. + + + + <acronym>UUID</acronym> Generation Functions + + + + + + Function + + + Description + + + Example(s) + + + + + + + + + + gen_random_uuid + uuid + + + uuidv4 + uuid + + + Generate a version 4 (random) UUID. + + + gen_random_uuid() + 5b30857f-0bfa-48b5-ac0b-5c64e28078d1 + + + uuidv4() + b42410ee-132f-42ee-9e4f-09a6485c95b8 + + + + + + + uuidv7 + ( shift interval ) + uuid + + + Generate a version 7 (time-ordered) UUID. The timestamp is computed using UNIX timestamp + with millisecond precision + sub-millisecond timestamp + random. The optional parameter + shift will shift the computed timestamp by the given interval. + + + uuidv7() + 019535d9-3df7-79fb-b466-fa907fa17f9e + + + + + +
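+
+   A common use of these generators is as a column default.  A minimal
+   sketch, assuming a hypothetical items table:
+
+CREATE TABLE items (
+    id   uuid DEFAULT gen_random_uuid() PRIMARY KEY,
+    name text
+);
+
+INSERT INTO items (name) VALUES ('widget') RETURNING id;
+
+   Since version 7 UUIDs are time-ordered, uuidv7() can be used the same
+   way when better index locality for newly inserted rows is desired.
+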
+ + + + The module provides additional functions that + implement other standard algorithms for generating UUIDs. + + + + + shows the PostgreSQL + functions that can be used to extract information from UUIDs. + + + + <acronym>UUID</acronym> Extraction Functions + + + + + + Function + + + Description + + + Example(s) + + + + + + + + + + uuid_extract_timestamp + ( uuid ) + timestamp with time zone + + + Extracts a timestamp with time zone from UUID + version 1 and 7. For other versions, this function returns null. Note that + the extracted timestamp is not necessarily exactly equal to the time the + UUID was generated; this depends on the implementation that generated the + UUID. + + + uuid_extract_timestamp('019535d9-3df7-79fb-b466-&zwsp;fa907fa17f9e'::uuid) + 2025-02-23 21:46:24.503-05 + + + + + + + uuid_extract_version + ( uuid ) + smallint + + + Extracts the version from a UUID of the variant described by + RFC 9562. For + other variants, this function returns null. For example, for a UUID + generated by gen_random_uuid, this function will + return 4. + + + uuid_extract_version('41db1265-8bc1-4ab3-992f-&zwsp;885799a4af1d'::uuid) + 4 + + + uuid_extract_version('019535d9-3df7-79fb-b466-&zwsp;fa907fa17f9e'::uuid) + 7 + + + + + +
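+
+   The extraction functions can be combined with uuidv7 to observe the
+   effect of its optional shift parameter; for example, the timestamp
+   embedded in a UUID shifted backwards lands in the past:
+
+SELECT uuid_extract_timestamp(uuidv7(INTERVAL '-1 day')) < now();
+Result: t
+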
+ + + PostgreSQL also provides the usual comparison + operators shown in for + UUIDs. + + + See for details on the data type + uuid in PostgreSQL. + +
diff --git a/doc/src/sgml/func/func-window.sgml b/doc/src/sgml/func/func-window.sgml new file mode 100644 index 0000000000000..cce0165b9526e --- /dev/null +++ b/doc/src/sgml/func/func-window.sgml @@ -0,0 +1,284 @@ + + Window Functions + + + window function + built-in + + + + Window functions provide the ability to perform + calculations across sets of rows that are related to the current query + row. See for an introduction to this + feature, and for syntax + details. + + + + The built-in window functions are listed in + . Note that these functions + must be invoked using window function syntax, i.e., an + OVER clause is required. + + + + In addition to these functions, any built-in or user-defined + ordinary aggregate (i.e., not ordered-set or hypothetical-set aggregates) + can be used as a window function; see + for a list of the built-in aggregates. + Aggregate functions act as window functions only when an OVER + clause follows the call; otherwise they act as plain aggregates + and return a single row for the entire set. + + + + General-Purpose Window Functions + + + + + Function + + + Description + + + + + + + + + row_number + + row_number () + bigint + + + Returns the number of the current row within its partition, counting + from 1. + + + + + + + rank + + rank () + bigint + + + Returns the rank of the current row, with gaps; that is, + the row_number of the first row in its peer + group. + + + + + + + dense_rank + + dense_rank () + bigint + + + Returns the rank of the current row, without gaps; this function + effectively counts peer groups. + + + + + + + percent_rank + + percent_rank () + double precision + + + Returns the relative rank of the current row, that is + (rank - 1) / (total partition rows - 1). + The value thus ranges from 0 to 1 inclusive. + + + + + + + cume_dist + + cume_dist () + double precision + + + Returns the cumulative distribution, that is (number of partition rows + preceding or peers with current row) / (total partition rows). + The value thus ranges from 1/N to 1. + + + + + + + ntile + + ntile ( num_buckets integer ) + integer + + + Returns an integer ranging from 1 to the argument value, dividing the + partition as equally as possible. + + + + + + + lag + + lag ( value anycompatible + , offset integer + , default anycompatible ) + anycompatible + + + Returns value evaluated at + the row that is offset + rows before the current row within the partition; if there is no such + row, instead returns default + (which must be of a type compatible with + value). + Both offset and + default are evaluated + with respect to the current row. If omitted, + offset defaults to 1 and + default to NULL. + + + + + + + lead + + lead ( value anycompatible + , offset integer + , default anycompatible ) + anycompatible + + + Returns value evaluated at + the row that is offset + rows after the current row within the partition; if there is no such + row, instead returns default + (which must be of a type compatible with + value). + Both offset and + default are evaluated + with respect to the current row. If omitted, + offset defaults to 1 and + default to NULL. + + + + + + + first_value + + first_value ( value anyelement ) + anyelement + + + Returns value evaluated + at the row that is the first row of the window frame. + + + + + + + last_value + + last_value ( value anyelement ) + anyelement + + + Returns value evaluated + at the row that is the last row of the window frame. 
+ + + + + + + nth_value + + nth_value ( value anyelement, n integer ) + anyelement + + + Returns value evaluated + at the row that is the n'th + row of the window frame (counting from 1); + returns NULL if there is no such row. + + + + +
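+
+   For example, assuming a hypothetical empsalary table, rank can be used
+   to order employees by salary within each department:
+
+SELECT depname, empno, salary,
+       rank() OVER (PARTITION BY depname ORDER BY salary DESC)
+FROM empsalary;
+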
+ + + All of the functions listed in + depend on the sort ordering + specified by the ORDER BY clause of the associated window + definition. Rows that are not distinct when considering only the + ORDER BY columns are said to be peers. + The four ranking functions (including cume_dist) are + defined so that they give the same answer for all rows of a peer group. + + + + Note that first_value, last_value, and + nth_value consider only the rows within the window + frame, which by default contains the rows from the start of the + partition through the last peer of the current row. This is + likely to give unhelpful results for last_value and + sometimes also nth_value. You can redefine the frame by + adding a suitable frame specification (RANGE, + ROWS or GROUPS) to + the OVER clause. + See for more information + about frame specifications. + + + + When an aggregate function is used as a window function, it aggregates + over the rows within the current row's window frame. + An aggregate used with ORDER BY and the default window frame + definition produces a running sum type of behavior, which may or + may not be what's wanted. To obtain + aggregation over the whole partition, omit ORDER BY or use + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. + Other frame specifications can be used to obtain other effects. + + + + + The SQL standard defines a RESPECT NULLS or + IGNORE NULLS option for lead, lag, + first_value, last_value, and + nth_value. This is not implemented in + PostgreSQL: the behavior is always the + same as the standard's default, namely RESPECT NULLS. + Likewise, the standard's FROM FIRST or FROM LAST + option for nth_value is not implemented: only the + default FROM FIRST behavior is supported. (You can achieve + the result of FROM LAST by reversing the ORDER BY + ordering.) + + + +
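+
+   The running sum behavior described above, and the whole-partition
+   alternative, can be seen side by side in this small illustration:
+
+SELECT x,
+       sum(x) OVER (ORDER BY x) AS running_sum,
+       sum(x) OVER () AS grand_total
+FROM generate_series(1, 4) AS t(x);
+
+ x | running_sum | grand_total
+---+-------------+-------------
+ 1 |           1 |          10
+ 2 |           3 |          10
+ 3 |           6 |          10
+ 4 |          10 |          10
+(4 rows)
+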
diff --git a/doc/src/sgml/func/func-xml.sgml b/doc/src/sgml/func/func-xml.sgml new file mode 100644 index 0000000000000..21f34467a4f8a --- /dev/null +++ b/doc/src/sgml/func/func-xml.sgml @@ -0,0 +1,1283 @@ + + + XML Functions + + + XML Functions + + + + The functions and function-like expressions described in this + section operate on values of type xml. See for information about the xml + type. The function-like expressions xmlparse + and xmlserialize for converting to and from + type xml are documented there, not in this section. + + + + Use of most of these functions + requires PostgreSQL to have been built + with configure --with-libxml. + + + + Producing XML Content + + + A set of functions and function-like expressions is available for + producing XML content from SQL data. As such, they are + particularly suitable for formatting query results into XML + documents for processing in client applications. + + + + <literal>xmltext</literal> + + + xmltext + + + +xmltext ( text ) xml + + + + The function xmltext returns an XML value with a single + text node containing the input argument as its content. Predefined entities + like ampersand (), left and right angle brackets + (]]>), and quotation marks () + are escaped. + + + + Example: +'); + xmltext +------------------------- + < foo & bar > +]]> + + + + + <literal>xmlcomment</literal> + + + xmlcomment + + + +xmlcomment ( text ) xml + + + + The function xmlcomment creates an XML value + containing an XML comment with the specified text as content. + The text cannot contain -- or end with a + -, otherwise the resulting construct + would not be a valid XML comment. + If the argument is null, the result is null. + + + + Example: + +]]> + + + + + <literal>xmlconcat</literal> + + + xmlconcat + + + +xmlconcat ( xml , ... ) xml + + + + The function xmlconcat concatenates a list + of individual XML values to create a single value containing an + XML content fragment. Null values are omitted; the result is + only null if there are no nonnull arguments. + + + + Example: +', 'foo'); + + xmlconcat +---------------------- + foo +]]> + + + + XML declarations, if present, are combined as follows. If all + argument values have the same XML version declaration, that + version is used in the result, else no version is used. If all + argument values have the standalone declaration value + yes, then that value is used in the result. If + all argument values have a standalone declaration value and at + least one is no, then that is used in the result. + Else the result will have no standalone declaration. If the + result is determined to require a standalone declaration but no + version declaration, a version declaration with version 1.0 will + be used because XML requires an XML declaration to contain a + version declaration. Encoding declarations are ignored and + removed in all cases. + + + + Example: +', ''); + + xmlconcat +----------------------------------- + +]]> + + + + + <literal>xmlelement</literal> + + + xmlelement + + + +xmlelement ( NAME name , XMLATTRIBUTES ( attvalue AS attname , ... ) , content , ... ) xml + + + + The xmlelement expression produces an XML + element with the given name, attributes, and content. + The name + and attname items shown in the syntax are + simple identifiers, not values. The attvalue + and content items are expressions, which can + yield any PostgreSQL data type. The + argument(s) within XMLATTRIBUTES generate attributes + of the XML element; the content value(s) are + concatenated to form its content. 
+ + + + Examples: + + +SELECT xmlelement(name foo, xmlattributes('xyz' as bar)); + + xmlelement +------------------ + + +SELECT xmlelement(name foo, xmlattributes(current_date as bar), 'cont', 'ent'); + + xmlelement +------------------------------------- + content +]]> + + + + Element and attribute names that are not valid XML names are + escaped by replacing the offending characters by the sequence + _xHHHH_, where + HHHH is the character's Unicode + codepoint in hexadecimal notation. For example: + +]]> + + + + An explicit attribute name need not be specified if the attribute + value is a column reference, in which case the column's name will + be used as the attribute name by default. In other cases, the + attribute must be given an explicit name. So this example is + valid: + +CREATE TABLE test (a xml, b xml); +SELECT xmlelement(name test, xmlattributes(a, b)) FROM test; + + But these are not: + +SELECT xmlelement(name test, xmlattributes('constant'), a, b) FROM test; +SELECT xmlelement(name test, xmlattributes(func(a, b))) FROM test; + + + + + Element content, if specified, will be formatted according to + its data type. If the content is itself of type xml, + complex XML documents can be constructed. For example: + +]]> + + Content of other types will be formatted into valid XML character + data. This means in particular that the characters <, >, + and & will be converted to entities. Binary data (data type + bytea) will be represented in base64 or hex + encoding, depending on the setting of the configuration parameter + . The particular behavior for + individual data types is expected to evolve in order to align the + PostgreSQL mappings with those specified in SQL:2006 and later, + as discussed in . + + + + + <literal>xmlforest</literal> + + + xmlforest + + + +xmlforest ( content AS name , ... ) xml + + + + The xmlforest expression produces an XML + forest (sequence) of elements using the given names and content. + As for xmlelement, + each name must be a simple identifier, while + the content expressions can have any data + type. + + + + Examples: + +SELECT xmlforest('abc' AS foo, 123 AS bar); + + xmlforest +------------------------------ + <foo>abc</foo><bar>123</bar> + + +SELECT xmlforest(table_name, column_name) +FROM information_schema.columns +WHERE table_schema = 'pg_catalog'; + + xmlforest +------------------------------------&zwsp;----------------------------------- + <table_name>pg_authid</table_name>&zwsp;<column_name>rolname</column_name> + <table_name>pg_authid</table_name>&zwsp;<column_name>rolsuper</column_name> + ... + + + As seen in the second example, the element name can be omitted if + the content value is a column reference, in which case the column + name is used by default. Otherwise, a name must be specified. + + + + Element names that are not valid XML names are escaped as shown + for xmlelement above. Similarly, content + data is escaped to make valid XML content, unless it is already + of type xml. + + + + Note that XML forests are not valid XML documents if they consist + of more than one element, so it might be useful to wrap + xmlforest expressions in + xmlelement. + + + + + <literal>xmlpi</literal> + + + xmlpi + + + +xmlpi ( NAME name , content ) xml + + + + The xmlpi expression creates an XML + processing instruction. + As for xmlelement, + the name must be a simple identifier, while + the content expression can have any data type. + The content, if present, must not contain the + character sequence ?>. 
+ + + + Example: + +]]> + + + + + <literal>xmlroot</literal> + + + xmlroot + + + +xmlroot ( xml, VERSION {text|NO VALUE} , STANDALONE {YES|NO|NO VALUE} ) xml + + + + The xmlroot expression alters the properties + of the root node of an XML value. If a version is specified, + it replaces the value in the root node's version declaration; if a + standalone setting is specified, it replaces the value in the + root node's standalone declaration. + + + +abc'), + version '1.0', standalone yes); + + xmlroot +---------------------------------------- + + abc +]]> + + + + + <literal>xmlagg</literal> + + + xmlagg + + + +xmlagg ( xml ) xml + + + + The function xmlagg is, unlike the other + functions described here, an aggregate function. It concatenates the + input values to the aggregate function call, + much like xmlconcat does, except that concatenation + occurs across rows rather than across expressions in a single row. + See for additional information + about aggregate functions. + + + + Example: +abc'); +INSERT INTO test VALUES (2, ''); +SELECT xmlagg(x) FROM test; + xmlagg +---------------------- + abc +]]> + + + + To determine the order of the concatenation, an ORDER BY + clause may be added to the aggregate call as described in + . For example: + +abc +]]> + + + + The following non-standard approach used to be recommended + in previous versions, and may still be useful in specific + cases: + +abc +]]> + + + + + + XML Predicates + + + The expressions described in this section check properties + of xml values. + + + + <literal>IS DOCUMENT</literal> + + + IS DOCUMENT + + + +xml IS DOCUMENT boolean + + + + The expression IS DOCUMENT returns true if the + argument XML value is a proper XML document, false if it is not + (that is, it is a content fragment), or null if the argument is + null. See about the difference + between documents and content fragments. + + + + + <literal>IS NOT DOCUMENT</literal> + + + IS NOT DOCUMENT + + + +xml IS NOT DOCUMENT boolean + + + + The expression IS NOT DOCUMENT returns false if the + argument XML value is a proper XML document, true if it is not (that is, + it is a content fragment), or null if the argument is null. + + + + + <literal>XMLEXISTS</literal> + + + XMLEXISTS + + + +XMLEXISTS ( text PASSING BY {REF|VALUE} xml BY {REF|VALUE} ) boolean + + + + The function xmlexists evaluates an XPath 1.0 + expression (the first argument), with the passed XML value as its context + item. The function returns false if the result of that evaluation + yields an empty node-set, true if it yields any other value. The + function returns null if any argument is null. A nonnull value + passed as the context item must be an XML document, not a content + fragment or any non-XML value. + + + + Example: + TorontoOttawa'); + + xmlexists +------------ + t +(1 row) +]]> + + + + The BY REF and BY VALUE clauses + are accepted in PostgreSQL, but are ignored, + as discussed in . + + + + In the SQL standard, the xmlexists function + evaluates an expression in the XML Query language, + but PostgreSQL allows only an XPath 1.0 + expression, as discussed in + . + + + + + <literal>xml_is_well_formed</literal> + + + xml_is_well_formed + + + + xml_is_well_formed_document + + + + xml_is_well_formed_content + + + +xml_is_well_formed ( text ) boolean +xml_is_well_formed_document ( text ) boolean +xml_is_well_formed_content ( text ) boolean + + + + These functions check whether a text string represents + well-formed XML, returning a Boolean result. 
+ xml_is_well_formed_document checks for a well-formed + document, while xml_is_well_formed_content checks + for well-formed content. xml_is_well_formed does + the former if the configuration + parameter is set to DOCUMENT, or the latter if it is set to + CONTENT. This means that + xml_is_well_formed is useful for seeing whether + a simple cast to type xml will succeed, whereas the other two + functions are useful for seeing whether the corresponding variants of + XMLPARSE will succeed. + + + + Examples: + +'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SET xmloption TO CONTENT; +SELECT xml_is_well_formed('abc'); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('bar'); + xml_is_well_formed_document +----------------------------- + t +(1 row) + +SELECT xml_is_well_formed_document('bar'); + xml_is_well_formed_document +----------------------------- + f +(1 row) +]]> + + The last example shows that the checks include whether + namespaces are correctly matched. + + + + + + Processing XML + + + To process values of data type xml, PostgreSQL offers + the functions xpath and + xpath_exists, which evaluate XPath 1.0 + expressions, and the XMLTABLE + table function. + + + + <literal>xpath</literal> + + + XPath + + + +xpath ( xpath text, xml xml , nsarray text[] ) xml[] + + + + The function xpath evaluates the XPath 1.0 + expression xpath (given as text) + against the XML value + xml. It returns an array of XML values + corresponding to the node-set produced by the XPath expression. + If the XPath expression returns a scalar value rather than a node-set, + a single-element array is returned. + + + + The second argument must be a well formed XML document. In particular, + it must have a single root node element. + + + + The optional third argument of the function is an array of namespace + mappings. This array should be a two-dimensional text array with + the length of the second axis being equal to 2 (i.e., it should be an + array of arrays, each of which consists of exactly 2 elements). + The first element of each array entry is the namespace name (alias), the + second the namespace URI. It is not required that aliases provided in + this array be the same as those being used in the XML document itself (in + other words, both in the XML document and in the xpath + function context, aliases are local). + + + + Example: +test', + ARRAY[ARRAY['my', 'http://example.com']]); + + xpath +-------- + {test} +(1 row) +]]> + + + + To deal with default (anonymous) namespaces, do something like this: +test', + ARRAY[ARRAY['mydefns', 'http://example.com']]); + + xpath +-------- + {test} +(1 row) +]]> + + + + + <literal>xpath_exists</literal> + + + xpath_exists + + + +xpath_exists ( xpath text, xml xml , nsarray text[] ) boolean + + + + The function xpath_exists is a specialized form + of the xpath function. Instead of returning the + individual XML values that satisfy the XPath 1.0 expression, this function + returns a Boolean indicating whether the query was satisfied or not + (specifically, whether it produced any value other than an empty node-set). + This function is equivalent to the XMLEXISTS predicate, + except that it also offers support for a namespace mapping argument. 
+ + + + Example: +test', + ARRAY[ARRAY['my', 'http://example.com']]); + + xpath_exists +-------------- + t +(1 row) +]]> + + + + + <literal>xmltable</literal> + + + xmltable + + + + table function + XMLTABLE + + + +XMLTABLE ( + XMLNAMESPACES ( namespace_uri AS namespace_name , ... ), + row_expression PASSING BY {REF|VALUE} document_expression BY {REF|VALUE} + COLUMNS name { type PATH column_expression DEFAULT default_expression NOT NULL | NULL + | FOR ORDINALITY } + , ... +) setof record + + + + The xmltable expression produces a table based + on an XML value, an XPath filter to extract rows, and a + set of column definitions. + Although it syntactically resembles a function, it can only appear + as a table in a query's FROM clause. + + + + The optional XMLNAMESPACES clause gives a + comma-separated list of namespace definitions, where + each namespace_uri is a text + expression and each namespace_name is a simple + identifier. It specifies the XML namespaces used in the document and + their aliases. A default namespace specification is not currently + supported. + + + + The required row_expression argument is an + XPath 1.0 expression (given as text) that is evaluated, + passing the XML value document_expression as + its context item, to obtain a set of XML nodes. These nodes are what + xmltable transforms into output rows. No rows + will be produced if the document_expression + is null, nor if the row_expression produces + an empty node-set or any value other than a node-set. + + + + document_expression provides the context + item for the row_expression. It must be a + well-formed XML document; fragments/forests are not accepted. + The BY REF and BY VALUE clauses + are accepted but ignored, as discussed in + . + + + + In the SQL standard, the xmltable function + evaluates expressions in the XML Query language, + but PostgreSQL allows only XPath 1.0 + expressions, as discussed in + . + + + + The required COLUMNS clause specifies the + column(s) that will be produced in the output table. + See the syntax summary above for the format. + A name is required for each column, as is a data type + (unless FOR ORDINALITY is specified, in which case + type integer is implicit). The path, default and + nullability clauses are optional. + + + + A column marked FOR ORDINALITY will be populated + with row numbers, starting with 1, in the order of nodes retrieved from + the row_expression's result node-set. + At most one column may be marked FOR ORDINALITY. + + + + + XPath 1.0 does not specify an order for nodes in a node-set, so code + that relies on a particular order of the results will be + implementation-dependent. Details can be found in + . + + + + + The column_expression for a column is an + XPath 1.0 expression that is evaluated for each row, with the current + node from the row_expression result as its + context item, to find the value of the column. If + no column_expression is given, then the + column name is used as an implicit path. + + + + If a column's XPath expression returns a non-XML value (which is limited + to string, boolean, or double in XPath 1.0) and the column has a + PostgreSQL type other than xml, the column will be set + as if by assigning the value's string representation to the PostgreSQL + type. (If the value is a boolean, its string representation is taken + to be 1 or 0 if the output + column's type category is numeric, otherwise true or + false.) 
+ + + + If a column's XPath expression returns a non-empty set of XML nodes + and the column's PostgreSQL type is xml, the column will + be assigned the expression result exactly, if it is of document or + content form. + + + A result containing more than one element node at the top level, or + non-whitespace text outside of an element, is an example of content form. + An XPath result can be of neither form, for example if it returns an + attribute node selected from the element that contains it. Such a result + will be put into content form with each such disallowed node replaced by + its string value, as defined for the XPath 1.0 + string function. + + + + + + A non-XML result assigned to an xml output column produces + content, a single text node with the string value of the result. + An XML result assigned to a column of any other type may not have more than + one node, or an error is raised. If there is exactly one node, the column + will be set as if by assigning the node's string + value (as defined for the XPath 1.0 string function) + to the PostgreSQL type. + + + + The string value of an XML element is the concatenation, in document order, + of all text nodes contained in that element and its descendants. The string + value of an element with no descendant text nodes is an + empty string (not NULL). + Any xsi:nil attributes are ignored. + Note that the whitespace-only text() node between two non-text + elements is preserved, and that leading whitespace on a text() + node is not flattened. + The XPath 1.0 string function may be consulted for the + rules defining the string value of other XML node types and non-XML values. + + + + The conversion rules presented here are not exactly those of the SQL + standard, as discussed in . + + + + If the path expression returns an empty node-set + (typically, when it does not match) + for a given row, the column will be set to NULL, unless + a default_expression is specified; then the + value resulting from evaluating that expression is used. + + + + A default_expression, rather than being + evaluated immediately when xmltable is called, + is evaluated each time a default is needed for the column. + If the expression qualifies as stable or immutable, the repeat + evaluation may be skipped. + This means that you can usefully use volatile functions like + nextval in + default_expression. + + + + Columns may be marked NOT NULL. If the + column_expression for a NOT + NULL column does not match anything and there is + no DEFAULT or + the default_expression also evaluates to null, + an error is reported. 
+ + + + Examples: + + + AU + Australia + + + JP + Japan + Shinzo Abe + 145935 + + + SG + Singapore + 697 + + +$$ AS data; + +SELECT xmltable.* + FROM xmldata, + XMLTABLE('//ROWS/ROW' + PASSING data + COLUMNS id int PATH '@id', + ordinality FOR ORDINALITY, + "COUNTRY_NAME" text, + country_id text PATH 'COUNTRY_ID', + size_sq_km float PATH 'SIZE[@unit = "sq_km"]', + size_other text PATH + 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)', + premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'); + + id | ordinality | COUNTRY_NAME | country_id | size_sq_km | size_other | premier_name +----+------------+--------------+------------+------------+--------------+--------------- + 1 | 1 | Australia | AU | | | not specified + 5 | 2 | Japan | JP | | 145935 sq_mi | Shinzo Abe + 6 | 3 | Singapore | SG | 697 | | not specified +]]> + + The following example shows concatenation of multiple text() nodes, + usage of the column name as XPath filter, and the treatment of whitespace, + XML comments and processing instructions: + + + Hello2a2 bbbxxxCC + +$$ AS data; + +SELECT xmltable.* + FROM xmlelements, XMLTABLE('/root' PASSING data COLUMNS element text); + element +------------------------- + Hello2a2 bbbxxxCC +]]> + + + + The following example illustrates how + the XMLNAMESPACES clause can be used to specify + a list of namespaces + used in the XML document as well as in the XPath expressions: + + + + + +'::xml) +) +SELECT xmltable.* + FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, + 'http://example.com/b' AS "B"), + '/x:example/x:item' + PASSING (SELECT data FROM xmldata) + COLUMNS foo int PATH '@foo', + bar int PATH '@B:bar'); + foo | bar +-----+----- + 1 | 2 + 3 | 4 + 4 | 5 +(3 rows) +]]> + + + + + + Mapping Tables to XML + + + XML export + + + + The following functions map the contents of relational tables to + XML values. They can be thought of as XML export functionality: + +table_to_xml ( table regclass, nulls boolean, + tableforest boolean, targetns text ) xml +query_to_xml ( query text, nulls boolean, + tableforest boolean, targetns text ) xml +cursor_to_xml ( cursor refcursor, count integer, nulls boolean, + tableforest boolean, targetns text ) xml + + + + + table_to_xml maps the content of the named + table, passed as parameter table. The + regclass type accepts strings identifying tables using the + usual notation, including optional schema qualification and + double quotes (see for details). + query_to_xml executes the + query whose text is passed as parameter + query and maps the result set. + cursor_to_xml fetches the indicated number of + rows from the cursor specified by the parameter + cursor. This variant is recommended if + large tables have to be mapped, because the result value is built + up in memory by each function. + + + + If tableforest is false, then the resulting + XML document looks like this: + + + data + data + + + + ... + + + ... + +]]> + + If tableforest is true, the result is an + XML content fragment that looks like this: + + data + data + + + + ... + + +... +]]> + + If no table name is available, that is, when mapping a query or a + cursor, the string table is used in the first + format, row in the second format. + + + + The choice between these formats is up to the user. The first + format is a proper XML document, which will be important in many + applications. The second format tends to be more useful in the + cursor_to_xml function if the result values are to be + reassembled into one document later on. 
The functions for + producing XML content discussed above, in particular + xmlelement, can be used to alter the results + to taste. + + + + The data values are mapped in the same way as described for the + function xmlelement above. + + + + The parameter nulls determines whether null + values should be included in the output. If true, null values in + columns are represented as: + +]]> + where xsi is the XML namespace prefix for XML + Schema Instance. An appropriate namespace declaration will be + added to the result value. If false, columns containing null + values are simply omitted from the output. + + + + The parameter targetns specifies the + desired XML namespace of the result. If no particular namespace + is wanted, an empty string should be passed. + + + + The following functions return XML Schema documents describing the + mappings performed by the corresponding functions above: + +table_to_xmlschema ( table regclass, nulls boolean, + tableforest boolean, targetns text ) xml +query_to_xmlschema ( query text, nulls boolean, + tableforest boolean, targetns text ) xml +cursor_to_xmlschema ( cursor refcursor, nulls boolean, + tableforest boolean, targetns text ) xml + + It is essential that the same parameters are passed in order to + obtain matching XML data mappings and XML Schema documents. + + + + The following functions produce XML data mappings and the + corresponding XML Schema in one document (or forest), linked + together. They can be useful where self-contained and + self-describing results are wanted: + +table_to_xml_and_xmlschema ( table regclass, nulls boolean, + tableforest boolean, targetns text ) xml +query_to_xml_and_xmlschema ( query text, nulls boolean, + tableforest boolean, targetns text ) xml + + + + + In addition, the following functions are available to produce + analogous mappings of entire schemas or the entire current + database: + +schema_to_xml ( schema name, nulls boolean, + tableforest boolean, targetns text ) xml +schema_to_xmlschema ( schema name, nulls boolean, + tableforest boolean, targetns text ) xml +schema_to_xml_and_xmlschema ( schema name, nulls boolean, + tableforest boolean, targetns text ) xml + +database_to_xml ( nulls boolean, + tableforest boolean, targetns text ) xml +database_to_xmlschema ( nulls boolean, + tableforest boolean, targetns text ) xml +database_to_xml_and_xmlschema ( nulls boolean, + tableforest boolean, targetns text ) xml + + + These functions ignore tables that are not readable by the current user. + The database-wide functions additionally ignore schemas that the current + user does not have USAGE (lookup) privilege for. + + + + Note that these potentially produce a lot of data, which needs to + be built up in memory. When requesting content mappings of large + schemas or databases, it might be worthwhile to consider mapping the + tables separately instead, possibly even through a cursor. + + + + The result of a schema content mapping looks like this: + + + +table1-mapping + +table2-mapping + +... + +]]> + + where the format of a table mapping depends on the + tableforest parameter as explained above. + + + + The result of a database content mapping looks like this: + + + + + ... + + + + ... + + +... + +]]> + + where the schema mapping is as above. + + + + As an example of using the output produced by these functions, + shows an XSLT stylesheet that + converts the output of + table_to_xml_and_xmlschema to an HTML + document containing a tabular rendition of the table data. 
In a + similar manner, the results from these functions can be + converted into other XML-based formats. + + + + XSLT Stylesheet for Converting SQL/XML Output to HTML + + + + + + + + + + + + + <xsl:value-of select="name(current())"/> + + + + + + + + + + + + + + + + +
+ + +
+ +
+]]>
+
+
+
diff --git a/doc/src/sgml/func/func.sgml b/doc/src/sgml/func/func.sgml new file mode 100644 index 0000000000000..f351ef53f63d4 --- /dev/null +++ b/doc/src/sgml/func/func.sgml @@ -0,0 +1,84 @@ + + + + Functions and Operators + + + function + + + + operator + + + + PostgreSQL provides a large number of + functions and operators for the built-in data types. This chapter + describes most of them, although additional special-purpose functions + appear in relevant sections of the manual. Users can also + define their own functions and operators, as described in + . The + psql commands \df and + \do can be used to list all + available functions and operators, respectively. + + + + The notation used throughout this chapter to describe the argument and + result data types of a function or operator is like this: + +repeat ( text, integer ) text + + which says that the function repeat takes one text and + one integer argument and returns a result of type text. The right arrow + is also used to indicate the result of an example, thus: + +repeat('Pg', 4) PgPgPgPg + + + + + If you are concerned about portability then note that most of + the functions and operators described in this chapter, with the + exception of the most trivial arithmetic and comparison operators + and some explicitly marked functions, are not specified by the + SQL standard. Some of this extended functionality + is present in other SQL database management + systems, and in many cases this functionality is compatible and + consistent between the various implementations. + + + +&func-logical; +&func-comparison; +&func-math; +&func-string; +&func-binarystring; +&func-bitstring; +&func-matching; +&func-formatting; +&func-datetime; +&func-enum; +&func-geometry; +&func-net; +&func-textsearch; +&func-uuid; +&func-xml; +&func-json; +&func-sequence; +&func-conditional; +&func-array; +&func-range; +&func-aggregate; +&func-window; +&func-merge-support; +&func-subquery; +&func-comparisons; +&func-srf; +&func-info; +&func-admin; +&func-trigger; +&func-event-triggers; +&func-statistics; + + From 88f0fdabead51539aa5bdefcf188f07d4651ee10 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Mon, 4 Aug 2025 16:18:59 +0100 Subject: [PATCH 433/602] Fix typo in create_index.sql. Introduced by 578b229718e. 
Author: Dean Rasheed
Reviewed-by: Tender Wang
Discussion: https://postgr.es/m/CAEZATCV_CzRSOPMf1gbHQ7xTmyrV6kE7ViCBD6B81WF7GfTAEA@mail.gmail.com
Backpatch-through: 13
---
 src/test/regress/expected/create_index.out | 4 ++--
 src/test/regress/sql/create_index.sql      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 9ade7b835e69f..98e68e972be05 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1624,8 +1624,8 @@ DROP TABLE cwi_test;
 --
 CREATE TABLE syscol_table (a INT);
 -- System columns cannot be indexed
-CREATE INDEX ON syscolcol_table (ctid);
-ERROR: relation "syscolcol_table" does not exist
+CREATE INDEX ON syscol_table (ctid);
+ERROR: index creation on system columns is not supported
 -- nor used in expressions
 CREATE INDEX ON syscol_table ((ctid >= '(1000,0)'));
 ERROR: index creation on system columns is not supported
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index e21ff426519b0..eabc9623b2061 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -635,7 +635,7 @@ DROP TABLE cwi_test;
 CREATE TABLE syscol_table (a INT);
 -- System columns cannot be indexed
-CREATE INDEX ON syscolcol_table (ctid);
+CREATE INDEX ON syscol_table (ctid);
 -- nor used in expressions
 CREATE INDEX ON syscol_table ((ctid >= '(1000,0)'));

From 6551a05d9cf8ea75c0db232b661dadd16e595854 Mon Sep 17 00:00:00 2001
From: Melanie Plageman
Date: Mon, 4 Aug 2025 15:07:32 -0400
Subject: [PATCH 434/602] Minor test fixes in 035_standby_logical_decoding.pl

Import usleep, which, due to an oversight in commit 48796a98d5ae, was
used but not imported.

Correct the comparison string used in two logfile checks. Previously,
it was incorrect and thus the test could never have failed.

Also wordsmith a comment to make it clear when hot_standby_feedback is
meant to be on during the test scenarios.
Reported-by: Melanie Plageman Author: Bertrand Drouvot Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/flat/CAAKRu_YO2mEm%3DZWZKPjTMU%3DgW5Y83_KMi_1cr51JwavH0ctd7w%40mail.gmail.com Backpatch-through: 16 --- src/test/recovery/t/035_standby_logical_decoding.pl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl index 921813483e37c..c9c182892cf84 100644 --- a/src/test/recovery/t/035_standby_logical_decoding.pl +++ b/src/test/recovery/t/035_standby_logical_decoding.pl @@ -8,6 +8,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; +use Time::HiRes qw(usleep); use Test::More; if ($ENV{enable_injection_points} ne 'yes') @@ -623,7 +624,7 @@ sub wait_until_vacuum_can_remove /ERROR: cannot copy invalidated replication slot "vacuum_full_inactiveslot"/, "invalidated slot cannot be copied"); -# Turn hot_standby_feedback back on +# Set hot_standby_feedback to on change_hot_standby_feedback_and_wait_for_xmins(1, 1); ################################################## @@ -754,12 +755,12 @@ sub wait_until_vacuum_can_remove # message should not be issued ok( !$node_standby->log_contains( - "invalidating obsolete slot \"no_conflict_inactiveslot\"", $logstart), + "invalidating obsolete replication slot \"no_conflict_inactiveslot\"", $logstart), 'inactiveslot slot invalidation is not logged with vacuum on conflict_test' ); ok( !$node_standby->log_contains( - "invalidating obsolete slot \"no_conflict_activeslot\"", $logstart), + "invalidating obsolete replication slot \"no_conflict_activeslot\"", $logstart), 'activeslot slot invalidation is not logged with vacuum on conflict_test' ); From 1469e312977c8a5baeb1f9cb4222d171faf285b3 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 5 Aug 2025 10:53:49 +0200 Subject: [PATCH 435/602] Fix mixups of FooGetDatum() vs. DatumGetFoo() Some of these were accidentally reversed, but there was no ill effect. 
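For readers unfamiliar with the convention: FooGetDatum() converts a C
value into a Datum, and DatumGetFoo() converts a Datum back into a C
value. Since Datum is an integer type, using the wrong direction often
still compiles, which is how these mixups went unnoticed. A minimal
sketch of the intended directions (the helper names here are
hypothetical, for illustration only):

    #include "postgres.h"   /* Datum, Int16GetDatum(), DatumGetInt16() */

    /* Store a C value into a Datum slot: C value -> Datum */
    static Datum
    pack_offset(int16 offset)
    {
        return Int16GetDatum(offset);
    }

    /* Extract the C value again: Datum -> C value */
    static int16
    unpack_offset(Datum value)
    {
        return DatumGetInt16(value);
    }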
Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/8246d7ff-f4b7-4363-913e-827dadfeb145%40eisentraut.org --- contrib/pageinspect/btreefuncs.c | 2 +- contrib/pageinspect/gistfuncs.c | 4 ++-- src/backend/access/common/printsimple.c | 2 +- src/backend/access/nbtree/nbtcompare.c | 4 ++-- src/backend/access/transam/xlog.c | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 294821231fc3b..4e2e8891cddfd 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -506,7 +506,7 @@ bt_page_print_tuples(ua_page_items *uargs) j = 0; memset(nulls, 0, sizeof(nulls)); - values[j++] = DatumGetInt16(offset); + values[j++] = Int16GetDatum(offset); values[j++] = ItemPointerGetDatum(&itup->t_tid); values[j++] = Int32GetDatum((int) IndexTupleSize(itup)); values[j++] = BoolGetDatum(IndexTupleHasNulls(itup)); diff --git a/contrib/pageinspect/gistfuncs.c b/contrib/pageinspect/gistfuncs.c index 7b16e2a1ef33c..1b299374890b0 100644 --- a/contrib/pageinspect/gistfuncs.c +++ b/contrib/pageinspect/gistfuncs.c @@ -174,7 +174,7 @@ gist_page_items_bytea(PG_FUNCTION_ARGS) memset(nulls, 0, sizeof(nulls)); - values[0] = DatumGetInt16(offset); + values[0] = Int16GetDatum(offset); values[1] = ItemPointerGetDatum(&itup->t_tid); values[2] = Int32GetDatum((int) IndexTupleSize(itup)); @@ -281,7 +281,7 @@ gist_page_items(PG_FUNCTION_ARGS) memset(nulls, 0, sizeof(nulls)); - values[0] = DatumGetInt16(offset); + values[0] = Int16GetDatum(offset); values[1] = ItemPointerGetDatum(&itup->t_tid); values[2] = Int32GetDatum((int) IndexTupleSize(itup)); values[3] = BoolGetDatum(ItemIdIsDead(id)); diff --git a/src/backend/access/common/printsimple.c b/src/backend/access/common/printsimple.c index f346ab3e8125b..a09c8fcd3323e 100644 --- a/src/backend/access/common/printsimple.c +++ b/src/backend/access/common/printsimple.c @@ -123,7 +123,7 @@ printsimple(TupleTableSlot *slot, DestReceiver *self) case OIDOID: { - Oid num = ObjectIdGetDatum(value); + Oid num = DatumGetObjectId(value); char str[10]; /* 10 digits */ int len; diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c index 4da5a3c1d161d..e1b52acd20dc2 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -555,7 +555,7 @@ btcharcmp(PG_FUNCTION_ARGS) static Datum char_decrement(Relation rel, Datum existing, bool *underflow) { - uint8 cexisting = UInt8GetDatum(existing); + uint8 cexisting = DatumGetUInt8(existing); if (cexisting == 0) { @@ -571,7 +571,7 @@ char_decrement(Relation rel, Datum existing, bool *underflow) static Datum char_increment(Relation rel, Datum existing, bool *overflow) { - uint8 cexisting = UInt8GetDatum(existing); + uint8 cexisting = DatumGetUInt8(existing); if (cexisting == UCHAR_MAX) { diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5553c20fee8ce..9a4de1616bcc9 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9011,7 +9011,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, * work correctly, it is critical that sessionBackupState is only updated * after this block is over. 
*/ - PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, DatumGetBool(true)); + PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(true)); { bool gotUniqueStartpoint = false; DIR *tblspcdir; @@ -9250,7 +9250,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, state->starttime = (pg_time_t) time(NULL); } - PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, DatumGetBool(true)); + PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(true)); state->started_in_recovery = backup_started_in_recovery; @@ -9590,7 +9590,7 @@ register_persistent_abort_backup_handler(void) if (already_done) return; - before_shmem_exit(do_pg_abort_backup, DatumGetBool(false)); + before_shmem_exit(do_pg_abort_backup, BoolGetDatum(false)); already_done = true; } From c9a5860f7a56cc639d6a73519b8b2a00d26d960c Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 5 Aug 2025 09:34:22 +0000 Subject: [PATCH 436/602] Throw ERROR when publish_generated_columns is specified without a value. Previously, specifying the publication option 'publish_generated_columns' without an explicit value would incorrectly default to 'stored', which is not the intended behavior. This patch fixes the issue by raising an ERROR when no value is provided for 'publish_generated_columns', ensuring that users must explicitly specify a valid option. Author: Peter Smith Reviewed-by: vignesh C Backpatch-through: 18, where it was introduced Discussion: https://postgr.es/m/CAHut+PsCUCWiEKmB10DxhoPfXbF6jw5RD9ib2LuaQeA_XraW7w@mail.gmail.com --- src/backend/commands/publicationcmds.c | 20 ++++++++++---------- src/test/regress/expected/publication.out | 15 ++++----------- src/test/regress/sql/publication.sql | 7 ++----- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index 1bf7eaae5b362..803c26ab216dd 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -2113,20 +2113,20 @@ AlterPublicationOwner_oid(Oid pubid, Oid newOwnerId) static char defGetGeneratedColsOption(DefElem *def) { - char *sval; + char *sval = ""; /* - * If no parameter value given, assume "stored" is meant. + * A parameter value is required. */ - if (!def->arg) - return PUBLISH_GENCOLS_STORED; - - sval = defGetString(def); + if (def->arg) + { + sval = defGetString(def); - if (pg_strcasecmp(sval, "none") == 0) - return PUBLISH_GENCOLS_NONE; - if (pg_strcasecmp(sval, "stored") == 0) - return PUBLISH_GENCOLS_STORED; + if (pg_strcasecmp(sval, "none") == 0) + return PUBLISH_GENCOLS_NONE; + if (pg_strcasecmp(sval, "stored") == 0) + return PUBLISH_GENCOLS_STORED; + } ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 1ec3fa34a2d5a..53268059142ee 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -36,6 +36,9 @@ LINE 1: ...pub_xxx WITH (publish_generated_columns = stored, publish_ge... CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns = foo); ERROR: invalid value for publication parameter "publish_generated_columns": "foo" DETAIL: Valid values are "none" and "stored". +CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns); +ERROR: invalid value for publication parameter "publish_generated_columns": "" +DETAIL: Valid values are "none" and "stored". 
\dRp List of publications Name | Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root @@ -1844,8 +1847,7 @@ DROP SCHEMA sch1 cascade; DROP SCHEMA sch2 cascade; -- ====================================================== -- Test the 'publish_generated_columns' parameter with the following values: --- 'stored', 'none', and the default (no value specified), which defaults to --- 'stored'. +-- 'stored', 'none'. SET client_min_messages = 'ERROR'; CREATE PUBLICATION pub1 FOR ALL TABLES WITH (publish_generated_columns = stored); \dRp+ pub1 @@ -1863,17 +1865,8 @@ CREATE PUBLICATION pub2 FOR ALL TABLES WITH (publish_generated_columns = none); regress_publication_user | t | t | t | t | t | none | f (1 row) -CREATE PUBLICATION pub3 FOR ALL TABLES WITH (publish_generated_columns); -\dRp+ pub3 - Publication pub3 - Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root ---------------------------+------------+---------+---------+---------+-----------+-------------------+---------- - regress_publication_user | t | t | t | t | t | stored | f -(1 row) - DROP PUBLICATION pub1; DROP PUBLICATION pub2; -DROP PUBLICATION pub3; -- Test the 'publish_generated_columns' parameter as 'none' and 'stored' for -- different scenarios with/without generated columns in column lists. CREATE TABLE gencols (a int, gen1 int GENERATED ALWAYS AS (a * 2) STORED); diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 2585f08318150..deddf0da8445f 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -26,6 +26,7 @@ CREATE PUBLICATION testpub_xxx WITH (publish = 'cluster, vacuum'); CREATE PUBLICATION testpub_xxx WITH (publish_via_partition_root = 'true', publish_via_partition_root = '0'); CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns = stored, publish_generated_columns = none); CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns = foo); +CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns); \dRp @@ -1183,19 +1184,15 @@ DROP SCHEMA sch2 cascade; -- ====================================================== -- Test the 'publish_generated_columns' parameter with the following values: --- 'stored', 'none', and the default (no value specified), which defaults to --- 'stored'. +-- 'stored', 'none'. SET client_min_messages = 'ERROR'; CREATE PUBLICATION pub1 FOR ALL TABLES WITH (publish_generated_columns = stored); \dRp+ pub1 CREATE PUBLICATION pub2 FOR ALL TABLES WITH (publish_generated_columns = none); \dRp+ pub2 -CREATE PUBLICATION pub3 FOR ALL TABLES WITH (publish_generated_columns); -\dRp+ pub3 DROP PUBLICATION pub1; DROP PUBLICATION pub2; -DROP PUBLICATION pub3; -- Test the 'publish_generated_columns' parameter as 'none' and 'stored' for -- different scenarios with/without generated columns in column lists. From 2ad6e80de9a6300daffcc0987667e45012fbecde Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 5 Aug 2025 11:38:34 +0200 Subject: [PATCH 437/602] Fix various hash function uses These instances were using Datum-returning functions where a lower-level function returning uint32 would be more appropriate. 
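For context: hash_any() and hash_uint32() return Datum, while the
lower-level hash_bytes() and hash_bytes_uint32() return plain uint32,
so callers that want a raw hash value can avoid a needless Datum
wrap/unwrap. A minimal sketch of the preferred style (the function
name is hypothetical, for illustration only):

    #include "postgres.h"
    #include "common/hashfn.h"  /* hash_bytes() */

    /* Hash a NUL-terminated string directly to a plain uint32. */
    static uint32
    hash_name(const char *name)
    {
        /* hash_bytes() returns uint32; hash_any() would wrap the same
         * value in a Datum, which is unwanted here. */
        return hash_bytes((const unsigned char *) name, strlen(name));
    }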
Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/8246d7ff-f4b7-4363-913e-827dadfeb145%40eisentraut.org --- contrib/sepgsql/uavc.c | 4 ++-- src/backend/access/common/tupdesc.c | 6 +++--- src/backend/storage/file/fileset.c | 2 +- src/backend/utils/adt/multirangetypes.c | 2 +- src/backend/utils/adt/rangetypes.c | 2 +- src/backend/utils/cache/catcache.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/contrib/sepgsql/uavc.c b/contrib/sepgsql/uavc.c index 65ea8e7946a6e..d9ccbc38bc538 100644 --- a/contrib/sepgsql/uavc.c +++ b/contrib/sepgsql/uavc.c @@ -66,8 +66,8 @@ static char *avc_unlabeled; /* system 'unlabeled' label */ static uint32 sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass) { - return hash_any((const unsigned char *) scontext, strlen(scontext)) - ^ hash_any((const unsigned char *) tcontext, strlen(tcontext)) + return hash_bytes((const unsigned char *) scontext, strlen(scontext)) + ^ hash_bytes((const unsigned char *) tcontext, strlen(tcontext)) ^ tclass; } diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 020d00cd01ce7..be60005ae4600 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -815,10 +815,10 @@ hashRowType(TupleDesc desc) uint32 s; int i; - s = hash_combine(0, hash_uint32(desc->natts)); - s = hash_combine(s, hash_uint32(desc->tdtypeid)); + s = hash_combine(0, hash_bytes_uint32(desc->natts)); + s = hash_combine(s, hash_bytes_uint32(desc->tdtypeid)); for (i = 0; i < desc->natts; ++i) - s = hash_combine(s, hash_uint32(TupleDescAttr(desc, i)->atttypid)); + s = hash_combine(s, hash_bytes_uint32(TupleDescAttr(desc, i)->atttypid)); return s; } diff --git a/src/backend/storage/file/fileset.c b/src/backend/storage/file/fileset.c index 64141c7cb91c9..4d5ee353fd7a0 100644 --- a/src/backend/storage/file/fileset.c +++ b/src/backend/storage/file/fileset.c @@ -185,7 +185,7 @@ FileSetPath(char *path, FileSet *fileset, Oid tablespace) static Oid ChooseTablespace(const FileSet *fileset, const char *name) { - uint32 hash = hash_any((const unsigned char *) name, strlen(name)); + uint32 hash = hash_bytes((const unsigned char *) name, strlen(name)); return fileset->tablespaces[hash % fileset->ntablespaces]; } diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c index cd84ced5b487c..626b5513fe71a 100644 --- a/src/backend/utils/adt/multirangetypes.c +++ b/src/backend/utils/adt/multirangetypes.c @@ -2833,7 +2833,7 @@ hash_multirange(PG_FUNCTION_ARGS) upper_hash = 0; /* Merge hashes of flags and bounds */ - range_hash = hash_uint32((uint32) flags); + range_hash = hash_bytes_uint32((uint32) flags); range_hash ^= lower_hash; range_hash = pg_rotate_left32(range_hash, 1); range_hash ^= upper_hash; diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index 66cc0acf4a712..691679388df71 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -1444,7 +1444,7 @@ hash_range(PG_FUNCTION_ARGS) upper_hash = 0; /* Merge hashes of flags and bounds */ - result = hash_uint32((uint32) flags); + result = hash_bytes_uint32((uint32) flags); result ^= lower_hash; result = pg_rotate_left32(result, 1); result ^= upper_hash; diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index d1b25214376ed..e2cd3feaf81d3 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -213,7 +213,7 @@ 
namehashfast(Datum datum) { char *key = NameStr(*DatumGetName(datum)); - return hash_any((unsigned char *) key, strlen(key)); + return hash_bytes((unsigned char *) key, strlen(key)); } static bool From 0f5ade7a367c16d823c75a81abb10e2ec98b4206 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 5 Aug 2025 12:11:36 +0200 Subject: [PATCH 438/602] Fix varatt versus Datum type confusions Macros like VARDATA() and VARSIZE() should be thought of as taking values of type pointer to struct varlena or some other related struct. The way they are implemented, you can pass anything to it and it will cast it right. But this is in principle incorrect. To fix, add the required DatumGetPointer() calls. Or in a couple of cases, remove superfluous PointerGetDatum() calls. It is planned in a subsequent patch to change macros like VARDATA() and VARSIZE() to inline functions, which will enforce stricter typing. This is in preparation for that. Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/928ea48f-77c6-417b-897c-621ef16685a6%40eisentraut.org --- contrib/hstore/hstore_gin.c | 2 +- contrib/hstore/hstore_gist.c | 4 ++-- contrib/hstore/hstore_io.c | 24 ++++++++++----------- contrib/hstore/hstore_op.c | 4 ++-- contrib/test_decoding/test_decoding.c | 2 +- src/backend/access/brin/brin_minmax_multi.c | 2 +- src/backend/access/common/heaptuple.c | 8 +++---- src/backend/access/common/reloptions.c | 8 +++---- src/backend/access/common/toast_internals.c | 2 +- src/backend/access/gin/gininsert.c | 2 +- src/backend/access/spgist/spgutils.c | 4 ++-- src/backend/access/table/toast_helper.c | 2 +- src/backend/replication/logical/proto.c | 2 +- src/backend/replication/pgoutput/pgoutput.c | 4 ++-- src/backend/statistics/mcv.c | 2 +- src/backend/tsearch/ts_selfuncs.c | 2 +- src/backend/utils/adt/jsonb_gin.c | 4 ++-- src/backend/utils/adt/jsonb_op.c | 8 +++---- src/backend/utils/adt/jsonfuncs.c | 4 ++-- src/backend/utils/adt/jsonpath_exec.c | 4 ++-- src/backend/utils/adt/multirangetypes.c | 7 +++--- src/backend/utils/adt/rangetypes.c | 6 ++---- src/backend/utils/adt/tsvector_op.c | 24 ++++++++++----------- 23 files changed, 65 insertions(+), 66 deletions(-) diff --git a/contrib/hstore/hstore_gin.c b/contrib/hstore/hstore_gin.c index 766c00bb6a735..2e5fa115924ba 100644 --- a/contrib/hstore/hstore_gin.c +++ b/contrib/hstore/hstore_gin.c @@ -127,7 +127,7 @@ gin_extract_hstore_query(PG_FUNCTION_ARGS) /* Nulls in the array are ignored, cf hstoreArrayToPairs */ if (key_nulls[i]) continue; - item = makeitem(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ, KEYFLAG); + item = makeitem(VARDATA(DatumGetPointer(key_datums[i])), VARSIZE(DatumGetPointer(key_datums[i])) - VARHDRSZ, KEYFLAG); entries[j++] = PointerGetDatum(item); } diff --git a/contrib/hstore/hstore_gist.c b/contrib/hstore/hstore_gist.c index a3b08af385016..69515dc3d3fbd 100644 --- a/contrib/hstore/hstore_gist.c +++ b/contrib/hstore/hstore_gist.c @@ -576,7 +576,7 @@ ghstore_consistent(PG_FUNCTION_ARGS) if (key_nulls[i]) continue; - crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ); + crc = crc32_sz(VARDATA(DatumGetPointer(key_datums[i])), VARSIZE(DatumGetPointer(key_datums[i])) - VARHDRSZ); if (!(GETBIT(sign, HASHVAL(crc, siglen)))) res = false; } @@ -599,7 +599,7 @@ ghstore_consistent(PG_FUNCTION_ARGS) if (key_nulls[i]) continue; - crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ); + crc = crc32_sz(VARDATA(DatumGetPointer(key_datums[i])), VARSIZE(DatumGetPointer(key_datums[i])) - 
VARHDRSZ); if (GETBIT(sign, HASHVAL(crc, siglen))) res = true; } diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 4f867e4bd1f1c..9c53877c4a582 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -684,22 +684,22 @@ hstore_from_arrays(PG_FUNCTION_ARGS) if (!value_nulls || value_nulls[i]) { - pairs[i].key = VARDATA(key_datums[i]); + pairs[i].key = VARDATA(DatumGetPointer(key_datums[i])); pairs[i].val = NULL; pairs[i].keylen = - hstoreCheckKeyLen(VARSIZE(key_datums[i]) - VARHDRSZ); + hstoreCheckKeyLen(VARSIZE(DatumGetPointer(key_datums[i])) - VARHDRSZ); pairs[i].vallen = 4; pairs[i].isnull = true; pairs[i].needfree = false; } else { - pairs[i].key = VARDATA(key_datums[i]); - pairs[i].val = VARDATA(value_datums[i]); + pairs[i].key = VARDATA(DatumGetPointer(key_datums[i])); + pairs[i].val = VARDATA(DatumGetPointer(value_datums[i])); pairs[i].keylen = - hstoreCheckKeyLen(VARSIZE(key_datums[i]) - VARHDRSZ); + hstoreCheckKeyLen(VARSIZE(DatumGetPointer(key_datums[i])) - VARHDRSZ); pairs[i].vallen = - hstoreCheckValLen(VARSIZE(value_datums[i]) - VARHDRSZ); + hstoreCheckValLen(VARSIZE(DatumGetPointer(value_datums[i])) - VARHDRSZ); pairs[i].isnull = false; pairs[i].needfree = false; } @@ -778,22 +778,22 @@ hstore_from_array(PG_FUNCTION_ARGS) if (in_nulls[i * 2 + 1]) { - pairs[i].key = VARDATA(in_datums[i * 2]); + pairs[i].key = VARDATA(DatumGetPointer(in_datums[i * 2])); pairs[i].val = NULL; pairs[i].keylen = - hstoreCheckKeyLen(VARSIZE(in_datums[i * 2]) - VARHDRSZ); + hstoreCheckKeyLen(VARSIZE(DatumGetPointer(in_datums[i * 2])) - VARHDRSZ); pairs[i].vallen = 4; pairs[i].isnull = true; pairs[i].needfree = false; } else { - pairs[i].key = VARDATA(in_datums[i * 2]); - pairs[i].val = VARDATA(in_datums[i * 2 + 1]); + pairs[i].key = VARDATA(DatumGetPointer(in_datums[i * 2])); + pairs[i].val = VARDATA(DatumGetPointer(in_datums[i * 2 + 1])); pairs[i].keylen = - hstoreCheckKeyLen(VARSIZE(in_datums[i * 2]) - VARHDRSZ); + hstoreCheckKeyLen(VARSIZE(DatumGetPointer(in_datums[i * 2])) - VARHDRSZ); pairs[i].vallen = - hstoreCheckValLen(VARSIZE(in_datums[i * 2 + 1]) - VARHDRSZ); + hstoreCheckValLen(VARSIZE(DatumGetPointer(in_datums[i * 2 + 1])) - VARHDRSZ); pairs[i].isnull = false; pairs[i].needfree = false; } diff --git a/contrib/hstore/hstore_op.c b/contrib/hstore/hstore_op.c index 5e57eceffc817..bcba75f925808 100644 --- a/contrib/hstore/hstore_op.c +++ b/contrib/hstore/hstore_op.c @@ -107,8 +107,8 @@ hstoreArrayToPairs(ArrayType *a, int *npairs) { if (!key_nulls[i]) { - key_pairs[j].key = VARDATA(key_datums[i]); - key_pairs[j].keylen = VARSIZE(key_datums[i]) - VARHDRSZ; + key_pairs[j].key = VARDATA(DatumGetPointer(key_datums[i])); + key_pairs[j].keylen = VARSIZE(DatumGetPointer(key_datums[i])) - VARHDRSZ; key_pairs[j].val = NULL; key_pairs[j].vallen = 0; key_pairs[j].needfree = 0; diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c index bb495563200c3..f671a7d4b3125 100644 --- a/contrib/test_decoding/test_decoding.c +++ b/contrib/test_decoding/test_decoding.c @@ -581,7 +581,7 @@ tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_ /* print data */ if (isnull) appendStringInfoString(s, "null"); - else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval)) + else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(origval))) appendStringInfoString(s, "unchanged-toast-datum"); else if (!typisvarlena) print_literal(s, typid, diff --git a/src/backend/access/brin/brin_minmax_multi.c 
b/src/backend/access/brin/brin_minmax_multi.c index 0d1507a2a3624..b85a70a0db28e 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -624,7 +624,7 @@ brin_range_serialize(Ranges *range) for (i = 0; i < nvalues; i++) { - len += VARSIZE_ANY(range->values[i]); + len += VARSIZE_ANY(DatumGetPointer(range->values[i])); } } else if (typlen == -2) /* cstring */ diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 969d1028cae89..a410b5eb99b99 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -189,7 +189,7 @@ getmissingattr(TupleDesc tupleDesc, if (att->attlen > 0) key.len = att->attlen; else - key.len = VARSIZE_ANY(attrmiss->am_value); + key.len = VARSIZE_ANY(DatumGetPointer(attrmiss->am_value)); key.value = attrmiss->am_value; entry = hash_search(missing_cache, &key, HASH_ENTER, &found); @@ -901,9 +901,9 @@ expand_tuple(HeapTuple *targetHeapTuple, att->attlen, attrmiss[attnum].am_value); - targetDataLen = att_addlength_pointer(targetDataLen, - att->attlen, - attrmiss[attnum].am_value); + targetDataLen = att_addlength_datum(targetDataLen, + att->attlen, + attrmiss[attnum].am_value); } else { diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 50747c1639612..594a657ea1a78 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -1190,8 +1190,8 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, for (i = 0; i < noldoptions; i++) { - char *text_str = VARDATA(oldoptions[i]); - int text_len = VARSIZE(oldoptions[i]) - VARHDRSZ; + char *text_str = VARDATA(DatumGetPointer(oldoptions[i])); + int text_len = VARSIZE(DatumGetPointer(oldoptions[i])) - VARHDRSZ; /* Search for a match in defList */ foreach(cell, defList) @@ -1456,8 +1456,8 @@ parseRelOptionsInternal(Datum options, bool validate, for (i = 0; i < noptions; i++) { - char *text_str = VARDATA(optiondatums[i]); - int text_len = VARSIZE(optiondatums[i]) - VARHDRSZ; + char *text_str = VARDATA(DatumGetPointer(optiondatums[i])); + int text_len = VARSIZE(DatumGetPointer(optiondatums[i])) - VARHDRSZ; int j; /* Search for a match in reloptions */ diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 7d8be8346ce52..196e06115e936 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -144,7 +144,7 @@ toast_save_datum(Relation rel, Datum value, int num_indexes; int validIndex; - Assert(!VARATT_IS_EXTERNAL(value)); + Assert(!VARATT_IS_EXTERNAL(dval)); /* * Open the toast relation and its indexes. 
We can use the index to check diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index a65acd8910493..47b1898a06463 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -2233,7 +2233,7 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category, else if (typlen > 0) keylen = typlen; else if (typlen == -1) - keylen = VARSIZE_ANY(key); + keylen = VARSIZE_ANY(DatumGetPointer(key)); else if (typlen == -2) keylen = strlen(DatumGetPointer(key)) + 1; else diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 95fea74e296f8..9b86c016acb37 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -785,7 +785,7 @@ SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum) else if (att->attlen > 0) size = att->attlen; else - size = VARSIZE_ANY(datum); + size = VARSIZE_ANY(DatumGetPointer(datum)); return MAXALIGN(size); } @@ -804,7 +804,7 @@ memcpyInnerDatum(void *target, SpGistTypeDesc *att, Datum datum) } else { - size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum); + size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(DatumGetPointer(datum)); memcpy(target, DatumGetPointer(datum), size); } } diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index b60fab0a4d294..11f97d65367d5 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -330,7 +330,7 @@ toast_delete_external(Relation rel, const Datum *values, const bool *isnull, if (isnull[i]) continue; - else if (VARATT_IS_EXTERNAL_ONDISK(value)) + else if (VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(value))) toast_delete_datum(rel, value, is_speculative); } } diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index 1a352b542dc56..1b3d9eb49dd70 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -809,7 +809,7 @@ logicalrep_write_tuple(StringInfo out, Relation rel, TupleTableSlot *slot, continue; } - if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i])) + if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(values[i]))) { /* * Unchanged toasted datum. (Note that we don't promise to detect diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index f4c977262c5a4..80540c017bd3a 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -1374,8 +1374,8 @@ pgoutput_row_filter(Relation relation, TupleTableSlot *old_slot, * VARTAG_INDIRECT. See ReorderBufferToastReplace. 
*/ if (att->attlen == -1 && - VARATT_IS_EXTERNAL_ONDISK(new_slot->tts_values[i]) && - !VARATT_IS_EXTERNAL_ONDISK(old_slot->tts_values[i])) + VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(new_slot->tts_values[i])) && + !VARATT_IS_EXTERNAL_ONDISK(DatumGetPointer(old_slot->tts_values[i]))) { if (!tmp_new_slot) { diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index d98cda698d941..f59fb82154370 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -767,7 +767,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats) values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); /* serialized length (uint32 length + data) */ - len = VARSIZE_ANY_EXHDR(values[dim][i]); + len = VARSIZE_ANY_EXHDR(DatumGetPointer(values[dim][i])); info[dim].nbytes += sizeof(uint32); /* length */ info[dim].nbytes += len; /* value (no header) */ diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c index 0c1d2bc1109da..453a5e5c2ea06 100644 --- a/src/backend/tsearch/ts_selfuncs.c +++ b/src/backend/tsearch/ts_selfuncs.c @@ -233,7 +233,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, * The text Datums came from an array, so it cannot be compressed or * stored out-of-line -- it's safe to use VARSIZE_ANY*. */ - Assert(!VARATT_IS_COMPRESSED(mcelem[i]) && !VARATT_IS_EXTERNAL(mcelem[i])); + Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(mcelem[i])) && !VARATT_IS_EXTERNAL(DatumGetPointer(mcelem[i]))); lookup[i].element = (text *) DatumGetPointer(mcelem[i]); lookup[i].frequency = numbers[i]; } diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c index c1950792b5aea..9b56248cf0bee 100644 --- a/src/backend/utils/adt/jsonb_gin.c +++ b/src/backend/utils/adt/jsonb_gin.c @@ -896,8 +896,8 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS) continue; /* We rely on the array elements not being toasted */ entries[j++] = make_text_key(JGINFLAG_KEY, - VARDATA_ANY(key_datums[i]), - VARSIZE_ANY_EXHDR(key_datums[i])); + VARDATA_ANY(DatumGetPointer(key_datums[i])), + VARSIZE_ANY_EXHDR(DatumGetPointer(key_datums[i]))); } *nentries = j; diff --git a/src/backend/utils/adt/jsonb_op.c b/src/backend/utils/adt/jsonb_op.c index fa5603f26e1d6..51d38e321fb2f 100644 --- a/src/backend/utils/adt/jsonb_op.c +++ b/src/backend/utils/adt/jsonb_op.c @@ -63,8 +63,8 @@ jsonb_exists_any(PG_FUNCTION_ARGS) strVal.type = jbvString; /* We rely on the array elements not being toasted */ - strVal.val.string.val = VARDATA_ANY(key_datums[i]); - strVal.val.string.len = VARSIZE_ANY_EXHDR(key_datums[i]); + strVal.val.string.val = VARDATA_ANY(DatumGetPointer(key_datums[i])); + strVal.val.string.len = VARSIZE_ANY_EXHDR(DatumGetPointer(key_datums[i])); if (findJsonbValueFromContainer(&jb->root, JB_FOBJECT | JB_FARRAY, @@ -96,8 +96,8 @@ jsonb_exists_all(PG_FUNCTION_ARGS) strVal.type = jbvString; /* We rely on the array elements not being toasted */ - strVal.val.string.val = VARDATA_ANY(key_datums[i]); - strVal.val.string.len = VARSIZE_ANY_EXHDR(key_datums[i]); + strVal.val.string.val = VARDATA_ANY(DatumGetPointer(key_datums[i])); + strVal.val.string.len = VARSIZE_ANY_EXHDR(DatumGetPointer(key_datums[i])); if (findJsonbValueFromContainer(&jb->root, JB_FOBJECT | JB_FARRAY, diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index bcb1720b6cde2..370456408bfba 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -4766,8 +4766,8 @@ jsonb_delete_array(PG_FUNCTION_ARGS) continue; /* We rely 
on the array elements not being toasted */ - keyptr = VARDATA_ANY(keys_elems[i]); - keylen = VARSIZE_ANY_EXHDR(keys_elems[i]); + keyptr = VARDATA_ANY(DatumGetPointer(keys_elems[i])); + keylen = VARSIZE_ANY_EXHDR(DatumGetPointer(keys_elems[i])); if (keylen == v.val.string.len && memcmp(keyptr, v.val.string.val, keylen) == 0) { diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index dbab24737ef1f..407041b14a177 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -3074,8 +3074,8 @@ JsonItemFromDatum(Datum val, Oid typid, int32 typmod, JsonbValue *res) case TEXTOID: case VARCHAROID: res->type = jbvString; - res->val.string.val = VARDATA_ANY(val); - res->val.string.len = VARSIZE_ANY_EXHDR(val); + res->val.string.val = VARDATA_ANY(DatumGetPointer(val)); + res->val.string.len = VARSIZE_ANY_EXHDR(DatumGetPointer(val)); break; case DATEOID: case TIMEOID: diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c index 626b5513fe71a..46f2ec0c29fbd 100644 --- a/src/backend/utils/adt/multirangetypes.c +++ b/src/backend/utils/adt/multirangetypes.c @@ -394,12 +394,13 @@ multirange_send(PG_FUNCTION_ARGS) for (int i = 0; i < range_count; i++) { Datum range; + bytea *outputbytes; range = RangeTypePGetDatum(ranges[i]); - range = PointerGetDatum(SendFunctionCall(&cache->typioproc, range)); + outputbytes = SendFunctionCall(&cache->typioproc, range); - pq_sendint32(buf, VARSIZE(range) - VARHDRSZ); - pq_sendbytes(buf, VARDATA(range), VARSIZE(range) - VARHDRSZ); + pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(buf, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); } PG_RETURN_BYTEA_P(pq_endtypsend(buf)); diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index 691679388df71..c83b239b3bb28 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -285,8 +285,7 @@ range_send(PG_FUNCTION_ARGS) if (RANGE_HAS_LBOUND(flags)) { - Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc, - lower.val)); + bytea *bound = SendFunctionCall(&cache->typioproc, lower.val); uint32 bound_len = VARSIZE(bound) - VARHDRSZ; char *bound_data = VARDATA(bound); @@ -296,8 +295,7 @@ range_send(PG_FUNCTION_ARGS) if (RANGE_HAS_UBOUND(flags)) { - Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc, - upper.val)); + bytea *bound = SendFunctionCall(&cache->typioproc, upper.val); uint32 bound_len = VARSIZE(bound) - VARHDRSZ; char *bound_data = VARDATA(bound); diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 1fa1275ca63b2..0625da9532f6c 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -329,8 +329,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS) if (nulls[i]) continue; - lex = VARDATA(dlexemes[i]); - lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + lex = VARDATA(DatumGetPointer(dlexemes[i])); + lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ; lex_pos = tsvector_bsearch(tsout, lex, lex_len); if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0) @@ -443,10 +443,10 @@ compare_text_lexemes(const void *va, const void *vb) { Datum a = *((const Datum *) va); Datum b = *((const Datum *) vb); - char *alex = VARDATA_ANY(a); - int alex_len = VARSIZE_ANY_EXHDR(a); - char *blex = VARDATA_ANY(b); - int blex_len = VARSIZE_ANY_EXHDR(b); + char *alex = VARDATA_ANY(DatumGetPointer(a)); + int alex_len = 
VARSIZE_ANY_EXHDR(DatumGetPointer(a)); + char *blex = VARDATA_ANY(DatumGetPointer(b)); + int blex_len = VARSIZE_ANY_EXHDR(DatumGetPointer(b)); return tsCompareString(alex, alex_len, blex, blex_len, false); } @@ -605,8 +605,8 @@ tsvector_delete_arr(PG_FUNCTION_ARGS) if (nulls[i]) continue; - lex = VARDATA(dlexemes[i]); - lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + lex = VARDATA(DatumGetPointer(dlexemes[i])); + lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ; lex_pos = tsvector_bsearch(tsin, lex, lex_len); if (lex_pos >= 0) @@ -770,7 +770,7 @@ array_to_tsvector(PG_FUNCTION_ARGS) (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), errmsg("lexeme array may not contain nulls"))); - if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0) + if (VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ == 0) ereport(ERROR, (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING), errmsg("lexeme array may not contain empty strings"))); @@ -786,7 +786,7 @@ array_to_tsvector(PG_FUNCTION_ARGS) /* Calculate space needed for surviving lexemes. */ for (i = 0; i < nitems; i++) - datalen += VARSIZE(dlexemes[i]) - VARHDRSZ; + datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ; tslen = CALCDATASIZE(nitems, datalen); /* Allocate and fill tsvector. */ @@ -798,8 +798,8 @@ array_to_tsvector(PG_FUNCTION_ARGS) cur = STRPTR(tsout); for (i = 0; i < nitems; i++) { - char *lex = VARDATA(dlexemes[i]); - int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ; + char *lex = VARDATA(DatumGetPointer(dlexemes[i])); + int lex_len = VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ; memcpy(cur, lex, lex_len); arrout[i].haspos = 0; From e035863c9a04beeecc254c3bfe48dab58e389e10 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 5 Aug 2025 17:01:25 +0200 Subject: [PATCH 439/602] Convert varatt.h access macros to static inline functions. We've only bothered converting the external interfaces, not the endian-dependent internal macros (which should not be used by any callers other than the interface functions in this header, anyway). The VARTAG_1B_E() changes are required for C++ compatibility. Author: Peter Eisentraut Reviewed-by: Tom Lane Discussion: https://postgr.es/m/928ea48f-77c6-417b-897c-621ef16685a6@eisentraut.org --- doc/src/sgml/xfunc.sgml | 2 +- src/include/varatt.h | 336 +++++++++++++++++++++++++++++++--------- 2 files changed, 261 insertions(+), 77 deletions(-) diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index 2d81afce8cb9b..30219f432d970 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -2165,7 +2165,7 @@ memcpy(destination->data, buffer, 40); it's considered good style to use the macro VARHDRSZ to refer to the size of the overhead for a variable-length type. Also, the length field must be set using the - SET_VARSIZE macro, not by simple assignment. + SET_VARSIZE function, not by simple assignment. diff --git a/src/include/varatt.h b/src/include/varatt.h index 2e8564d49980b..aeeabf9145b59 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -89,20 +89,35 @@ typedef enum vartag_external VARTAG_ONDISK = 18 } vartag_external; +/* Is a TOAST pointer either type of expanded-object pointer? */ /* this test relies on the specific tag values above */ -#define VARTAG_IS_EXPANDED(tag) \ - (((tag) & ~1) == VARTAG_EXPANDED_RO) +static inline bool +VARTAG_IS_EXPANDED(vartag_external tag) +{ + return ((tag & ~1) == VARTAG_EXPANDED_RO); +} -#define VARTAG_SIZE(tag) \ - ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ - VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \ - (tag) == VARTAG_ONDISK ? 
sizeof(varatt_external) : \ - (AssertMacro(false), 0)) +/* Size of the data part of a "TOAST pointer" datum */ +static inline Size +VARTAG_SIZE(vartag_external tag) +{ + if (tag == VARTAG_INDIRECT) + return sizeof(varatt_indirect); + else if (VARTAG_IS_EXPANDED(tag)) + return sizeof(varatt_expanded); + else if (tag == VARTAG_ONDISK) + return sizeof(varatt_external); + else + { + Assert(false); + return 0; + } +} /* * These structs describe the header of a varlena object that may have been * TOASTed. Generally, don't reference these structs directly, but use the - * macros below. + * functions and macros below. * * We use separate structs for the aligned and unaligned cases because the * compiler might otherwise think it could generate code that assumes @@ -166,7 +181,9 @@ typedef struct /* * Endian-dependent macros. These are considered internal --- use the - * external macros below instead of using these directly. + * external functions below instead of using these directly. All of these + * expect an argument that is a pointer, not a Datum. Some of them have + * multiple-evaluation hazards, too. * * Note: IS_1B is true for external toast records but VARSIZE_1B will return 0 * for such records. Hence you should usually check for IS_EXTERNAL before @@ -194,7 +211,7 @@ typedef struct #define VARSIZE_1B(PTR) \ (((varattrib_1b *) (PTR))->va_header & 0x7F) #define VARTAG_1B_E(PTR) \ - (((varattrib_1b_e *) (PTR))->va_tag) + ((vartag_external) ((varattrib_1b_e *) (PTR))->va_tag) #define SET_VARSIZE_4B(PTR,len) \ (((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF) @@ -227,7 +244,7 @@ typedef struct #define VARSIZE_1B(PTR) \ ((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F) #define VARTAG_1B_E(PTR) \ - (((varattrib_1b_e *) (PTR))->va_tag) + ((vartag_external) ((varattrib_1b_e *) (PTR))->va_tag) #define SET_VARSIZE_4B(PTR,len) \ (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2)) @@ -247,19 +264,19 @@ typedef struct #define VARDATA_1B_E(PTR) (((varattrib_1b_e *) (PTR))->va_data) /* - * Externally visible TOAST macros begin here. + * Externally visible TOAST functions and macros begin here. All of these + * were originally macros, accounting for the upper-case naming. + * + * Most of these functions accept a pointer to a value of a toastable data + * type. The caller's variable might be declared "text *" or the like, + * so we use "void *" here. Callers that are working with a Datum variable + * must apply DatumGetPointer before calling these functions. */ #define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data) #define VARHDRSZ_COMPRESSED offsetof(varattrib_4b, va_compressed.va_data) #define VARHDRSZ_SHORT offsetof(varattrib_1b, va_data) - #define VARATT_SHORT_MAX 0x7F -#define VARATT_CAN_MAKE_SHORT(PTR) \ - (VARATT_IS_4B_U(PTR) && \ - (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) <= VARATT_SHORT_MAX) -#define VARATT_CONVERTED_SHORT_SIZE(PTR) \ - (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) /* * In consumers oblivious to data alignment, call PG_DETOAST_DATUM_PACKED(), @@ -272,70 +289,234 @@ typedef struct * Code assembling a new datum should call VARDATA() and SET_VARSIZE(). * (Datums begin life untoasted.) * - * Other macros here should usually be used only by tuple assembly/disassembly + * Other functions here should usually be used only by tuple assembly/disassembly * code and code that specifically wants to work with still-toasted Datums. 
*/ -#define VARDATA(PTR) VARDATA_4B(PTR) -#define VARSIZE(PTR) VARSIZE_4B(PTR) - -#define VARSIZE_SHORT(PTR) VARSIZE_1B(PTR) -#define VARDATA_SHORT(PTR) VARDATA_1B(PTR) - -#define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR) -#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR))) -#define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR) - -#define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR) -#define VARATT_IS_EXTERNAL(PTR) VARATT_IS_1B_E(PTR) -#define VARATT_IS_EXTERNAL_ONDISK(PTR) \ - (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK) -#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \ - (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT) -#define VARATT_IS_EXTERNAL_EXPANDED_RO(PTR) \ - (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RO) -#define VARATT_IS_EXTERNAL_EXPANDED_RW(PTR) \ - (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RW) -#define VARATT_IS_EXTERNAL_EXPANDED(PTR) \ - (VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) -#define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \ - (VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR))) -#define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) -#define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) - -#define SET_VARSIZE(PTR, len) SET_VARSIZE_4B(PTR, len) -#define SET_VARSIZE_SHORT(PTR, len) SET_VARSIZE_1B(PTR, len) -#define SET_VARSIZE_COMPRESSED(PTR, len) SET_VARSIZE_4B_C(PTR, len) - -#define SET_VARTAG_EXTERNAL(PTR, tag) SET_VARTAG_1B_E(PTR, tag) - -#define VARSIZE_ANY(PTR) \ - (VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR) : \ - (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR) : \ - VARSIZE_4B(PTR))) - -/* Size of a varlena data, excluding header */ -#define VARSIZE_ANY_EXHDR(PTR) \ - (VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR)-VARHDRSZ_EXTERNAL : \ - (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR)-VARHDRSZ_SHORT : \ - VARSIZE_4B(PTR)-VARHDRSZ)) +/* Size of a known-not-toasted varlena datum, including header */ +static inline Size +VARSIZE(const void *PTR) +{ + return VARSIZE_4B(PTR); +} + +/* Start of data area of a known-not-toasted varlena datum */ +static inline char * +VARDATA(const void *PTR) +{ + return VARDATA_4B(PTR); +} + +/* Size of a known-short-header varlena datum, including header */ +static inline Size +VARSIZE_SHORT(const void *PTR) +{ + return VARSIZE_1B(PTR); +} + +/* Start of data area of a known-short-header varlena datum */ +static inline char * +VARDATA_SHORT(const void *PTR) +{ + return VARDATA_1B(PTR); +} + +/* Type tag of a "TOAST pointer" datum */ +static inline vartag_external +VARTAG_EXTERNAL(const void *PTR) +{ + return VARTAG_1B_E(PTR); +} + +/* Size of a "TOAST pointer" datum, including header */ +static inline Size +VARSIZE_EXTERNAL(const void *PTR) +{ + return VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)); +} + +/* Start of data area of a "TOAST pointer" datum */ +static inline char * +VARDATA_EXTERNAL(const void *PTR) +{ + return VARDATA_1B_E(PTR); +} + +/* Is varlena datum in inline-compressed format? */ +static inline bool +VARATT_IS_COMPRESSED(const void *PTR) +{ + return VARATT_IS_4B_C(PTR); +} + +/* Is varlena datum a "TOAST pointer" datum? */ +static inline bool +VARATT_IS_EXTERNAL(const void *PTR) +{ + return VARATT_IS_1B_E(PTR); +} + +/* Is varlena datum a pointer to on-disk toasted data? */ +static inline bool +VARATT_IS_EXTERNAL_ONDISK(const void *PTR) +{ + return VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK; +} + +/* Is varlena datum an indirect pointer? 
*/ +static inline bool +VARATT_IS_EXTERNAL_INDIRECT(const void *PTR) +{ + return VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT; +} + +/* Is varlena datum a read-only pointer to an expanded object? */ +static inline bool +VARATT_IS_EXTERNAL_EXPANDED_RO(const void *PTR) +{ + return VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RO; +} + +/* Is varlena datum a read-write pointer to an expanded object? */ +static inline bool +VARATT_IS_EXTERNAL_EXPANDED_RW(const void *PTR) +{ + return VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RW; +} + +/* Is varlena datum either type of pointer to an expanded object? */ +static inline bool +VARATT_IS_EXTERNAL_EXPANDED(const void *PTR) +{ + return VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)); +} + +/* Is varlena datum a "TOAST pointer", but not for an expanded object? */ +static inline bool +VARATT_IS_EXTERNAL_NON_EXPANDED(const void *PTR) +{ + return VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)); +} + +/* Is varlena datum a short-header datum? */ +static inline bool +VARATT_IS_SHORT(const void *PTR) +{ + return VARATT_IS_1B(PTR); +} + +/* Is varlena datum not in traditional (4-byte-header, uncompressed) format? */ +static inline bool +VARATT_IS_EXTENDED(const void *PTR) +{ + return !VARATT_IS_4B_U(PTR); +} + +/* Is varlena datum short enough to convert to short-header format? */ +static inline bool +VARATT_CAN_MAKE_SHORT(const void *PTR) +{ + return VARATT_IS_4B_U(PTR) && + (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) <= VARATT_SHORT_MAX; +} + +/* Size that datum will have in short-header format, including header */ +static inline Size +VARATT_CONVERTED_SHORT_SIZE(const void *PTR) +{ + return VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT; +} + +/* Set the size (including header) of a 4-byte-header varlena datum */ +static inline void +SET_VARSIZE(void *PTR, Size len) +{ + SET_VARSIZE_4B(PTR, len); +} + +/* Set the size (including header) of a short-header varlena datum */ +static inline void +SET_VARSIZE_SHORT(void *PTR, Size len) +{ + SET_VARSIZE_1B(PTR, len); +} + +/* Set the size (including header) of an inline-compressed varlena datum */ +static inline void +SET_VARSIZE_COMPRESSED(void *PTR, Size len) +{ + SET_VARSIZE_4B_C(PTR, len); +} + +/* Set the type tag of a "TOAST pointer" datum */ +static inline void +SET_VARTAG_EXTERNAL(void *PTR, vartag_external tag) +{ + SET_VARTAG_1B_E(PTR, tag); +} + +/* Size of a varlena datum of any format, including header */ +static inline Size +VARSIZE_ANY(const void *PTR) +{ + if (VARATT_IS_1B_E(PTR)) + return VARSIZE_EXTERNAL(PTR); + else if (VARATT_IS_1B(PTR)) + return VARSIZE_1B(PTR); + else + return VARSIZE_4B(PTR); +} + +/* Size of a varlena datum of any format, excluding header */ +static inline Size +VARSIZE_ANY_EXHDR(const void *PTR) +{ + if (VARATT_IS_1B_E(PTR)) + return VARSIZE_EXTERNAL(PTR) - VARHDRSZ_EXTERNAL; + else if (VARATT_IS_1B(PTR)) + return VARSIZE_1B(PTR) - VARHDRSZ_SHORT; + else + return VARSIZE_4B(PTR) - VARHDRSZ; +} + +/* Start of data area of a plain or short-header varlena datum */ /* caution: this will not work on an external or compressed-in-line Datum */ /* caution: this will return a possibly unaligned pointer */ -#define VARDATA_ANY(PTR) \ - (VARATT_IS_1B(PTR) ? VARDATA_1B(PTR) : VARDATA_4B(PTR)) +static inline char * +VARDATA_ANY(const void *PTR) +{ + return VARATT_IS_1B(PTR) ? 
VARDATA_1B(PTR) : VARDATA_4B(PTR); +} -/* Decompressed size and compression method of a compressed-in-line Datum */ -#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK) -#define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) +/* Decompressed size of a compressed-in-line varlena datum */ +static inline Size +VARDATA_COMPRESSED_GET_EXTSIZE(const void *PTR) +{ + return ((varattrib_4b *) PTR)->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK; +} + +/* Compression method of a compressed-in-line varlena datum */ +static inline uint32 +VARDATA_COMPRESSED_GET_COMPRESS_METHOD(const void *PTR) +{ + return ((varattrib_4b *) PTR)->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS; +} /* Same for external Datums; but note argument is a struct varatt_external */ -#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ - ((toast_pointer).va_extinfo & VARLENA_EXTSIZE_MASK) -#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \ - ((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) +static inline Size +VARATT_EXTERNAL_GET_EXTSIZE(struct varatt_external toast_pointer) +{ + return toast_pointer.va_extinfo & VARLENA_EXTSIZE_MASK; +} +static inline uint32 +VARATT_EXTERNAL_GET_COMPRESS_METHOD(struct varatt_external toast_pointer) +{ + return toast_pointer.va_extinfo >> VARLENA_EXTSIZE_BITS; +} + +/* Set size and compress method of an externally-stored varlena datum */ +/* This has to remain a macro; beware multiple evaluations! */ #define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ do { \ Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ @@ -351,8 +532,11 @@ typedef struct * VARHDRSZ overhead, the former doesn't. We never use compression unless it * actually saves space, so we expect either equality or less-than. */ -#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ - (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ - (toast_pointer).va_rawsize - VARHDRSZ) +static inline bool +VARATT_EXTERNAL_IS_COMPRESSED(struct varatt_external toast_pointer) +{ + return VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < + (Size) (toast_pointer.va_rawsize - VARHDRSZ); +} #endif From f291751ef86ec407b3e67a951f55e79fb160ae10 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 5 Aug 2025 12:11:33 -0400 Subject: [PATCH 440/602] Mop-up for commit e035863c9. Neither Peter nor I had tried this with USE_VALGRIND ... Per buildfarm member skink. --- src/backend/access/common/printtup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 830a3d883aa2e..6d3045e233211 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -350,7 +350,7 @@ printtup(TupleTableSlot *slot, DestReceiver *self) */ if (thisState->typisvarlena) VALGRIND_CHECK_MEM_IS_DEFINED(DatumGetPointer(attr), - VARSIZE_ANY(attr)); + VARSIZE_ANY(DatumGetPointer(attr))); if (thisState->format == 0) { From 295a39770e6f7d9c117d52f86ff0596b7d9a590e Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 5 Aug 2025 09:06:05 -0700 Subject: [PATCH 441/602] Don't copy datlocale from template unless provider matches. During CREATE DATABASE, if changing the locale provider, require that a new locale is specified rather than trying to reinterpret the template's locale using the new provider. 
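To make the new rule concrete, here is a minimal sketch of the resulting fallback logic in createdb() (variable names follow the dbcommands.c hunk below; the surrounding inheritance of dbctype and the other template attributes is elided):

    /* inherit the provider from the template when none was given ... */
    if (dblocprovider == '\0')
        dblocprovider = src_locprovider;
    /* ... but reuse the template's locale only when the provider matches */
    if (dblocale == NULL && dblocprovider == src_locprovider)
        dblocale = src_locale;

When the providers differ, dblocale remains NULL and the later validation reports an error rather than reinterpreting the template's locale under the new provider.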
This only affects the behavior when the template uses the builtin provider and CREATE DATABASE specifies the ICU provider without specifying the locale. Previously, that may have succeeded due to loose validation by ICU, whereas now that will cause an error. Because it can cause an error, backport only to unreleased versions. Discussion: https://postgr.es/m/5038b33a6dc639009f4b3d43fa6ae0c5ba9e04f7.camel@j-davis.com Backpatch-through: 18 --- src/backend/commands/dbcommands.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 502a45163c8ae..92a396b8406a3 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1052,7 +1052,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbctype = src_ctype; if (dblocprovider == '\0') dblocprovider = src_locprovider; - if (dblocale == NULL) + if (dblocale == NULL && dblocprovider == src_locprovider) dblocale = src_locale; if (dbicurules == NULL) dbicurules = src_icurules; From deb674454c5cb7ecabecee2e04ca929eee570df4 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Tue, 5 Aug 2025 10:50:45 -0700 Subject: [PATCH 442/602] Add backup_type column to pg_stat_progress_basebackup. This commit introduces a new column backup_type that indicates the type of backup being performed: either 'full' or 'incremental'. Bump catalog version. Author: Shinya Kato Reviewed-by: Yugo Nagata Discussion: https://postgr.es/m/CAOzEurQuzbHwTj1ehk1a+eeQDidJPyrE5s6mYumkjwjZnurhkQ@mail.gmail.com --- doc/src/sgml/monitoring.sgml | 10 ++++++++++ src/backend/backup/basebackup.c | 2 +- src/backend/backup/basebackup_progress.c | 9 +++++++-- src/backend/catalog/system_views.sql | 5 ++++- src/include/backup/basebackup_sink.h | 3 ++- src/include/catalog/catversion.h | 2 +- src/include/commands/progress.h | 5 +++++ src/test/regress/expected/rules.out | 7 ++++++- 8 files changed, 36 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index fa78031ccbbf0..3f4a27a736e27 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -6791,6 +6791,16 @@ FROM pg_stat_get_backend_idset() AS backendid; advances when the phase is streaming database files.
+ + + + backup_type text + + + Backup type. Either full or + incremental. + + diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index f0f88838dc21a..bb7d90aa5d963 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -1048,7 +1048,7 @@ SendBaseBackup(BaseBackupCmd *cmd, IncrementalBackupInfo *ib) sink = bbsink_zstd_new(sink, &opt.compression_specification); /* Set up progress reporting. */ - sink = bbsink_progress_new(sink, opt.progress); + sink = bbsink_progress_new(sink, opt.progress, opt.incremental); /* * Perform the base backup, but make sure we clean up the bbsink even if diff --git a/src/backend/backup/basebackup_progress.c b/src/backend/backup/basebackup_progress.c index 1d22b541f89af..dac205936229b 100644 --- a/src/backend/backup/basebackup_progress.c +++ b/src/backend/backup/basebackup_progress.c @@ -56,7 +56,7 @@ static const bbsink_ops bbsink_progress_ops = { * forwards data to a successor sink. */ bbsink * -bbsink_progress_new(bbsink *next, bool estimate_backup_size) +bbsink_progress_new(bbsink *next, bool estimate_backup_size, bool incremental) { bbsink *sink; @@ -69,10 +69,15 @@ bbsink_progress_new(bbsink *next, bool estimate_backup_size) /* * Report that a base backup is in progress, and set the total size of the * backup to -1, which will get translated to NULL. If we're estimating - * the backup size, we'll insert the real estimate when we have it. + * the backup size, we'll insert the real estimate when we have it. Also, + * the backup type is set. */ pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid); pgstat_progress_update_param(PROGRESS_BASEBACKUP_BACKUP_TOTAL, -1); + pgstat_progress_update_param(PROGRESS_BASEBACKUP_BACKUP_TYPE, + incremental + ? PROGRESS_BASEBACKUP_BACKUP_TYPE_INCREMENTAL + : PROGRESS_BASEBACKUP_BACKUP_TYPE_FULL); return sink; } diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 77c693f630e4b..1b3c5a55882df 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1327,7 +1327,10 @@ CREATE VIEW pg_stat_progress_basebackup AS CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS backup_total, S.param3 AS backup_streamed, S.param4 AS tablespaces_total, - S.param5 AS tablespaces_streamed + S.param5 AS tablespaces_streamed, + CASE S.param6 WHEN 1 THEN 'full' + WHEN 2 THEN 'incremental' + END AS backup_type FROM pg_stat_get_progress_info('BASEBACKUP') AS S; diff --git a/src/include/backup/basebackup_sink.h b/src/include/backup/basebackup_sink.h index 8a5ee996a45ed..310d92b8b9d45 100644 --- a/src/include/backup/basebackup_sink.h +++ b/src/include/backup/basebackup_sink.h @@ -287,7 +287,8 @@ extern bbsink *bbsink_copystream_new(bool send_to_client); extern bbsink *bbsink_gzip_new(bbsink *next, pg_compress_specification *); extern bbsink *bbsink_lz4_new(bbsink *next, pg_compress_specification *); extern bbsink *bbsink_zstd_new(bbsink *next, pg_compress_specification *); -extern bbsink *bbsink_progress_new(bbsink *next, bool estimate_backup_size); +extern bbsink *bbsink_progress_new(bbsink *next, bool estimate_backup_size, + bool incremental); extern bbsink *bbsink_server_new(bbsink *next, char *pathname); extern bbsink *bbsink_throttle_new(bbsink *next, uint32 maxrate); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 750a9d8a09b25..c4fe8b991af46 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* 
yyyymmddN */ -#define CATALOG_VERSION_NO 202508041 +#define CATALOG_VERSION_NO 202508051 #endif diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 7c736e7b03bcf..1cde4bd9bcf14 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -130,6 +130,7 @@ #define PROGRESS_BASEBACKUP_BACKUP_STREAMED 2 #define PROGRESS_BASEBACKUP_TBLSPC_TOTAL 3 #define PROGRESS_BASEBACKUP_TBLSPC_STREAMED 4 +#define PROGRESS_BASEBACKUP_BACKUP_TYPE 5 /* Phases of pg_basebackup (as advertised via PROGRESS_BASEBACKUP_PHASE) */ #define PROGRESS_BASEBACKUP_PHASE_WAIT_CHECKPOINT 1 @@ -138,6 +139,10 @@ #define PROGRESS_BASEBACKUP_PHASE_WAIT_WAL_ARCHIVE 4 #define PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL 5 +/* Types of pg_basebackup (as advertised via PROGRESS_BASEBACKUP_BACKUP_TYPE) */ +#define PROGRESS_BASEBACKUP_BACKUP_TYPE_FULL 1 +#define PROGRESS_BASEBACKUP_BACKUP_TYPE_INCREMENTAL 2 + /* Progress parameters for PROGRESS_COPY */ #define PROGRESS_COPY_BYTES_PROCESSED 0 #define PROGRESS_COPY_BYTES_TOTAL 1 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6509fda77a994..35e8aad7701be 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1977,7 +1977,12 @@ pg_stat_progress_basebackup| SELECT pid, END AS backup_total, param3 AS backup_streamed, param4 AS tablespaces_total, - param5 AS tablespaces_streamed + param5 AS tablespaces_streamed, + CASE param6 + WHEN 1 THEN 'full'::text + WHEN 2 THEN 'incremental'::text + ELSE NULL::text + END AS backup_type FROM pg_stat_get_progress_info('BASEBACKUP'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20); pg_stat_progress_cluster| SELECT s.pid, s.datid, From 37fc1803cc12120f19184cd952865cc35e0f1755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Tue, 5 Aug 2025 20:09:42 +0200 Subject: [PATCH 443/602] Hide expensive pg_upgrade test behind PG_TEST_EXTRA This new test is very expensive. Make it opt-in. Discussion: https://postgr.es/m/202508051433.ebznuqrxt4b2@alvherre.pgsql --- doc/src/sgml/regress.sgml | 13 +++++++++++++ src/bin/pg_upgrade/t/002_pg_upgrade.pl | 3 +++ 2 files changed, 16 insertions(+) diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index bf4ffb3057636..769b721037f42 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -284,6 +284,19 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance libpq_encryption' + + regress_dump_restore + + + Runs an additional test suite in + src/bin/pg_upgrade/t/002_pg_upgrade.pl which + cycles the regression database through pg_dump/ + pg_restore. Not enabled by default because it + is resource intensive. 
+ + + + sepgsql diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 7d82593879d57..0b15e38297e2e 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -375,6 +375,9 @@ sub get_dump_for_comparison { my $dstnode = PostgreSQL::Test::Cluster->new('dst_node'); + skip "regress_dump_restore not enabled in PG_TEST_EXTRA" + if (!$ENV{PG_TEST_EXTRA} + || $ENV{PG_TEST_EXTRA} !~ /\bregress_dump_restore\b/); skip "different Postgres versions" if ($oldnode->pg_version != $dstnode->pg_version); skip "source node not using default install" From 455a040d966897edd3901f044945398450da338a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Tue, 5 Aug 2025 20:22:32 +0200 Subject: [PATCH 444/602] Put PG_TEST_EXTRA doc items back in alphabetical order A few items appear to have been added in random order over the years. --- doc/src/sgml/regress.sgml | 62 +++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 769b721037f42..8838fe7f0225f 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -285,88 +285,88 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance libpq_encryption' - regress_dump_restore + libpq_encryption - Runs an additional test suite in - src/bin/pg_upgrade/t/002_pg_upgrade.pl which - cycles the regression database through pg_dump/ - pg_restore. Not enabled by default because it - is resource intensive. + Runs the test src/interfaces/libpq/t/005_negotiate_encryption.pl. + This opens TCP/IP listen sockets. If PG_TEST_EXTRA + also includes kerberos, additional tests that require + an MIT Kerberos installation are enabled. - sepgsql + load_balance - Runs the test suite under contrib/sepgsql. This - requires an SELinux environment that is set up in a specific way; see - . + Runs the test src/interfaces/libpq/t/004_load_balance_dns.pl. + This requires editing the system hosts file and + opens TCP/IP listen sockets. - ssl + oauth - Runs the test suite under src/test/ssl. This opens TCP/IP listen sockets. + Runs the test suite under src/test/modules/oauth_validator. + This opens TCP/IP listen sockets for a test server running HTTPS. - load_balance + regress_dump_restore - Runs the test src/interfaces/libpq/t/004_load_balance_dns.pl. - This requires editing the system hosts file and - opens TCP/IP listen sockets. + Runs an additional test suite in + src/bin/pg_upgrade/t/002_pg_upgrade.pl which + cycles the regression database through pg_dump/ + pg_restore. Not enabled by default because it + is resource intensive. - libpq_encryption + sepgsql - Runs the test src/interfaces/libpq/t/005_negotiate_encryption.pl. - This opens TCP/IP listen sockets. If PG_TEST_EXTRA - also includes kerberos, additional tests that require - an MIT Kerberos installation are enabled. + Runs the test suite under contrib/sepgsql. This + requires an SELinux environment that is set up in a specific way; see + . - wal_consistency_checking + ssl - Uses wal_consistency_checking=all while running - certain tests under src/test/recovery. Not - enabled by default because it is resource intensive. + Runs the test suite under src/test/ssl. This opens TCP/IP listen sockets. - xid_wraparound + wal_consistency_checking - Runs the test suite under src/test/modules/xid_wraparound. - Not enabled by default because it is resource intensive. 
+ Uses wal_consistency_checking=all while running + certain tests under src/test/recovery. Not + enabled by default because it is resource intensive. - oauth + xid_wraparound - Runs the test suite under src/test/modules/oauth_validator. - This opens TCP/IP listen sockets for a test server running HTTPS. + Runs the test suite under src/test/modules/xid_wraparound. + Not enabled by default because it is resource intensive. From 80c758a2e1d720a942610f2f889448d69ce2ce95 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 5 Aug 2025 16:51:10 -0400 Subject: [PATCH 445/602] Fix incorrect return value in brin_minmax_multi_distance_numeric(). The result of "DirectFunctionCall1(numeric_float8, d)" is already in Datum form, but the code was incorrectly applying PG_RETURN_FLOAT8() to it. On machines where float8 is pass-by-reference, this would result in complete garbage, since an unpredictable pointer value would be treated as an integer and then converted to float. It's not entirely clear how much of a problem would ensue on 64-bit hardware, but certainly interpreting a float8 bitpattern as uint64 and then converting that to float isn't the intended behavior. As luck would have it, even the complete-garbage case doesn't break BRIN indexes, since the results are only used to make choices about how to merge values into ranges: at worst, we'd make poor choices resulting in an inefficient index. Doubtless that explains the lack of field complaints. However, users with BRIN indexes that use the numeric_minmax_multi_ops opclass may wish to reindex in hopes of making their indexes more efficient. Author: Peter Eisentraut Co-authored-by: Tom Lane Discussion: https://postgr.es/m/2093712.1753983215@sss.pgh.pa.us Backpatch-through: 14 --- src/backend/access/brin/brin_minmax_multi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c index b85a70a0db28e..a5a414182caa6 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -2032,7 +2032,7 @@ brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS) d = DirectFunctionCall2(numeric_sub, a2, a1); /* a2 - a1 */ - PG_RETURN_FLOAT8(DirectFunctionCall1(numeric_float8, d)); + PG_RETURN_DATUM(DirectFunctionCall1(numeric_float8, d)); } /* From b5c53b403c93393c3725558294cbf4dbfb575e42 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Tue, 5 Aug 2025 15:30:28 -0700 Subject: [PATCH 446/602] Suppress maybe-uninitialized warning. Following commit e035863c9a0, building with -O0 began triggering warnings about potentially uninitialized 'workbuf' usage. While theoretically the initialization isn't necessary since VARDATA() doesn't access the contents of the pointed-to object, this commit explicitly initializes the workbuf variable to suppress the warning. Buildfarm members adder and flaviventris have shown the warning. 
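For reference, the construct being initialized, reduced to a sketch (shape as in the inv_api.c hunks below; the hdr member is shown here as bytea per that file, and LOBLKSIZE/VARHDRSZ come from the usual headers):

    union
    {
        bytea       hdr;
        char        data[LOBLKSIZE + VARHDRSZ];
        /* ensure union is aligned well enough: */
        int32       align_it;
    } workbuf = {0};    /* explicit zero-initialization quiets the warning */
    char       *workb = VARDATA(&workbuf.hdr);

VARDATA() merely computes a pointer into workbuf without reading it, so the initializer changes nothing functionally; it only satisfies the compiler's flow analysis.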
Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAD21AoCOZxfqnNgfM5yVKJZYnOq5m2Q96fBGy1fovEqQ9V4OZA@mail.gmail.com --- src/backend/storage/large_object/inv_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index 68b76f2cc18a0..a874000c8ca26 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -561,7 +561,7 @@ inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes) char data[LOBLKSIZE + VARHDRSZ]; /* ensure union is aligned well enough: */ int32 align_it; - } workbuf; + } workbuf = {0}; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; @@ -752,7 +752,7 @@ inv_truncate(LargeObjectDesc *obj_desc, int64 len) char data[LOBLKSIZE + VARHDRSZ]; /* ensure union is aligned well enough: */ int32 align_it; - } workbuf; + } workbuf = {0}; char *workb = VARDATA(&workbuf.hdr); HeapTuple newtup; Datum values[Natts_pg_largeobject]; From 0b6aea03843dc75871b0490c6302a71a8fecb53d Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 6 Aug 2025 16:47:20 +0900 Subject: [PATCH 447/602] doc: Recommend ANALYZE after ALTER TABLE ... SET EXPRESSION AS. ALTER TABLE ... SET EXPRESSION AS removes statistics for the target column, so running ANALYZE afterward is recommended. But this was previously not documented, even though a similar recommendation exists for ALTER TABLE ... SET DATA TYPE, which also clears the column's statistics. This commit updates the documentation to include the ANALYZE recommendation for SET EXPRESSION AS. Since v18, virtual generated columns are supported, and these columns never have statistics. Therefore, ANALYZE is not needed after SET DATA TYPE or SET EXPRESSION AS when used on virtual generated columns. This commit also updates the documentation to clarify that ANALYZE is unnecessary in such cases. Back-patch the ANALYZE recommendation for SET EXPRESSION AS to v17 where the feature was introduced, and the note about virtual generated columns to v18 where those columns were added. Author: Yugo Nagata Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/20250804151418.0cf365bd2855d606763443fe@sraoss.co.jp Backpatch-through: 17 --- doc/src/sgml/ref/alter_table.sgml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 541e093a519d7..8867da6c69307 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -210,6 +210,8 @@ WITH ( MODULUS numeric_literal, REM When this form is used, the column's statistics are removed, so running ANALYZE on the table afterwards is recommended. + For a virtual generated column, ANALYZE + is not necessary because such columns never have statistics. @@ -271,6 +273,15 @@ WITH ( MODULUS numeric_literal, REM in a stored generated column is rewritten and all the future changes will apply the new generation expression. + + + When this form is used on a stored generated column, its statistics + are removed, so running + ANALYZE + on the table afterwards is recommended. + For a virtual generated column, ANALYZE + is not necessary because such columns never have statistics. 
+ + From 225ebfe30a1ae9fda74f3d8f98ea6fa511b60624 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 6 Aug 2025 17:22:03 +0900 Subject: [PATCH 448/602] Add regression test for short varlenas saved in TOAST relations toast_save_datum() has long had code able to handle short varlenas (values up to 126 bytes reduced to a 1-byte header), converting such varlenas to an external on-disk TOAST pointer with the value saved uncompressed in the secondary TOAST relation. There was zero coverage for this code path. This commit adds a test able to exercise it, relying on two external attributes, one with a low toast_tuple_target, so that it is possible to trigger the threshold for the insertion of short varlenas into the TOAST relation. Author: Nikhil Kumar Veldanda Co-authored-by: Michael Paquier Discussion: https://postgr.es/m/aJAl7-NvIk0kZByz@paquier.xyz --- src/test/regress/expected/strings.out | 34 +++++++++++++++++++++++++++ src/test/regress/sql/strings.sql | 20 ++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1bfd33de3f3c3..ba302da51e7b2 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2090,6 +2090,40 @@ SELECT c FROM toasttest; x (1 row) +DROP TABLE toasttest; +-- test with short varlenas (up to 126 data bytes reduced to a 1-byte header) +-- being toasted. +CREATE TABLE toasttest (f1 text, f2 text); +ALTER TABLE toasttest SET (toast_tuple_target = 128); +ALTER TABLE toasttest ALTER COLUMN f1 SET STORAGE EXTERNAL; +ALTER TABLE toasttest ALTER COLUMN f2 SET STORAGE EXTERNAL; +-- Here, the first value is a varlena large enough to make it toasted and +-- stored uncompressed. The second value is a short varlena, toasted +-- and stored uncompressed. +INSERT INTO toasttest values(repeat('1234', 1000), repeat('5678', 30)); +SELECT reltoastrelid::regclass AS reltoastname FROM pg_class + WHERE oid = 'toasttest'::regclass \gset +-- There should be two values inserted in the toast relation. +SELECT count(*) FROM :reltoastname WHERE chunk_seq = 0; + count +------- + 2 +(1 row) + +SELECT substr(f1, 5, 10) AS f1_data, substr(f2, 5, 10) AS f2_data + FROM toasttest; + f1_data | f2_data +------------+------------ + 1234123412 | 5678567856 +(1 row) + +SELECT pg_column_compression(f1) AS f1_comp, pg_column_compression(f2) AS f2_comp + FROM toasttest; + f1_comp | f2_comp +---------+--------- + | +(1 row) + DROP TABLE toasttest; -- -- test length diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 92c445c243961..b94004cc08ce6 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -650,6 +650,26 @@ SELECT length(c), c::text FROM toasttest; SELECT c FROM toasttest; DROP TABLE toasttest; +-- test with short varlenas (up to 126 data bytes reduced to a 1-byte header) +-- being toasted. +CREATE TABLE toasttest (f1 text, f2 text); +ALTER TABLE toasttest SET (toast_tuple_target = 128); +ALTER TABLE toasttest ALTER COLUMN f1 SET STORAGE EXTERNAL; +ALTER TABLE toasttest ALTER COLUMN f2 SET STORAGE EXTERNAL; +-- Here, the first value is a varlena large enough to make it toasted and +-- stored uncompressed. The second value is a short varlena, toasted +-- and stored uncompressed. 
+INSERT INTO toasttest values(repeat('1234', 1000), repeat('5678', 30)); +SELECT reltoastrelid::regclass AS reltoastname FROM pg_class + WHERE oid = 'toasttest'::regclass \gset +-- There should be two values inserted in the toast relation. +SELECT count(*) FROM :reltoastname WHERE chunk_seq = 0; +SELECT substr(f1, 5, 10) AS f1_data, substr(f2, 5, 10) AS f2_data + FROM toasttest; +SELECT pg_column_compression(f1) AS f1_comp, pg_column_compression(f2) AS f2_comp + FROM toasttest; +DROP TABLE toasttest; + -- -- test length -- From 8c7445a008109100f9784ea2d8e02cb6769dfb09 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Wed, 6 Aug 2025 09:41:11 +0100 Subject: [PATCH 449/602] Convert src/tools/testint128.c into a test module. This creates a new test module src/test/modules/test_int128 and moves src/tools/testint128.c into it so that it can be built using the normal build system, allowing the 128-bit integer arithmetic functions in src/include/common/int128.h to be tested automatically. For now, the tests are skipped on platforms that don't have native int128 support. While at it, fix the test128 union in the test code: the "hl" member of test128 was incorrectly defined to be a union instead of a struct, which meant that the tests were only ever setting and checking half of each 128-bit integer value. Author: Dean Rasheed Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAEZATCWgBMc9ZwKMYqQpaQz2X6gaamYRB+RnMsUNcdMcL2Mj_w@mail.gmail.com --- src/include/common/int128.h | 2 +- src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + src/test/modules/test_int128/.gitignore | 2 + src/test/modules/test_int128/Makefile | 23 ++++++++ src/test/modules/test_int128/meson.build | 33 ++++++++++++ .../modules/test_int128/t/001_test_int128.pl | 27 ++++++++++ .../modules/test_int128/test_int128.c} | 54 +++++++++++++------ 8 files changed, 125 insertions(+), 18 deletions(-) create mode 100644 src/test/modules/test_int128/.gitignore create mode 100644 src/test/modules/test_int128/Makefile create mode 100644 src/test/modules/test_int128/meson.build create mode 100644 src/test/modules/test_int128/t/001_test_int128.pl rename src/{tools/testint128.c => test/modules/test_int128/test_int128.c} (70%) diff --git a/src/include/common/int128.h b/src/include/common/int128.h index a50f5709c2988..f22530a164ec4 100644 --- a/src/include/common/int128.h +++ b/src/include/common/int128.h @@ -6,7 +6,7 @@ * We make use of the native int128 type if there is one, otherwise * implement things the hard way based on two int64 halves. * - * See src/tools/testint128.c for a simple test harness for this file. + * See src/test/modules/test_int128 for a simple test harness for this file. 
* * Copyright (c) 2017-2025, PostgreSQL Global Development Group * diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 7d3d3d52b45e9..903a8ac151aa1 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -25,6 +25,7 @@ SUBDIRS = \ test_escape \ test_extensions \ test_ginpostinglist \ + test_int128 \ test_integerset \ test_json_parser \ test_lfind \ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index dd5cd065ba10c..93be0f57289a8 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -24,6 +24,7 @@ subdir('test_dsm_registry') subdir('test_escape') subdir('test_extensions') subdir('test_ginpostinglist') +subdir('test_int128') subdir('test_integerset') subdir('test_json_parser') subdir('test_lfind') diff --git a/src/test/modules/test_int128/.gitignore b/src/test/modules/test_int128/.gitignore new file mode 100644 index 0000000000000..277fec6ed2cd6 --- /dev/null +++ b/src/test/modules/test_int128/.gitignore @@ -0,0 +1,2 @@ +/tmp_check/ +/test_int128 diff --git a/src/test/modules/test_int128/Makefile b/src/test/modules/test_int128/Makefile new file mode 100644 index 0000000000000..2e86ee93a9d7c --- /dev/null +++ b/src/test/modules/test_int128/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_int128/Makefile + +PGFILEDESC = "test_int128 - test 128-bit integer arithmetic" + +PROGRAM = test_int128 +OBJS = $(WIN32RES) test_int128.o + +PG_CPPFLAGS = -I$(libpq_srcdir) +PG_LIBS_INTERNAL += $(libpq_pgport) + +NO_INSTALL = 1 +TAP_TESTS = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_int128 +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_int128/meson.build b/src/test/modules/test_int128/meson.build new file mode 100644 index 0000000000000..4c2be7a0326f7 --- /dev/null +++ b/src/test/modules/test_int128/meson.build @@ -0,0 +1,33 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +test_int128_sources = files( + 'test_int128.c', +) + +if host_system == 'windows' + test_int128_sources += rc_bin_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_int128', + '--FILEDESC', 'test int128 program',]) +endif + +test_int128 = executable('test_int128', + test_int128_sources, + dependencies: [frontend_code, libpq], + kwargs: default_bin_args + { + 'install': false, + }, +) +testprep_targets += test_int128 + + +tests += { + 'name': 'test_int128', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_test_int128.pl', + ], + 'deps': [test_int128], + }, +} diff --git a/src/test/modules/test_int128/t/001_test_int128.pl b/src/test/modules/test_int128/t/001_test_int128.pl new file mode 100644 index 0000000000000..0c683869f34e7 --- /dev/null +++ b/src/test/modules/test_int128/t/001_test_int128.pl @@ -0,0 +1,27 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +# Test 128-bit integer arithmetic code in int128.h + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Utils; +use Test::More; + +# Run the test program with 1M iterations +my $exe = "test_int128"; +my $size = 1_000_000; + +note "testing executable $exe"; + +my ($stdout, $stderr) = run_command([ $exe, $size ]); + +SKIP: +{ + skip "no native int128 type", 2 if $stdout =~ /skipping tests/; + + is($stdout, "", "test_int128: no stdout"); + is($stderr, "", "test_int128: no stderr"); +} 
+ +done_testing(); diff --git a/src/tools/testint128.c b/src/test/modules/test_int128/test_int128.c similarity index 70% rename from src/tools/testint128.c rename to src/test/modules/test_int128/test_int128.c index a25631e277d2e..75099a75178e6 100644 --- a/src/tools/testint128.c +++ b/src/test/modules/test_int128/test_int128.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- * - * testint128.c + * test_int128.c * Testbed for roll-our-own 128-bit integer arithmetic. * * This is a standalone test program that compares the behavior of an @@ -10,13 +10,18 @@ * * * IDENTIFICATION - * src/tools/testint128.c + * src/test/modules/test_int128/test_int128.c * *------------------------------------------------------------------------- */ #include "postgres_fe.h" +#include + +/* Require a native int128 type */ +#ifdef HAVE_INT128 + /* * By default, we test the non-native implementation in int128.h; but * by predefining USE_NATIVE_INT128 to 1, you can test the native @@ -36,7 +41,7 @@ typedef union { int128 i128; INT128 I128; - union + struct { #ifdef WORDS_BIGENDIAN int64 hi; @@ -48,6 +53,7 @@ typedef union } hl; } test128; +#define INT128_HEX_FORMAT "%016" PRIx64 "%016" PRIx64 /* * Control version of comparator. @@ -75,7 +81,7 @@ main(int argc, char **argv) { long count; - pg_prng_seed(&pg_global_prng_state, 0); + pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL)); if (argc >= 2) count = strtol(argv[1], NULL, 0); @@ -99,9 +105,9 @@ main(int argc, char **argv) if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf("%016lX%016lX + unsigned %lX\n", x, y, z); - printf("native = %016lX%016lX\n", t1.hl.hi, t1.hl.lo); - printf("result = %016lX%016lX\n", t2.hl.hi, t2.hl.lo); + printf(INT128_HEX_FORMAT " + unsigned " PRIx64 "\n", x, y, z); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; } @@ -114,9 +120,9 @@ main(int argc, char **argv) if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf("%016lX%016lX + signed %lX\n", x, y, z); - printf("native = %016lX%016lX\n", t1.hl.hi, t1.hl.lo); - printf("result = %016lX%016lX\n", t2.hl.hi, t2.hl.lo); + printf(INT128_HEX_FORMAT " + signed " PRIx64 "\n", x, y, z); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; } @@ -128,9 +134,9 @@ main(int argc, char **argv) if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf("%lX * %lX\n", x, y); - printf("native = %016lX%016lX\n", t1.hl.hi, t1.hl.lo); - printf("result = %016lX%016lX\n", t2.hl.hi, t2.hl.lo); + printf(PRIx64 " * " PRIx64 "\n", x, y); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; } @@ -146,8 +152,8 @@ main(int argc, char **argv) printf("comparison failure: %d vs %d\n", my_int128_compare(t1.i128, t2.i128), int128_compare(t1.I128, t2.I128)); - printf("arg1 = %016lX%016lX\n", t1.hl.hi, t1.hl.lo); - printf("arg2 = %016lX%016lX\n", t2.hl.hi, t2.hl.lo); + printf("arg1 = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("arg2 = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; } @@ -160,11 +166,25 @@ main(int argc, char **argv) printf("comparison failure: %d vs %d\n", my_int128_compare(t1.i128, t2.i128), int128_compare(t1.I128, t2.I128)); - printf("arg1 = %016lX%016lX\n", t1.hl.hi, t1.hl.lo); - printf("arg2 = %016lX%016lX\n", t2.hl.hi, t2.hl.lo); + printf("arg1 = " 
INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("arg2 = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; } } return 0; } + +#else /* ! HAVE_INT128 */ + +/* + * For now, do nothing if we don't have a native int128 type. + */ +int +main(int argc, char **argv) +{ + printf("skipping tests: no native int128 type\n"); + return 0; +} + +#endif From 73d33be4dab7923bf879b82e8ea5fa214099b516 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 6 Aug 2025 10:54:48 +0200 Subject: [PATCH 450/602] Remove INT64_HEX_FORMAT and UINT64_HEX_FORMAT These were introduced (commit efdc7d74753) at the same time as we were moving to using the standard inttypes.h format macros (commit a0ed19e0a9e). It doesn't seem useful to keep a new already-deprecated interface like this with only a few users, so remove the new symbols again and have the callers use PRIx64. (Also, INT64_HEX_FORMAT was kind of a misnomer, since hex formats all use unsigned types.) Reviewed-by: Nathan Bossart Reviewed-by: Thomas Munro Discussion: https://www.postgresql.org/message-id/flat/0ac47b5d-e5ab-4cac-98a7-bdee0e2831e4%40eisentraut.org --- contrib/postgres_fdw/option.c | 2 +- src/backend/utils/error/csvlog.c | 2 +- src/backend/utils/error/elog.c | 4 ++-- src/backend/utils/error/jsonlog.c | 2 +- src/include/c.h | 2 -- src/test/modules/test_radixtree/test_radixtree.c | 2 +- 6 files changed, 6 insertions(+), 8 deletions(-) diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index d6fa89bad9399..04788b7e8b35f 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -522,7 +522,7 @@ process_pgfdw_appname(const char *appname) appendStringInfoString(&buf, application_name); break; case 'c': - appendStringInfo(&buf, INT64_HEX_FORMAT ".%x", MyStartTime, MyProcPid); + appendStringInfo(&buf, "%" PRIx64 ".%x", MyStartTime, MyProcPid); break; case 'C': appendStringInfoString(&buf, cluster_name); diff --git a/src/backend/utils/error/csvlog.c b/src/backend/utils/error/csvlog.c index fdac3c048e36a..c3159ed7d979b 100644 --- a/src/backend/utils/error/csvlog.c +++ b/src/backend/utils/error/csvlog.c @@ -120,7 +120,7 @@ write_csvlog(ErrorData *edata) appendStringInfoChar(&buf, ','); /* session id */ - appendStringInfo(&buf, INT64_HEX_FORMAT ".%x", MyStartTime, MyProcPid); + appendStringInfo(&buf, "%" PRIx64 ".%x", MyStartTime, MyProcPid); appendStringInfoChar(&buf, ','); /* Line number */ diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index afce1a8e1f003..b7b9692f8c884 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -2959,12 +2959,12 @@ log_status_format(StringInfo buf, const char *format, ErrorData *edata) { char strfbuf[128]; - snprintf(strfbuf, sizeof(strfbuf) - 1, INT64_HEX_FORMAT ".%x", + snprintf(strfbuf, sizeof(strfbuf) - 1, "%" PRIx64 ".%x", MyStartTime, MyProcPid); appendStringInfo(buf, "%*s", padding, strfbuf); } else - appendStringInfo(buf, INT64_HEX_FORMAT ".%x", MyStartTime, MyProcPid); + appendStringInfo(buf, "%" PRIx64 ".%x", MyStartTime, MyProcPid); break; case 'p': if (padding != 0) diff --git a/src/backend/utils/error/jsonlog.c b/src/backend/utils/error/jsonlog.c index 519eacf17f83c..2619f49904201 100644 --- a/src/backend/utils/error/jsonlog.c +++ b/src/backend/utils/error/jsonlog.c @@ -168,7 +168,7 @@ write_jsonlog(ErrorData *edata) } /* Session id */ - appendJSONKeyValueFmt(&buf, "session_id", true, INT64_HEX_FORMAT ".%x", + appendJSONKeyValueFmt(&buf, "session_id", true, "%" PRIx64 ".%x", MyStartTime, 
MyProcPid); /* Line number */ diff --git a/src/include/c.h b/src/include/c.h index 6d4495bdd9f68..bbdaa88c63a6f 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -530,8 +530,6 @@ typedef uint32 bits32; /* >= 32 bits */ /* snprintf format strings to use for 64-bit integers */ #define INT64_FORMAT "%" PRId64 #define UINT64_FORMAT "%" PRIu64 -#define INT64_HEX_FORMAT "%" PRIx64 -#define UINT64_HEX_FORMAT "%" PRIx64 /* * 128-bit signed and unsigned integers diff --git a/src/test/modules/test_radixtree/test_radixtree.c b/src/test/modules/test_radixtree/test_radixtree.c index 32de6a3123e4d..80ad029616473 100644 --- a/src/test/modules/test_radixtree/test_radixtree.c +++ b/src/test/modules/test_radixtree/test_radixtree.c @@ -44,7 +44,7 @@ uint64 _expected = (expected_expr); \ if (_result != _expected) \ elog(ERROR, \ - "%s yielded " UINT64_HEX_FORMAT ", expected " UINT64_HEX_FORMAT " (%s) in file \"%s\" line %u", \ + "%s yielded %" PRIx64 ", expected %" PRIx64 " (%s) in file \"%s\" line %u", \ #result_expr, _result, _expected, #expected_expr, __FILE__, __LINE__); \ } while (0) From 811633105a38bc40ff2f75879a4b9a1113bb0347 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Wed, 6 Aug 2025 10:16:14 +0100 Subject: [PATCH 451/602] Fix printf format specifiers in test_int128 module. Compiler warnings introduced by 8c7445a0081. Author: Dean Rasheed --- src/test/modules/test_int128/test_int128.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/modules/test_int128/test_int128.c b/src/test/modules/test_int128/test_int128.c index 75099a75178e6..caa06541a1fe3 100644 --- a/src/test/modules/test_int128/test_int128.c +++ b/src/test/modules/test_int128/test_int128.c @@ -105,7 +105,7 @@ main(int argc, char **argv) if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf(INT128_HEX_FORMAT " + unsigned " PRIx64 "\n", x, y, z); + printf(INT128_HEX_FORMAT " + unsigned %016" PRIx64 "\n", x, y, z); printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; @@ -120,7 +120,7 @@ main(int argc, char **argv) if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf(INT128_HEX_FORMAT " + signed " PRIx64 "\n", x, y, z); + printf(INT128_HEX_FORMAT " + signed %016" PRIx64 "\n", x, y, z); printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; @@ -134,7 +134,7 @@ main(int argc, char **argv) if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf(PRIx64 " * " PRIx64 "\n", x, y); + printf("%016" PRIx64 " * %016" PRIx64 "\n", x, y); printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; From 5761d991c98471b7dab3147b71f9caf00a6f8ce4 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Wed, 6 Aug 2025 11:37:07 +0100 Subject: [PATCH 452/602] Refactor int128.h, bringing the native and non-native code together. This rearranges the code in src/include/common/int128.h, so that the native and non-native implementations of each function are together inside the function body (as they are in src/include/common/int.h), rather than being in separate parts of the file. This improves readability and maintainability, making it easier to compare the native and non-native implementations, and avoiding the need to duplicate every function comment and declaration. 
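The resulting shape, for one representative function, looks like the following simplified sketch (the committed int128_add_uint64 in the hunks below expresses the same carry test in a signed formulation):

    static inline void
    int128_add_uint64(INT128 *i128, uint64 v)
    {
    #if USE_NATIVE_INT128
        *i128 += v;
    #else
        uint64      oldlo = i128->lo;

        i128->lo += v;
        if (i128->lo < oldlo)   /* low half wrapped around? */
            i128->hi++;         /* then carry into the high half */
    #endif
    }

Each function now has exactly one declaration and one comment, with the #if selecting between the native and hand-rolled bodies.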
Author: Dean Rasheed Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAEZATCWgBMc9ZwKMYqQpaQz2X6gaamYRB+RnMsUNcdMcL2Mj_w@mail.gmail.com --- src/include/common/int128.h | 112 ++++++++++++++---------------------- 1 file changed, 42 insertions(+), 70 deletions(-) diff --git a/src/include/common/int128.h b/src/include/common/int128.h index f22530a164ec4..8c300e56d9a32 100644 --- a/src/include/common/int128.h +++ b/src/include/common/int128.h @@ -29,81 +29,21 @@ #endif #endif - -#if USE_NATIVE_INT128 - -typedef int128 INT128; - -/* - * Add an unsigned int64 value into an INT128 variable. - */ -static inline void -int128_add_uint64(INT128 *i128, uint64 v) -{ - *i128 += v; -} - /* - * Add a signed int64 value into an INT128 variable. - */ -static inline void -int128_add_int64(INT128 *i128, int64 v) -{ - *i128 += v; -} - -/* - * Add the 128-bit product of two int64 values into an INT128 variable. + * If native int128 support is enabled, INT128 is just int128. Otherwise, it + * is a structure with separate 64-bit high and low parts. * - * XXX with a stupid compiler, this could actually be less efficient than - * the other implementation; maybe we should do it by hand always? - */ -static inline void -int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) -{ - *i128 += (int128) x * (int128) y; -} - -/* - * Compare two INT128 values, return -1, 0, or +1. - */ -static inline int -int128_compare(INT128 x, INT128 y) -{ - if (x < y) - return -1; - if (x > y) - return 1; - return 0; -} - -/* - * Widen int64 to INT128. - */ -static inline INT128 -int64_to_int128(int64 v) -{ - return (INT128) v; -} - -/* - * Convert INT128 to int64 (losing any high-order bits). - * This also works fine for casting down to uint64. - */ -static inline int64 -int128_to_int64(INT128 val) -{ - return (int64) val; -} - -#else /* !USE_NATIVE_INT128 */ - -/* * We lay out the INT128 structure with the same content and byte ordering * that a native int128 type would (probably) have. This makes no difference * for ordinary use of INT128, but allows union'ing INT128 with int128 for * testing purposes. */ +#if USE_NATIVE_INT128 + +typedef int128 INT128; + +#else + typedef struct { #ifdef WORDS_BIGENDIAN @@ -115,12 +55,17 @@ typedef struct #endif } INT128; +#endif + /* * Add an unsigned int64 value into an INT128 variable. */ static inline void int128_add_uint64(INT128 *i128, uint64 v) { +#if USE_NATIVE_INT128 + *i128 += v; +#else /* * First add the value to the .lo part, then check to see if a carry needs * to be propagated into the .hi part. A carry is needed if both inputs @@ -134,6 +79,7 @@ int128_add_uint64(INT128 *i128, uint64 v) if (((int64) v < 0 && (int64) oldlo < 0) || (((int64) v < 0 || (int64) oldlo < 0) && (int64) i128->lo >= 0)) i128->hi++; +#endif } /* @@ -142,6 +88,9 @@ int128_add_uint64(INT128 *i128, uint64 v) static inline void int128_add_int64(INT128 *i128, int64 v) { +#if USE_NATIVE_INT128 + *i128 += v; +#else /* * This is much like the above except that the carry logic differs for * negative v. Ordinarily we'd need to subtract 1 from the .hi part @@ -161,6 +110,7 @@ int128_add_int64(INT128 *i128, int64 v) if (!((int64) oldlo < 0 || (int64) i128->lo >= 0)) i128->hi--; } +#endif } /* @@ -176,6 +126,13 @@ int128_add_int64(INT128 *i128, int64 v) static inline void int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) { +#if USE_NATIVE_INT128 + /* + * XXX with a stupid compiler, this could actually be less efficient than + * the non-native implementation; maybe we should do it by hand always? 
+ */ + *i128 += (int128) x * (int128) y; +#else /* INT64_AU32 must use arithmetic right shift */ StaticAssertDecl(((int64) -1 >> 1) == (int64) -1, "arithmetic right shift is needed"); @@ -229,6 +186,7 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) /* the fourth term: always unsigned */ int128_add_uint64(i128, x_l32 * y_l32); } +#endif } /* @@ -237,6 +195,13 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) static inline int int128_compare(INT128 x, INT128 y) { +#if USE_NATIVE_INT128 + if (x < y) + return -1; + if (x > y) + return 1; + return 0; +#else if (x.hi < y.hi) return -1; if (x.hi > y.hi) @@ -246,6 +211,7 @@ int128_compare(INT128 x, INT128 y) if (x.lo > y.lo) return 1; return 0; +#endif } /* @@ -254,11 +220,15 @@ int128_compare(INT128 x, INT128 y) static inline INT128 int64_to_int128(int64 v) { +#if USE_NATIVE_INT128 + return (INT128) v; +#else INT128 val; val.lo = (uint64) v; val.hi = (v < 0) ? -INT64CONST(1) : INT64CONST(0); return val; +#endif } /* @@ -268,9 +238,11 @@ int64_to_int128(int64 v) static inline int64 int128_to_int64(INT128 val) { +#if USE_NATIVE_INT128 + return (int64) val; +#else return (int64) val.lo; +#endif } -#endif /* USE_NATIVE_INT128 */ - #endif /* INT128_H */ From 99139c46cbf2bda7880592c4e5da1ebf177888ac Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 6 Aug 2025 22:52:13 +0900 Subject: [PATCH 453/602] Fix typo in comment. Author: Chao Li Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/CD9B2247-617A-4761-8338-2705C8728E2A@highgo.com --- src/interfaces/ecpg/compatlib/informix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/interfaces/ecpg/compatlib/informix.c b/src/interfaces/ecpg/compatlib/informix.c index e829d722f2228..ca11a81f1bcfe 100644 --- a/src/interfaces/ecpg/compatlib/informix.c +++ b/src/interfaces/ecpg/compatlib/informix.c @@ -807,8 +807,10 @@ rfmtlong(long lng_val, const char *fmt, char *outbuf) if (strchr(fmt, (int) '(') && strchr(fmt, (int) ')')) brackets_ok = 1; - /* get position of the right-most dot in the format-string */ - /* and fill the temp-string wit '0's up to there. */ + /* + * get position of the right-most dot in the format-string and fill the + * temp-string with '0's up to there. + */ dotpos = getRightMostDot(fmt); /* start to parse the format-string */ From 35baa60cc7f374401f06c7dc422e93bcf31b942b Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 6 Aug 2025 12:08:07 -0500 Subject: [PATCH 454/602] Rename transformRelOptions()'s "namspace" parameter to "nameSpace". MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name "namspace" looks like a typo, but it was presumably meant to avoid using the "namespace" C++ keyword. This commit renames the parameter to "nameSpace" to prevent future confusion while still avoiding the keyword. Reviewed-by: Álvaro Herrera Reviewed-by: Tom Lane Reviewed-by: Ashutosh Bapat Discussion: https://postgr.es/m/aJJxpfsDfiQ1VbJ5%40nathan --- src/backend/access/common/reloptions.c | 10 +++++----- src/include/access/reloptions.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 594a657ea1a78..0af3fea68fa48 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -1164,7 +1164,7 @@ add_local_string_reloption(local_relopts *relopts, const char *name, * but we declare them as Datums to avoid including array.h in reloptions.h. 
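Since the hunks below swap the bare characters for the new names one for one, the additions to src/include/libpq/protocol.h presumably amount to definitions like this sketch (byte values inferred from the replacements; all of these sub-message bytes travel inside CopyData messages):

    /* replication sub-message types */
    #define PqReplMsg_HotStandbyFeedback    'h'
    #define PqReplMsg_Keepalive             'k'
    #define PqReplMsg_PrimaryStatusRequest  'p'
    #define PqReplMsg_PrimaryStatusUpdate   's'
    #define PqReplMsg_StandbyStatusUpdate   'r'
    #define PqReplMsg_WALData               'w'

    /* base-backup sub-message types */
    #define PqBackupMsg_Manifest            'm'
    #define PqBackupMsg_NewArchive          'n'
    #define PqBackupMsg_ProgressReport      'p'

Note that 'p' is used by both a replication and a backup sub-message, which is one reason the two families get distinct prefixes rather than a single namespace.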
*/ Datum -transformRelOptions(Datum oldOptions, List *defList, const char *namspace, +transformRelOptions(Datum oldOptions, List *defList, const char *nameSpace, const char *const validnsps[], bool acceptOidsOff, bool isReset) { Datum result; @@ -1200,14 +1200,14 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, int kw_len; /* ignore if not in the same namespace */ - if (namspace == NULL) + if (nameSpace == NULL) { if (def->defnamespace != NULL) continue; } else if (def->defnamespace == NULL) continue; - else if (strcmp(def->defnamespace, namspace) != 0) + else if (strcmp(def->defnamespace, nameSpace) != 0) continue; kw_len = strlen(def->defname); @@ -1277,14 +1277,14 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, } /* ignore if not in the same namespace */ - if (namspace == NULL) + if (nameSpace == NULL) { if (def->defnamespace != NULL) continue; } else if (def->defnamespace == NULL) continue; - else if (strcmp(def->defnamespace, namspace) != 0) + else if (strcmp(def->defnamespace, nameSpace) != 0) continue; /* diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index dfbb4c854606c..a604a4702c37c 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -233,7 +233,7 @@ extern void add_local_string_reloption(local_relopts *relopts, const char *name, fill_string_relopt filler, int offset); extern Datum transformRelOptions(Datum oldOptions, List *defList, - const char *namspace, const char *const validnsps[], + const char *nameSpace, const char *const validnsps[], bool acceptOidsOff, bool isReset); extern List *untransformRelOptions(Datum options); extern bytea *extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, From 9ea3b6f751abd7701f3f32d9df26d66410754c94 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 6 Aug 2025 13:37:00 -0500 Subject: [PATCH 455/602] Expand usage of macros for protocol characters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit makes use of the existing PqMsg_* macros in more places and adds new PqReplMsg_* and PqBackupMsg_* macros for use in special replication and backup messages, respectively. 
Author: Dave Cramer Co-authored-by: Fabrízio de Royes Mello Reviewed-by: Jacob Champion Reviewed-by: Álvaro Herrera Reviewed-by: Euler Taveira Discussion: https://postgr.es/m/aIECfYfevCUpenBT@nathan Discussion: https://postgr.es/m/CAFcNs%2Br73NOUb7%2BqKrV4HHEki02CS96Z%2Bx19WaFgE087BWwEng%40mail.gmail.com --- src/backend/backup/basebackup_copy.c | 14 +++++------ .../replication/logical/applyparallelworker.c | 4 +-- src/backend/replication/logical/worker.c | 10 ++++---- src/backend/replication/walreceiver.c | 8 +++--- src/backend/replication/walsender.c | 25 +++++++++++-------- src/bin/pg_basebackup/pg_basebackup.c | 9 ++++--- src/bin/pg_basebackup/pg_recvlogical.c | 11 ++++---- src/bin/pg_basebackup/receivelog.c | 11 ++++---- src/include/libpq/protocol.h | 21 ++++++++++++++++ 9 files changed, 70 insertions(+), 43 deletions(-) diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c index 18b0b5a52d3f8..eb45d3bcb663b 100644 --- a/src/backend/backup/basebackup_copy.c +++ b/src/backend/backup/basebackup_copy.c @@ -143,7 +143,7 @@ bbsink_copystream_begin_backup(bbsink *sink) buf = palloc(mysink->base.bbs_buffer_length + MAXIMUM_ALIGNOF); mysink->msgbuffer = buf + (MAXIMUM_ALIGNOF - 1); mysink->base.bbs_buffer = buf + MAXIMUM_ALIGNOF; - mysink->msgbuffer[0] = 'd'; /* archive or manifest data */ + mysink->msgbuffer[0] = PqMsg_CopyData; /* archive or manifest data */ /* Tell client the backup start location. */ SendXlogRecPtrResult(state->startptr, state->starttli); @@ -170,7 +170,7 @@ bbsink_copystream_begin_archive(bbsink *sink, const char *archive_name) ti = list_nth(state->tablespaces, state->tablespace_num); pq_beginmessage(&buf, PqMsg_CopyData); - pq_sendbyte(&buf, 'n'); /* New archive */ + pq_sendbyte(&buf, PqBackupMsg_NewArchive); pq_sendstring(&buf, archive_name); pq_sendstring(&buf, ti->path == NULL ? "" : ti->path); pq_endmessage(&buf); @@ -191,7 +191,7 @@ bbsink_copystream_archive_contents(bbsink *sink, size_t len) if (mysink->send_to_client) { /* Add one because we're also sending a leading type byte. */ - pq_putmessage('d', mysink->msgbuffer, len + 1); + pq_putmessage(PqMsg_CopyData, mysink->msgbuffer, len + 1); } /* Consider whether to send a progress report to the client. */ @@ -221,7 +221,7 @@ bbsink_copystream_archive_contents(bbsink *sink, size_t len) mysink->last_progress_report_time = now; pq_beginmessage(&buf, PqMsg_CopyData); - pq_sendbyte(&buf, 'p'); /* Progress report */ + pq_sendbyte(&buf, PqBackupMsg_ProgressReport); pq_sendint64(&buf, state->bytes_done); pq_endmessage(&buf); pq_flush_if_writable(); @@ -247,7 +247,7 @@ bbsink_copystream_end_archive(bbsink *sink) mysink->bytes_done_at_last_time_check = state->bytes_done; mysink->last_progress_report_time = GetCurrentTimestamp(); pq_beginmessage(&buf, PqMsg_CopyData); - pq_sendbyte(&buf, 'p'); /* Progress report */ + pq_sendbyte(&buf, PqBackupMsg_ProgressReport); pq_sendint64(&buf, state->bytes_done); pq_endmessage(&buf); pq_flush_if_writable(); @@ -262,7 +262,7 @@ bbsink_copystream_begin_manifest(bbsink *sink) StringInfoData buf; pq_beginmessage(&buf, PqMsg_CopyData); - pq_sendbyte(&buf, 'm'); /* Manifest */ + pq_sendbyte(&buf, PqBackupMsg_Manifest); pq_endmessage(&buf); } @@ -277,7 +277,7 @@ bbsink_copystream_manifest_contents(bbsink *sink, size_t len) if (mysink->send_to_client) { /* Add one because we're also sending a leading type byte. 
*/ - pq_putmessage('d', mysink->msgbuffer, len + 1); + pq_putmessage(PqMsg_CopyData, mysink->msgbuffer, len + 1); } } diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c index 1fa931a74229d..cd0e19176fdb2 100644 --- a/src/backend/replication/logical/applyparallelworker.c +++ b/src/backend/replication/logical/applyparallelworker.c @@ -778,10 +778,10 @@ LogicalParallelApplyLoop(shm_mq_handle *mqh) /* * The first byte of messages sent from leader apply worker to - * parallel apply workers can only be 'w'. + * parallel apply workers can only be PqReplMsg_WALData. */ c = pq_getmsgbyte(&s); - if (c != 'w') + if (c != PqReplMsg_WALData) elog(ERROR, "unexpected message \"%c\"", c); /* diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 89e241c839280..0fdc5de57ba89 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -3994,7 +3994,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received) c = pq_getmsgbyte(&s); - if (c == 'w') + if (c == PqReplMsg_WALData) { XLogRecPtr start_lsn; XLogRecPtr end_lsn; @@ -4016,7 +4016,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received) maybe_advance_nonremovable_xid(&rdt_data, false); } - else if (c == 'k') + else if (c == PqReplMsg_Keepalive) { XLogRecPtr end_lsn; TimestampTz timestamp; @@ -4035,7 +4035,7 @@ LogicalRepApplyLoop(XLogRecPtr last_received) UpdateWorkerStats(last_received, timestamp, true); } - else if (c == 's') /* Primary status update */ + else if (c == PqReplMsg_PrimaryStatusUpdate) { rdt_data.remote_lsn = pq_getmsgint64(&s); rdt_data.remote_oldestxid = FullTransactionIdFromU64((uint64) pq_getmsgint64(&s)); @@ -4267,7 +4267,7 @@ send_feedback(XLogRecPtr recvpos, bool force, bool requestReply) else resetStringInfo(reply_message); - pq_sendbyte(reply_message, 'r'); + pq_sendbyte(reply_message, PqReplMsg_StandbyStatusUpdate); pq_sendint64(reply_message, recvpos); /* write */ pq_sendint64(reply_message, flushpos); /* flush */ pq_sendint64(reply_message, writepos); /* apply */ @@ -4438,7 +4438,7 @@ request_publisher_status(RetainDeadTuplesData *rdt_data) * Send the current time to update the remote walsender's latest reply * message received time. 
*/ - pq_sendbyte(request_message, 'p'); + pq_sendbyte(request_message, PqReplMsg_PrimaryStatusRequest); pq_sendint64(request_message, GetCurrentTimestamp()); elog(DEBUG2, "sending publisher status request message"); diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index b62811017116f..7361ffc9dcf5e 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -826,7 +826,7 @@ XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len, TimeLineID tli) switch (type) { - case 'w': /* WAL records */ + case PqReplMsg_WALData: { StringInfoData incoming_message; @@ -850,7 +850,7 @@ XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len, TimeLineID tli) XLogWalRcvWrite(buf, len, dataStart, tli); break; } - case 'k': /* Keepalive */ + case PqReplMsg_Keepalive: { StringInfoData incoming_message; @@ -1130,7 +1130,7 @@ XLogWalRcvSendReply(bool force, bool requestReply) applyPtr = GetXLogReplayRecPtr(NULL); resetStringInfo(&reply_message); - pq_sendbyte(&reply_message, 'r'); + pq_sendbyte(&reply_message, PqReplMsg_StandbyStatusUpdate); pq_sendint64(&reply_message, writePtr); pq_sendint64(&reply_message, flushPtr); pq_sendint64(&reply_message, applyPtr); @@ -1234,7 +1234,7 @@ XLogWalRcvSendHSFeedback(bool immed) /* Construct the message and send it. */ resetStringInfo(&reply_message); - pq_sendbyte(&reply_message, 'h'); + pq_sendbyte(&reply_message, PqReplMsg_HotStandbyFeedback); pq_sendint64(&reply_message, GetCurrentTimestamp()); pq_sendint32(&reply_message, xmin); pq_sendint32(&reply_message, xmin_epoch); diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index ee911394a23c6..0855bae3535a6 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1534,7 +1534,7 @@ WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xi resetStringInfo(ctx->out); - pq_sendbyte(ctx->out, 'w'); + pq_sendbyte(ctx->out, PqReplMsg_WALData); pq_sendint64(ctx->out, lsn); /* dataStart */ pq_sendint64(ctx->out, lsn); /* walEnd */ @@ -2292,7 +2292,8 @@ ProcessRepliesIfAny(void) switch (firstchar) { /* - * 'd' means a standby reply wrapped in a CopyData packet. + * PqMsg_CopyData means a standby reply wrapped in a CopyData + * packet. */ case PqMsg_CopyData: ProcessStandbyMessage(); @@ -2300,8 +2301,9 @@ ProcessRepliesIfAny(void) break; /* - * CopyDone means the standby requested to finish streaming. - * Reply with CopyDone, if we had not sent that already. + * PqMsg_CopyDone means the standby requested to finish + * streaming. Reply with CopyDone, if we had not sent that + * already. */ case PqMsg_CopyDone: if (!streamingDoneSending) @@ -2315,7 +2317,8 @@ ProcessRepliesIfAny(void) break; /* - * 'X' means that the standby is closing down the socket. + * PqMsg_Terminate means that the standby is closing down the + * socket. */ case PqMsg_Terminate: proc_exit(0); @@ -2350,15 +2353,15 @@ ProcessStandbyMessage(void) switch (msgtype) { - case 'r': + case PqReplMsg_StandbyStatusUpdate: ProcessStandbyReplyMessage(); break; - case 'h': + case PqReplMsg_HotStandbyFeedback: ProcessStandbyHSFeedbackMessage(); break; - case 'p': + case PqReplMsg_PrimaryStatusRequest: ProcessStandbyPSRequestMessage(); break; @@ -2752,7 +2755,7 @@ ProcessStandbyPSRequestMessage(void) /* construct the message... 
*/ resetStringInfo(&output_message); - pq_sendbyte(&output_message, 's'); + pq_sendbyte(&output_message, PqReplMsg_PrimaryStatusUpdate); pq_sendint64(&output_message, lsn); pq_sendint64(&output_message, (int64) U64FromFullTransactionId(fullOldestXidInCommit)); pq_sendint64(&output_message, (int64) U64FromFullTransactionId(nextFullXid)); @@ -3364,7 +3367,7 @@ XLogSendPhysical(void) * OK to read and send the slice. */ resetStringInfo(&output_message); - pq_sendbyte(&output_message, 'w'); + pq_sendbyte(&output_message, PqReplMsg_WALData); pq_sendint64(&output_message, startptr); /* dataStart */ pq_sendint64(&output_message, SendRqstPtr); /* walEnd */ @@ -4135,7 +4138,7 @@ WalSndKeepalive(bool requestReply, XLogRecPtr writePtr) /* construct the message... */ resetStringInfo(&output_message); - pq_sendbyte(&output_message, 'k'); + pq_sendbyte(&output_message, PqReplMsg_Keepalive); pq_sendint64(&output_message, XLogRecPtrIsInvalid(writePtr) ? sentPtr : writePtr); pq_sendint64(&output_message, GetCurrentTimestamp()); pq_sendbyte(&output_message, requestReply ? 1 : 0); diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 55621f35fb6b7..0a3ca4315de1e 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -35,6 +35,7 @@ #include "fe_utils/option_utils.h" #include "fe_utils/recovery_gen.h" #include "getopt_long.h" +#include "libpq/protocol.h" #include "receivelog.h" #include "streamutil.h" @@ -1338,7 +1339,7 @@ ReceiveArchiveStreamChunk(size_t r, char *copybuf, void *callback_data) /* Each CopyData message begins with a type byte. */ switch (GetCopyDataByte(r, copybuf, &cursor)) { - case 'n': + case PqBackupMsg_NewArchive: { /* New archive. */ char *archive_name; @@ -1410,7 +1411,7 @@ ReceiveArchiveStreamChunk(size_t r, char *copybuf, void *callback_data) break; } - case 'd': + case PqMsg_CopyData: { /* Archive or manifest data. */ if (state->manifest_buffer != NULL) @@ -1446,7 +1447,7 @@ ReceiveArchiveStreamChunk(size_t r, char *copybuf, void *callback_data) break; } - case 'p': + case PqBackupMsg_ProgressReport: { /* * Progress report. @@ -1465,7 +1466,7 @@ ReceiveArchiveStreamChunk(size_t r, char *copybuf, void *callback_data) break; } - case 'm': + case PqBackupMsg_Manifest: { /* * Manifest data will be sent next. This message is not diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c index 0e9d2e2394731..7a4d1a2d2ca66 100644 --- a/src/bin/pg_basebackup/pg_recvlogical.c +++ b/src/bin/pg_basebackup/pg_recvlogical.c @@ -24,6 +24,7 @@ #include "getopt_long.h" #include "libpq-fe.h" #include "libpq/pqsignal.h" +#include "libpq/protocol.h" #include "pqexpbuffer.h" #include "streamutil.h" @@ -149,7 +150,7 @@ sendFeedback(PGconn *conn, TimestampTz now, bool force, bool replyRequested) LSN_FORMAT_ARGS(output_fsync_lsn), replication_slot); - replybuf[len] = 'r'; + replybuf[len] = PqReplMsg_StandbyStatusUpdate; len += 1; fe_sendint64(output_written_lsn, &replybuf[len]); /* write */ len += 8; @@ -454,7 +455,7 @@ StreamLogicalLog(void) } /* Check the message type. */ - if (copybuf[0] == 'k') + if (copybuf[0] == PqReplMsg_Keepalive) { int pos; bool replyRequested; @@ -466,7 +467,7 @@ StreamLogicalLog(void) * We just check if the server requested a reply, and ignore the * rest. 
*/ - pos = 1; /* skip msgtype 'k' */ + pos = 1; /* skip msgtype PqReplMsg_Keepalive */ walEnd = fe_recvint64(©buf[pos]); output_written_lsn = Max(walEnd, output_written_lsn); @@ -509,7 +510,7 @@ StreamLogicalLog(void) continue; } - else if (copybuf[0] != 'w') + else if (copybuf[0] != PqReplMsg_WALData) { pg_log_error("unrecognized streaming header: \"%c\"", copybuf[0]); @@ -521,7 +522,7 @@ StreamLogicalLog(void) * message. We only need the WAL location field (dataStart), the rest * of the header is ignored. */ - hdr_len = 1; /* msgtype 'w' */ + hdr_len = 1; /* msgtype PqReplMsg_WALData */ hdr_len += 8; /* dataStart */ hdr_len += 8; /* walEnd */ hdr_len += 8; /* sendTime */ diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index f2b54d3c50171..25b13c7f55cd1 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -21,6 +21,7 @@ #include "access/xlog_internal.h" #include "common/logging.h" #include "libpq-fe.h" +#include "libpq/protocol.h" #include "receivelog.h" #include "streamutil.h" @@ -338,7 +339,7 @@ sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyReque char replybuf[1 + 8 + 8 + 8 + 8 + 1]; int len = 0; - replybuf[len] = 'r'; + replybuf[len] = PqReplMsg_StandbyStatusUpdate; len += 1; fe_sendint64(blockpos, &replybuf[len]); /* write */ len += 8; @@ -823,13 +824,13 @@ HandleCopyStream(PGconn *conn, StreamCtl *stream, } /* Check the message type. */ - if (copybuf[0] == 'k') + if (copybuf[0] == PqReplMsg_Keepalive) { if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos, &last_status)) goto error; } - else if (copybuf[0] == 'w') + else if (copybuf[0] == PqReplMsg_WALData) { if (!ProcessWALDataMsg(conn, stream, copybuf, r, &blockpos)) goto error; @@ -1001,7 +1002,7 @@ ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, * Parse the keepalive message, enclosed in the CopyData message. We just * check if the server requested a reply, and ignore the rest. */ - pos = 1; /* skip msgtype 'k' */ + pos = 1; /* skip msgtype PqReplMsg_Keepalive */ pos += 8; /* skip walEnd */ pos += 8; /* skip sendTime */ @@ -1064,7 +1065,7 @@ ProcessWALDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len, * message. We only need the WAL location field (dataStart), the rest of * the header is ignored. */ - hdr_len = 1; /* msgtype 'w' */ + hdr_len = 1; /* msgtype PqReplMsg_WALData */ hdr_len += 8; /* dataStart */ hdr_len += 8; /* walEnd */ hdr_len += 8; /* sendTime */ diff --git a/src/include/libpq/protocol.h b/src/include/libpq/protocol.h index b0bcb3cdc26eb..c64e628628d4b 100644 --- a/src/include/libpq/protocol.h +++ b/src/include/libpq/protocol.h @@ -69,6 +69,27 @@ #define PqMsg_Progress 'P' +/* Replication codes sent by the primary (wrapped in CopyData messages). */ + +#define PqReplMsg_Keepalive 'k' +#define PqReplMsg_PrimaryStatusUpdate 's' +#define PqReplMsg_WALData 'w' + + +/* Replication codes sent by the standby (wrapped in CopyData messages). */ + +#define PqReplMsg_HotStandbyFeedback 'h' +#define PqReplMsg_PrimaryStatusRequest 'p' +#define PqReplMsg_StandbyStatusUpdate 'r' + + +/* Codes used for backups via COPY OUT (wrapped in CopyData messages). */ + +#define PqBackupMsg_Manifest 'm' +#define PqBackupMsg_NewArchive 'n' +#define PqBackupMsg_ProgressReport 'p' + + /* These are the authentication request codes sent by the backend. 
 */

#define AUTH_REQ_OK 0   /* User is authenticated */

From 2242b26ce472db9ac69dc71008c566ea9cd3a5fd Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Thu, 7 Aug 2025 11:02:04 +0900
Subject: [PATCH 456/602] Fix incorrect Datum conversion in
 timestamptz_trunc_internal()

The code used a PG_RETURN_TIMESTAMPTZ() where the return type is
TimestampTz and not a Datum.

On 64-bit systems, there is no effect since this just ends up casting
64-bit integers back and forth. On 32-bit systems, timestamptz is
pass-by-reference. PG_RETURN_TIMESTAMPTZ() allocates new memory and
returns its address, meaning that the caller could interpret the
pointer itself as a timestamp value. The effect is that
date_trunc(..., 'infinity'::timestamptz) will return random values
(instead of the correct return value 'infinity').

Bug introduced in commit d85ce012f99f.

Author: Peter Eisentraut
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/2d320b6f-b4af-4fbc-9eec-5d0fa15d187b@eisentraut.org
Discussion: https://postgr.es/m/4bf60a84-2862-4a53-acd5-8eddf134a60e@eisentraut.org
Backpatch-through: 18
---
 src/backend/utils/adt/timestamp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 25cff56c3d07e..e640b48205b40 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -4954,7 +4954,7 @@ timestamptz_trunc_internal(text *units, TimestampTz timestamp, pg_tz *tzp)
         case DTK_SECOND:
         case DTK_MILLISEC:
         case DTK_MICROSEC:
-            PG_RETURN_TIMESTAMPTZ(timestamp);
+            return timestamp;
             break;

         default:

From 572c0f1b0e2a9ed61816239f59d568217079bb8c Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Thu, 7 Aug 2025 11:49:25 +0900
Subject: [PATCH 457/602] Improve tests of date_trunc() with infinity and
 unsupported units

Commit d85ce012f99f has added some new error handling code to
date_trunc() of timestamp, timestamptz, and interval with infinite
values. However, the new test cases added by that commit did not
actually test all of the new code, missing coverage for the following
cases:

1) For timestamp without time zone:
1-1) infinite value with valid unit
1-2) infinite value with unsupported unit
1-3) finite value with unsupported unit, for a code path older than
d85ce012f99f.

2) For timestamp with time zone, without a time zone specified for the
truncation:
2-1) infinite value with valid unit
2-2) infinite value with unsupported unit
2-3) finite value with unsupported unit, for a code path older than
d85ce012f99f.

3) For timestamp with time zone, with a time zone specified for the
truncation:
3-1) infinite value with valid unit.
3-2) infinite value with unsupported unit.

This commit also provides coverage for the bug fixed in 2242b26ce472,
through cases 2-1) and 3-1), when using an infinite value with a valid
unit, with[out] the optional time zone parameter used for the
truncation.
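To make the failure mode of the referenced bug easier to see, here is a
hypothetical, self-contained sketch of the pass-by-reference case (the
mock typedefs and the malloc-based TimestampTzGetDatum() are stand-ins
for PostgreSQL's real definitions):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef int64_t TimestampTz;
typedef uintptr_t Datum;

/* Mock pass-by-reference conversion: the Datum carries an address. */
static Datum
TimestampTzGetDatum(TimestampTz t)
{
    TimestampTz *p = malloc(sizeof(TimestampTz));

    *p = t;
    return (Datum) p;
}

/* Buggy: the C return type is TimestampTz, but an address is returned. */
static TimestampTz
trunc_buggy(TimestampTz t)
{
    return (TimestampTz) TimestampTzGetDatum(t);
}

/* Fixed, as in 2242b26ce472: return the value itself. */
static TimestampTz
trunc_fixed(TimestampTz t)
{
    return t;
}

int
main(void)
{
    printf("buggy: %lld, fixed: %lld\n",
           (long long) trunc_buggy(42), (long long) trunc_fixed(42));
    return 0;
}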
Author: Peter Eisentraut Discussion: https://postgr.es/m/2d320b6f-b4af-4fbc-9eec-5d0fa15d187b@eisentraut.org Discussion: https://postgr.es/m/4bf60a84-2862-4a53-acd5-8eddf134a60e@eisentraut.org Backpatch-through: 18 --- src/test/regress/expected/timestamp.out | 10 ++++++++++ src/test/regress/expected/timestamptz.out | 18 ++++++++++++++++++ src/test/regress/sql/timestamp.sql | 4 +++- src/test/regress/sql/timestamptz.sql | 7 +++++-- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out index 6aaa19c8f4e46..14a9f5b56a690 100644 --- a/src/test/regress/expected/timestamp.out +++ b/src/test/regress/expected/timestamp.out @@ -591,6 +591,16 @@ SELECT date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc Mon Feb 23 00:00:00 2004 (1 row) +SELECT date_trunc( 'week', timestamp 'infinity' ) AS inf_trunc; + inf_trunc +----------- + infinity +(1 row) + +SELECT date_trunc( 'timezone', timestamp '2004-02-29 15:44:17.71393' ) AS notsupp_trunc; +ERROR: unit "timezone" not supported for type timestamp without time zone +SELECT date_trunc( 'timezone', timestamp 'infinity' ) AS notsupp_inf_trunc; +ERROR: unit "timezone" not supported for type timestamp without time zone SELECT date_trunc( 'ago', timestamp 'infinity' ) AS invalid_trunc; ERROR: unit "ago" not recognized for type timestamp without time zone -- verify date_bin behaves the same as date_trunc for relevant intervals diff --git a/src/test/regress/expected/timestamptz.out b/src/test/regress/expected/timestamptz.out index 2a69953ff25ed..5dc8a621f6c07 100644 --- a/src/test/regress/expected/timestamptz.out +++ b/src/test/regress/expected/timestamptz.out @@ -760,6 +760,16 @@ SELECT date_trunc( 'week', timestamp with time zone '2004-02-29 15:44:17.71393' Mon Feb 23 00:00:00 2004 PST (1 row) +SELECT date_trunc( 'week', timestamp with time zone 'infinity' ) AS inf_trunc; + inf_trunc +----------- + infinity +(1 row) + +SELECT date_trunc( 'timezone', timestamp with time zone '2004-02-29 15:44:17.71393' ) AS notsupp_trunc; +ERROR: unit "timezone" not supported for type timestamp with time zone +SELECT date_trunc( 'timezone', timestamp with time zone 'infinity' ) AS notsupp_inf_trunc; +ERROR: unit "timezone" not supported for type timestamp with time zone SELECT date_trunc( 'ago', timestamp with time zone 'infinity' ) AS invalid_trunc; ERROR: unit "ago" not recognized for type timestamp with time zone SELECT date_trunc('day', timestamp with time zone '2001-02-16 20:38:40+00', 'Australia/Sydney') as sydney_trunc; -- zone name @@ -780,6 +790,14 @@ SELECT date_trunc('day', timestamp with time zone '2001-02-16 20:38:40+00', 'VET Thu Feb 15 20:00:00 2001 PST (1 row) +SELECT date_trunc('timezone', timestamp with time zone 'infinity', 'GMT') AS notsupp_zone_trunc; +ERROR: unit "timezone" not supported for type timestamp with time zone +SELECT date_trunc( 'week', timestamp with time zone 'infinity', 'GMT') AS inf_zone_trunc; + inf_zone_trunc +---------------- + infinity +(1 row) + SELECT date_trunc('ago', timestamp with time zone 'infinity', 'GMT') AS invalid_zone_trunc; ERROR: unit "ago" not recognized for type timestamp with time zone -- verify date_bin behaves the same as date_trunc for relevant intervals diff --git a/src/test/regress/sql/timestamp.sql b/src/test/regress/sql/timestamp.sql index 55f80530ea05e..313757ed041a1 100644 --- a/src/test/regress/sql/timestamp.sql +++ b/src/test/regress/sql/timestamp.sql @@ -175,7 +175,9 @@ SELECT d1 - timestamp without 
time zone '1997-01-02' AS diff FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; SELECT date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc; - +SELECT date_trunc( 'week', timestamp 'infinity' ) AS inf_trunc; +SELECT date_trunc( 'timezone', timestamp '2004-02-29 15:44:17.71393' ) AS notsupp_trunc; +SELECT date_trunc( 'timezone', timestamp 'infinity' ) AS notsupp_inf_trunc; SELECT date_trunc( 'ago', timestamp 'infinity' ) AS invalid_trunc; -- verify date_bin behaves the same as date_trunc for relevant intervals diff --git a/src/test/regress/sql/timestamptz.sql b/src/test/regress/sql/timestamptz.sql index caca3123f13f6..6ace851d16911 100644 --- a/src/test/regress/sql/timestamptz.sql +++ b/src/test/regress/sql/timestamptz.sql @@ -217,15 +217,18 @@ SELECT d1 - timestamp with time zone '1997-01-02' AS diff FROM TIMESTAMPTZ_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; SELECT date_trunc( 'week', timestamp with time zone '2004-02-29 15:44:17.71393' ) AS week_trunc; +SELECT date_trunc( 'week', timestamp with time zone 'infinity' ) AS inf_trunc; +SELECT date_trunc( 'timezone', timestamp with time zone '2004-02-29 15:44:17.71393' ) AS notsupp_trunc; +SELECT date_trunc( 'timezone', timestamp with time zone 'infinity' ) AS notsupp_inf_trunc; SELECT date_trunc( 'ago', timestamp with time zone 'infinity' ) AS invalid_trunc; SELECT date_trunc('day', timestamp with time zone '2001-02-16 20:38:40+00', 'Australia/Sydney') as sydney_trunc; -- zone name SELECT date_trunc('day', timestamp with time zone '2001-02-16 20:38:40+00', 'GMT') as gmt_trunc; -- fixed-offset abbreviation SELECT date_trunc('day', timestamp with time zone '2001-02-16 20:38:40+00', 'VET') as vet_trunc; -- variable-offset abbreviation +SELECT date_trunc('timezone', timestamp with time zone 'infinity', 'GMT') AS notsupp_zone_trunc; +SELECT date_trunc( 'week', timestamp with time zone 'infinity', 'GMT') AS inf_zone_trunc; SELECT date_trunc('ago', timestamp with time zone 'infinity', 'GMT') AS invalid_zone_trunc; - - -- verify date_bin behaves the same as date_trunc for relevant intervals SELECT str, From d9bb8ef093d62763cfd19d37e6bb8182998a3f88 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 7 Aug 2025 09:20:02 +0100 Subject: [PATCH 458/602] Optimise non-native 128-bit addition in int128.h. On platforms without native 128-bit integer support, simplify the test for carry in int128_add_uint64() by noting that the low-part addition is unsigned integer arithmetic, which is just modular arithmetic. Therefore the test for carry can simply be written as "new value < old value" (i.e., a test for modular wrap-around). This can then be made branchless so that on modern compilers it produces the same machine instructions as native 128-bit addition, making it significantly simpler and faster. Similarly, the test for carry in int128_add_int64() can be written in much the same way, but with an extra term to compensate for the sign of the value being added. Again, on modern compilers this leads to branchless code, often identical to the native 128-bit integer addition machine code. 
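A minimal, self-contained sketch of the carry logic (a hypothetical
my_int128 struct built on <stdint.h>, not the real INT128 type from
int128.h; like the real code, it assumes arithmetic right shift of
signed values):

#include <stdint.h>
#include <stdio.h>

typedef struct
{
    int64_t  hi;
    uint64_t lo;
} my_int128;

/*
 * Add an unsigned 64-bit value.  Unsigned addition is modular, so a
 * carry occurred exactly when the new low part is smaller than the old.
 */
static void
my_int128_add_uint64(my_int128 *x, uint64_t v)
{
    uint64_t oldlo = x->lo;

    x->lo += v;
    x->hi += (x->lo < oldlo);
}

/*
 * Add a signed 64-bit value.  Same carry test, plus (v >> 63), which is
 * 0 or -1, to compensate for sign-extending v into the high part.
 */
static void
my_int128_add_int64(my_int128 *x, int64_t v)
{
    uint64_t oldlo = x->lo;

    x->lo += (uint64_t) v;
    x->hi += (x->lo < oldlo) + (v >> 63);
}

int
main(void)
{
    my_int128 x = {0, UINT64_MAX};

    my_int128_add_uint64(&x, 1);    /* wraps: now hi = 1, lo = 0 */
    my_int128_add_int64(&x, -1);    /* back to hi = 0, lo = UINT64_MAX */
    printf("%lld %llu\n", (long long) x.hi, (unsigned long long) x.lo);
    return 0;
}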
Author: Dean Rasheed Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAEZATCWgBMc9ZwKMYqQpaQz2X6gaamYRB+RnMsUNcdMcL2Mj_w@mail.gmail.com --- src/include/common/int128.h | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/include/common/int128.h b/src/include/common/int128.h index 8c300e56d9a32..a84e5ca25f0d0 100644 --- a/src/include/common/int128.h +++ b/src/include/common/int128.h @@ -68,17 +68,17 @@ int128_add_uint64(INT128 *i128, uint64 v) #else /* * First add the value to the .lo part, then check to see if a carry needs - * to be propagated into the .hi part. A carry is needed if both inputs - * have high bits set, or if just one input has high bit set while the new - * .lo part doesn't. Remember that .lo part is unsigned; we cast to - * signed here just as a cheap way to check the high bit. + * to be propagated into the .hi part. Since this is unsigned integer + * arithmetic, which is just modular arithmetic, a carry is needed if the + * new .lo part is less than the old .lo part (i.e., if modular + * wrap-around occurred). Writing this in the form below, rather than + * using an "if" statement causes modern compilers to produce branchless + * machine code identical to the native code. */ uint64 oldlo = i128->lo; i128->lo += v; - if (((int64) v < 0 && (int64) oldlo < 0) || - (((int64) v < 0 || (int64) oldlo < 0) && (int64) i128->lo >= 0)) - i128->hi++; + i128->hi += (i128->lo < oldlo); #endif } @@ -93,23 +93,19 @@ int128_add_int64(INT128 *i128, int64 v) #else /* * This is much like the above except that the carry logic differs for - * negative v. Ordinarily we'd need to subtract 1 from the .hi part - * (corresponding to adding the sign-extended bits of v to it); but if - * there is a carry out of the .lo part, that cancels and we do nothing. + * negative v -- we need to subtract 1 from the .hi part if the new .lo + * value is greater than the old .lo value. That can be achieved without + * any branching by adding the sign bit from v (v >> 63 = 0 or -1) to the + * previous result (for negative v, if the new .lo value is less than the + * old .lo value, the two terms cancel and we leave the .hi part + * unchanged, otherwise we subtract 1 from the .hi part). With modern + * compilers this often produces machine code identical to the native + * code. */ uint64 oldlo = i128->lo; i128->lo += v; - if (v >= 0) - { - if ((int64) oldlo < 0 && (int64) i128->lo >= 0) - i128->hi++; - } - else - { - if (!((int64) oldlo < 0 || (int64) i128->lo >= 0)) - i128->hi--; - } + i128->hi += (i128->lo < oldlo) + (v >> 63); #endif } From d8a08dbee46c4121bbb819df1c98533824974c45 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 7 Aug 2025 09:52:30 +0100 Subject: [PATCH 459/602] Simplify non-native 64x64-bit multiplication in int128.h. In the non-native code in int128_add_int64_mul_int64(), use signed 64-bit integer multiplication instead of unsigned multiplication for the first three product terms. This simplifies the code needed to add each product term to the result, leading to more compact and efficient code. The actual performance gain is quite modest, but it seems worth it to improve the code's readability. 
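As a hypothetical, self-contained sketch of the decomposition (again a
stand-in my_int128 struct rather than the real INT128 type, and again
assuming arithmetic right shift, as int128.h itself does), the product
is assembled from x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo, with
signed high halves and unsigned low halves:

#include <stdint.h>
#include <stdio.h>

typedef struct
{
    int64_t  hi;
    uint64_t lo;
} my_int128;

/* Carry-propagating add of an unsigned value into the low part. */
static void
add_u64(my_int128 *r, uint64_t v)
{
    uint64_t oldlo = r->lo;

    r->lo += v;
    r->hi += (r->lo < oldlo);
}

/* Add x*y via the four 32-bit partial products. */
static void
add_mul_s64(my_int128 *r, int64_t x, int64_t y)
{
    int32_t  x_hi = (int32_t) (x >> 32);
    uint32_t x_lo = (uint32_t) x;
    int32_t  y_hi = (int32_t) (y >> 32);
    uint32_t y_lo = (uint32_t) y;
    int64_t  tmp;

    r->hi += (int64_t) x_hi * (int64_t) y_hi;   /* term scaled by 2^64 */

    tmp = (int64_t) x_hi * (int64_t) y_lo;      /* term scaled by 2^32 */
    r->hi += tmp >> 32;                         /* sign-extended top half */
    add_u64(r, (uint64_t) (uint32_t) tmp << 32);

    tmp = (int64_t) x_lo * (int64_t) y_hi;      /* term scaled by 2^32 */
    r->hi += tmp >> 32;
    add_u64(r, (uint64_t) (uint32_t) tmp << 32);

    add_u64(r, (uint64_t) x_lo * (uint64_t) y_lo);  /* unscaled term */
}

int
main(void)
{
    my_int128 r = {0, 0};

    add_mul_s64(&r, -3, 5);     /* expect hi = -1, lo = 2^64 - 15 */
    printf("%lld %llu\n", (long long) r.hi, (unsigned long long) r.lo);
    return 0;
}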
Author: Dean Rasheed Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAEZATCWgBMc9ZwKMYqQpaQz2X6gaamYRB+RnMsUNcdMcL2Mj_w@mail.gmail.com --- src/include/common/int128.h | 48 ++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/include/common/int128.h b/src/include/common/int128.h index a84e5ca25f0d0..addef99bfa5d1 100644 --- a/src/include/common/int128.h +++ b/src/include/common/int128.h @@ -110,11 +110,11 @@ int128_add_int64(INT128 *i128, int64 v) } /* - * INT64_AU32 extracts the most significant 32 bits of int64 as int64, while - * INT64_AL32 extracts the least significant 32 bits as uint64. + * INT64_HI_INT32 extracts the most significant 32 bits of int64 as int32. + * INT64_LO_UINT32 extracts the least significant 32 bits as uint32. */ -#define INT64_AU32(i64) ((i64) >> 32) -#define INT64_AL32(i64) ((i64) & UINT64CONST(0xFFFFFFFF)) +#define INT64_HI_INT32(i64) ((int32) ((i64) >> 32)) +#define INT64_LO_UINT32(i64) ((uint32) (i64)) /* * Add the 128-bit product of two int64 values into an INT128 variable. @@ -129,7 +129,7 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) */ *i128 += (int128) x * (int128) y; #else - /* INT64_AU32 must use arithmetic right shift */ + /* INT64_HI_INT32 must use arithmetic right shift */ StaticAssertDecl(((int64) -1 >> 1) == (int64) -1, "arithmetic right shift is needed"); @@ -154,33 +154,27 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) /* No need to work hard if product must be zero */ if (x != 0 && y != 0) { - int64 x_u32 = INT64_AU32(x); - uint64 x_l32 = INT64_AL32(x); - int64 y_u32 = INT64_AU32(y); - uint64 y_l32 = INT64_AL32(y); + int32 x_hi = INT64_HI_INT32(x); + uint32 x_lo = INT64_LO_UINT32(x); + int32 y_hi = INT64_HI_INT32(y); + uint32 y_lo = INT64_LO_UINT32(y); int64 tmp; /* the first term */ - i128->hi += x_u32 * y_u32; - - /* the second term: sign-extend it only if x is negative */ - tmp = x_u32 * y_l32; - if (x < 0) - i128->hi += INT64_AU32(tmp); - else - i128->hi += ((uint64) tmp) >> 32; - int128_add_uint64(i128, ((uint64) INT64_AL32(tmp)) << 32); - - /* the third term: sign-extend it only if y is negative */ - tmp = x_l32 * y_u32; - if (y < 0) - i128->hi += INT64_AU32(tmp); - else - i128->hi += ((uint64) tmp) >> 32; - int128_add_uint64(i128, ((uint64) INT64_AL32(tmp)) << 32); + i128->hi += (int64) x_hi * (int64) y_hi; + + /* the second term: sign-extended with the sign of x */ + tmp = (int64) x_hi * (int64) y_lo; + i128->hi += INT64_HI_INT32(tmp); + int128_add_uint64(i128, ((uint64) INT64_LO_UINT32(tmp)) << 32); + + /* the third term: sign-extended with the sign of y */ + tmp = (int64) x_lo * (int64) y_hi; + i128->hi += INT64_HI_INT32(tmp); + int128_add_uint64(i128, ((uint64) INT64_LO_UINT32(tmp)) << 32); /* the fourth term: always unsigned */ - int128_add_uint64(i128, x_l32 * y_l32); + int128_add_uint64(i128, (uint64) x_lo * (uint64) y_lo); } #endif } From 1beda2c3cf5842c37edd7081468a703052c6d0f0 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 7 Aug 2025 11:48:43 +0200 Subject: [PATCH 460/602] pg_upgrade: Improve message indentation Fix commit f295494d338 to use consistent four-space indentation for verbose messages. 
--- src/bin/pg_upgrade/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 310f53c55771b..67eedbae265a1 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -1713,7 +1713,7 @@ check_for_not_null_inheritance(ClusterInfo *cluster) "If the parent column(s) are NOT NULL, then the child column must\n" "also be marked NOT NULL, or the upgrade will fail.\n" "You can fix this by running\n" - " ALTER TABLE tablename ALTER column SET NOT NULL;\n" + " ALTER TABLE tablename ALTER column SET NOT NULL;\n" "on each column listed in the file:\n" " %s", report.path); } From 90bfae9f93e7eb0328cc8288bd09234315997ec8 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Thu, 7 Aug 2025 17:10:52 +0700 Subject: [PATCH 461/602] Update ICU C++ API symbols Recent ICU versions have added U_SHOW_CPLUSPLUS_HEADER_API, and we need to set this to zero as well to hide the ICU C++ APIs from pg_locale.h Per discussion, we want cpluspluscheck to work cleanly in backbranches, so backpatch both this and its predecessor commit ed26c4e25a4 to all supported versions. Reported-by: Tom Lane Discussion: https://postgr.es/m/1115793.1754414782%40sss.pgh.pa.us Backpatch-through: 13 --- src/include/utils/pg_locale.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 931f5b3b88068..2b072cafb4d48 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -18,6 +18,8 @@ /* only include the C APIs, to avoid errors in cpluspluscheck */ #undef U_SHOW_CPLUSPLUS_API #define U_SHOW_CPLUSPLUS_API 0 +#undef U_SHOW_CPLUSPLUS_HEADER_API +#define U_SHOW_CPLUSPLUS_HEADER_API 0 #include #endif From 466c5435fd45209546c502bd1e8ee5b76b1cc0a1 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Thu, 7 Aug 2025 14:29:02 +0300 Subject: [PATCH 462/602] Fix checkpointer shared memory allocation Use Min(NBuffers, MAX_CHECKPOINT_REQUESTS) instead of NBuffers in CheckpointerShmemSize() to match the actual array size limit set in CheckpointerShmemInit(). This prevents wasting shared memory when NBuffers > MAX_CHECKPOINT_REQUESTS. Also, fix the comment. Reported-by: Tom Lane Discussion: https://postgr.es/m/1439188.1754506714%40sss.pgh.pa.us Author: Xuneng Zhou Co-authored-by: Alexander Korotkov --- src/backend/postmaster/checkpointer.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 8490148a47d52..e84e8663e966b 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -953,11 +953,14 @@ CheckpointerShmemSize(void) Size size; /* - * Currently, the size of the requests[] array is arbitrarily set equal to - * NBuffers. This may prove too large or small ... + * The size of the requests[] array is arbitrarily set equal to NBuffers. + * But there is a cap of MAX_CHECKPOINT_REQUESTS to prevent accumulating + * too many checkpoint requests in the ring buffer. 
*/ size = offsetof(CheckpointerShmemStruct, requests); - size = add_size(size, mul_size(NBuffers, sizeof(CheckpointerRequest))); + size = add_size(size, mul_size(Min(NBuffers, + MAX_CHECKPOINT_REQUESTS), + sizeof(CheckpointerRequest))); return size; } From 0ef891e54175c6646a132bc00e330ff64edb9888 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 7 Aug 2025 13:29:08 +0200 Subject: [PATCH 463/602] doc: Formatting improvements Small touch-up on commits 25505082f0e and 50fd428b2b9. Fix the formatting of the example messages in the documentation and adjust the wording to match the code. --- doc/src/sgml/logicaldecoding.sgml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 593f784b69dcb..77c720c422c50 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -290,7 +290,7 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU A logical slot will emit each change just once in normal operation. The current position of each slot is persisted only at checkpoint, so in - the case of a crash the slot may return to an earlier LSN, which will + the case of a crash the slot might return to an earlier LSN, which will then cause recent changes to be sent again when the server restarts. Logical decoding clients are responsible for avoiding ill effects from handling the same message more than once. Clients may wish to record @@ -409,7 +409,7 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU should be used with caution. Unlike automatic synchronization, it does not include cyclic retries, making it more prone to synchronization failures, particularly during initial sync scenarios where the required WAL files - or catalog rows for the slot may have already been removed or are at risk + or catalog rows for the slot might have already been removed or are at risk of being removed on the standby. In contrast, automatic synchronization via sync_replication_slots provides continuous slot updates, enabling seamless failover and supporting high availability. @@ -430,8 +430,8 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU standby, the slot will not be persisted to avoid data loss. In such cases, the following log message may appear: - LOG: could not synchronize replication slot "failover_slot" - DETAIL: Synchronization could lead to data loss as the remote slot needs WAL at LSN 0/03003F28 and catalog xmin 754, but the standby has LSN 0/03003F28 and catalog xmin 756 +LOG: could not synchronize replication slot "failover_slot" +DETAIL: Synchronization could lead to data loss, because the remote slot needs WAL at LSN 0/03003F28 and catalog xmin 754, but the standby has LSN 0/03003F28 and catalog xmin 756. If the logical replication slot is actively used by a consumer, no manual intervention is needed; the slot will advance automatically, From d699687b329e031cd90e967b39c3fd8a53ef8208 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Thu, 7 Aug 2025 15:49:24 +0100 Subject: [PATCH 464/602] Extend int128.h to support more numeric code. This adds a few more functions to int128.h, allowing more of numeric.c to use 128-bit integers on all platforms. 
Specifically, int64_div_fast_to_numeric() and the following aggregate functions can now use 128-bit integers for improved performance on all platforms, rather than just platforms with native support for int128: - SUM(int8) - AVG(int8) - STDDEV_POP(int2 or int4) - STDDEV_SAMP(int2 or int4) - VAR_POP(int2 or int4) - VAR_SAMP(int2 or int4) In addition to improved performance on platforms lacking native 128-bit integer support, this significantly simplifies this numeric code by allowing a lot of conditionally compiled code to be deleted. A couple of numeric functions (div_var_int64() and sqrt_var()) still contain conditionally compiled 128-bit integer code that only works on platforms with native 128-bit integer support. Making those work more generally would require rolling our own higher precision 128-bit division, which isn't supported for now. Author: Dean Rasheed Reviewed-by: John Naylor Discussion: https://postgr.es/m/CAEZATCWgBMc9ZwKMYqQpaQz2X6gaamYRB+RnMsUNcdMcL2Mj_w@mail.gmail.com --- src/backend/utils/adt/numeric.c | 502 +++++---------------- src/include/common/int128.h | 239 ++++++++++ src/test/modules/test_int128/test_int128.c | 103 ++++- src/test/regress/expected/aggregates.out | 19 + src/test/regress/sql/aggregates.sql | 5 + 5 files changed, 484 insertions(+), 384 deletions(-) diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index c9233565d57a7..122f2efab8b37 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -28,6 +28,7 @@ #include "common/hashfn.h" #include "common/int.h" +#include "common/int128.h" #include "funcapi.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" @@ -534,10 +535,7 @@ static bool numericvar_to_int32(const NumericVar *var, int32 *result); static bool numericvar_to_int64(const NumericVar *var, int64 *result); static void int64_to_numericvar(int64 val, NumericVar *var); static bool numericvar_to_uint64(const NumericVar *var, uint64 *result); -#ifdef HAVE_INT128 -static bool numericvar_to_int128(const NumericVar *var, int128 *result); -static void int128_to_numericvar(int128 val, NumericVar *var); -#endif +static void int128_to_numericvar(INT128 val, NumericVar *var); static double numericvar_to_double_no_overflow(const NumericVar *var); static Datum numeric_abbrev_convert(Datum original_datum, SortSupport ssup); @@ -4463,25 +4461,13 @@ int64_div_fast_to_numeric(int64 val1, int log10val2) if (unlikely(pg_mul_s64_overflow(val1, factor, &new_val1))) { -#ifdef HAVE_INT128 /* do the multiplication using 128-bit integers */ - int128 tmp; + INT128 tmp; - tmp = (int128) val1 * (int128) factor; + tmp = int64_to_int128(0); + int128_add_int64_mul_int64(&tmp, val1, factor); int128_to_numericvar(tmp, &result); -#else - /* do the multiplication using numerics */ - NumericVar tmp; - - init_var(&tmp); - - int64_to_numericvar(val1, &result); - int64_to_numericvar(factor, &tmp); - mul_var(&result, &tmp, &result, 0); - - free_var(&tmp); -#endif } else int64_to_numericvar(new_val1, &result); @@ -4901,8 +4887,8 @@ numeric_pg_lsn(PG_FUNCTION_ARGS) * Actually, it's a pointer to a NumericAggState allocated in the aggregate * context. The digit buffers for the NumericVars will be there too. * - * On platforms which support 128-bit integers some aggregates instead use a - * 128-bit integer based transition datatype to speed up calculations. + * For integer inputs, some aggregates use special-purpose 64-bit or 128-bit + * integer based transition datatypes to speed up calculations. 
* * ---------------------------------------------------------------------- */ @@ -5566,26 +5552,27 @@ numeric_accum_inv(PG_FUNCTION_ARGS) /* - * Integer data types in general use Numeric accumulators to share code - * and avoid risk of overflow. + * Integer data types in general use Numeric accumulators to share code and + * avoid risk of overflow. However for performance reasons optimized + * special-purpose accumulator routines are used when possible: * - * However for performance reasons optimized special-purpose accumulator - * routines are used when possible. + * For 16-bit and 32-bit inputs, N and sum(X) fit into 64-bit, so 64-bit + * accumulators are used for SUM and AVG of these data types. * - * On platforms with 128-bit integer support, the 128-bit routines will be - * used when sum(X) or sum(X*X) fit into 128-bit. + * For 16-bit and 32-bit inputs, sum(X^2) fits into 128-bit, so 128-bit + * accumulators are used for STDDEV_POP, STDDEV_SAMP, VAR_POP, and VAR_SAMP of + * these data types. * - * For 16 and 32 bit inputs, the N and sum(X) fit into 64-bit so the 64-bit - * accumulators will be used for SUM and AVG of these data types. + * For 64-bit inputs, sum(X) fits into 128-bit, so a 128-bit accumulator is + * used for SUM(int8) and AVG(int8). */ -#ifdef HAVE_INT128 typedef struct Int128AggState { bool calcSumX2; /* if true, calculate sumX2 */ int64 N; /* count of processed numbers */ - int128 sumX; /* sum of processed numbers */ - int128 sumX2; /* sum of squares of processed numbers */ + INT128 sumX; /* sum of processed numbers */ + INT128 sumX2; /* sum of squares of processed numbers */ } Int128AggState; /* @@ -5631,12 +5618,12 @@ makeInt128AggStateCurrentContext(bool calcSumX2) * Accumulate a new input value for 128-bit aggregate functions. */ static void -do_int128_accum(Int128AggState *state, int128 newval) +do_int128_accum(Int128AggState *state, int64 newval) { if (state->calcSumX2) - state->sumX2 += newval * newval; + int128_add_int64_mul_int64(&state->sumX2, newval, newval); - state->sumX += newval; + int128_add_int64(&state->sumX, newval); state->N++; } @@ -5644,43 +5631,28 @@ do_int128_accum(Int128AggState *state, int128 newval) * Remove an input value from the aggregated state. */ static void -do_int128_discard(Int128AggState *state, int128 newval) +do_int128_discard(Int128AggState *state, int64 newval) { if (state->calcSumX2) - state->sumX2 -= newval * newval; + int128_sub_int64_mul_int64(&state->sumX2, newval, newval); - state->sumX -= newval; + int128_sub_int64(&state->sumX, newval); state->N--; } -typedef Int128AggState PolyNumAggState; -#define makePolyNumAggState makeInt128AggState -#define makePolyNumAggStateCurrentContext makeInt128AggStateCurrentContext -#else -typedef NumericAggState PolyNumAggState; -#define makePolyNumAggState makeNumericAggState -#define makePolyNumAggStateCurrentContext makeNumericAggStateCurrentContext -#endif - Datum int2_accum(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? 
NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* Create the state data on the first call */ if (state == NULL) - state = makePolyNumAggState(fcinfo, true); + state = makeInt128AggState(fcinfo, true); if (!PG_ARGISNULL(1)) - { -#ifdef HAVE_INT128 - do_int128_accum(state, (int128) PG_GETARG_INT16(1)); -#else - do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT16(1))); -#endif - } + do_int128_accum(state, PG_GETARG_INT16(1)); PG_RETURN_POINTER(state); } @@ -5688,22 +5660,16 @@ int2_accum(PG_FUNCTION_ARGS) Datum int4_accum(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* Create the state data on the first call */ if (state == NULL) - state = makePolyNumAggState(fcinfo, true); + state = makeInt128AggState(fcinfo, true); if (!PG_ARGISNULL(1)) - { -#ifdef HAVE_INT128 - do_int128_accum(state, (int128) PG_GETARG_INT32(1)); -#else - do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT32(1))); -#endif - } + do_int128_accum(state, PG_GETARG_INT32(1)); PG_RETURN_POINTER(state); } @@ -5726,21 +5692,21 @@ int8_accum(PG_FUNCTION_ARGS) } /* - * Combine function for numeric aggregates which require sumX2 + * Combine function for Int128AggState for aggregates which require sumX2 */ Datum numeric_poly_combine(PG_FUNCTION_ARGS) { - PolyNumAggState *state1; - PolyNumAggState *state2; + Int128AggState *state1; + Int128AggState *state2; MemoryContext agg_context; MemoryContext old_context; if (!AggCheckCallContext(fcinfo, &agg_context)) elog(ERROR, "aggregate function called in non-aggregate context"); - state1 = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); - state2 = PG_ARGISNULL(1) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(1); + state1 = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? NULL : (Int128AggState *) PG_GETARG_POINTER(1); if (state2 == NULL) PG_RETURN_POINTER(state1); @@ -5750,16 +5716,10 @@ numeric_poly_combine(PG_FUNCTION_ARGS) { old_context = MemoryContextSwitchTo(agg_context); - state1 = makePolyNumAggState(fcinfo, true); + state1 = makeInt128AggState(fcinfo, true); state1->N = state2->N; - -#ifdef HAVE_INT128 state1->sumX = state2->sumX; state1->sumX2 = state2->sumX2; -#else - accum_sum_copy(&state1->sumX, &state2->sumX); - accum_sum_copy(&state1->sumX2, &state2->sumX2); -#endif MemoryContextSwitchTo(old_context); @@ -5769,54 +5729,51 @@ numeric_poly_combine(PG_FUNCTION_ARGS) if (state2->N > 0) { state1->N += state2->N; + int128_add_int128(&state1->sumX, state2->sumX); + int128_add_int128(&state1->sumX2, state2->sumX2); + } + PG_RETURN_POINTER(state1); +} -#ifdef HAVE_INT128 - state1->sumX += state2->sumX; - state1->sumX2 += state2->sumX2; -#else - /* The rest of this needs to work in the aggregate context */ - old_context = MemoryContextSwitchTo(agg_context); - - /* Accumulate sums */ - accum_sum_combine(&state1->sumX, &state2->sumX); - accum_sum_combine(&state1->sumX2, &state2->sumX2); +/* + * int128_serialize - serialize a 128-bit integer to binary format + */ +static inline void +int128_serialize(StringInfo buf, INT128 val) +{ + pq_sendint64(buf, PG_INT128_HI_INT64(val)); + pq_sendint64(buf, PG_INT128_LO_UINT64(val)); +} - MemoryContextSwitchTo(old_context); -#endif +/* + * int128_deserialize - deserialize binary format to a 128-bit integer. 
+ */ +static inline INT128 +int128_deserialize(StringInfo buf) +{ + int64 hi = pq_getmsgint64(buf); + uint64 lo = pq_getmsgint64(buf); - } - PG_RETURN_POINTER(state1); + return make_int128(hi, lo); } /* * numeric_poly_serialize - * Serialize PolyNumAggState into bytea for aggregate functions which + * Serialize Int128AggState into bytea for aggregate functions which * require sumX2. */ Datum numeric_poly_serialize(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; StringInfoData buf; bytea *result; - NumericVar tmp_var; /* Ensure we disallow calling when not in aggregate context */ if (!AggCheckCallContext(fcinfo, NULL)) elog(ERROR, "aggregate function called in non-aggregate context"); - state = (PolyNumAggState *) PG_GETARG_POINTER(0); - - /* - * If the platform supports int128 then sumX and sumX2 will be a 128 bit - * integer type. Here we'll convert that into a numeric type so that the - * combine state is in the same format for both int128 enabled machines - * and machines which don't support that type. The logic here is that one - * day we might like to send these over to another server for further - * processing and we want a standard format to work with. - */ - - init_var(&tmp_var); + state = (Int128AggState *) PG_GETARG_POINTER(0); pq_begintypsend(&buf); @@ -5824,48 +5781,33 @@ numeric_poly_serialize(PG_FUNCTION_ARGS) pq_sendint64(&buf, state->N); /* sumX */ -#ifdef HAVE_INT128 - int128_to_numericvar(state->sumX, &tmp_var); -#else - accum_sum_final(&state->sumX, &tmp_var); -#endif - numericvar_serialize(&buf, &tmp_var); + int128_serialize(&buf, state->sumX); /* sumX2 */ -#ifdef HAVE_INT128 - int128_to_numericvar(state->sumX2, &tmp_var); -#else - accum_sum_final(&state->sumX2, &tmp_var); -#endif - numericvar_serialize(&buf, &tmp_var); + int128_serialize(&buf, state->sumX2); result = pq_endtypsend(&buf); - free_var(&tmp_var); - PG_RETURN_BYTEA_P(result); } /* * numeric_poly_deserialize - * Deserialize PolyNumAggState from bytea for aggregate functions which + * Deserialize Int128AggState from bytea for aggregate functions which * require sumX2. */ Datum numeric_poly_deserialize(PG_FUNCTION_ARGS) { bytea *sstate; - PolyNumAggState *result; + Int128AggState *result; StringInfoData buf; - NumericVar tmp_var; if (!AggCheckCallContext(fcinfo, NULL)) elog(ERROR, "aggregate function called in non-aggregate context"); sstate = PG_GETARG_BYTEA_PP(0); - init_var(&tmp_var); - /* * Initialize a StringInfo so that we can "receive" it using the standard * recv-function infrastructure. @@ -5873,31 +5815,19 @@ numeric_poly_deserialize(PG_FUNCTION_ARGS) initReadOnlyStringInfo(&buf, VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); - result = makePolyNumAggStateCurrentContext(false); + result = makeInt128AggStateCurrentContext(false); /* N */ result->N = pq_getmsgint64(&buf); /* sumX */ - numericvar_deserialize(&buf, &tmp_var); -#ifdef HAVE_INT128 - numericvar_to_int128(&tmp_var, &result->sumX); -#else - accum_sum_add(&result->sumX, &tmp_var); -#endif + result->sumX = int128_deserialize(&buf); /* sumX2 */ - numericvar_deserialize(&buf, &tmp_var); -#ifdef HAVE_INT128 - numericvar_to_int128(&tmp_var, &result->sumX2); -#else - accum_sum_add(&result->sumX2, &tmp_var); -#endif + result->sumX2 = int128_deserialize(&buf); pq_getmsgend(&buf); - free_var(&tmp_var); - PG_RETURN_POINTER(result); } @@ -5907,43 +5837,37 @@ numeric_poly_deserialize(PG_FUNCTION_ARGS) Datum int8_avg_accum(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; - state = PG_ARGISNULL(0) ? 
NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* Create the state data on the first call */ if (state == NULL) - state = makePolyNumAggState(fcinfo, false); + state = makeInt128AggState(fcinfo, false); if (!PG_ARGISNULL(1)) - { -#ifdef HAVE_INT128 - do_int128_accum(state, (int128) PG_GETARG_INT64(1)); -#else - do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT64(1))); -#endif - } + do_int128_accum(state, PG_GETARG_INT64(1)); PG_RETURN_POINTER(state); } /* - * Combine function for PolyNumAggState for aggregates which don't require + * Combine function for Int128AggState for aggregates which don't require * sumX2 */ Datum int8_avg_combine(PG_FUNCTION_ARGS) { - PolyNumAggState *state1; - PolyNumAggState *state2; + Int128AggState *state1; + Int128AggState *state2; MemoryContext agg_context; MemoryContext old_context; if (!AggCheckCallContext(fcinfo, &agg_context)) elog(ERROR, "aggregate function called in non-aggregate context"); - state1 = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); - state2 = PG_ARGISNULL(1) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(1); + state1 = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); + state2 = PG_ARGISNULL(1) ? NULL : (Int128AggState *) PG_GETARG_POINTER(1); if (state2 == NULL) PG_RETURN_POINTER(state1); @@ -5953,14 +5877,10 @@ int8_avg_combine(PG_FUNCTION_ARGS) { old_context = MemoryContextSwitchTo(agg_context); - state1 = makePolyNumAggState(fcinfo, false); + state1 = makeInt128AggState(fcinfo, false); state1->N = state2->N; - -#ifdef HAVE_INT128 state1->sumX = state2->sumX; -#else - accum_sum_copy(&state1->sumX, &state2->sumX); -#endif + MemoryContextSwitchTo(old_context); PG_RETURN_POINTER(state1); @@ -5969,52 +5889,28 @@ int8_avg_combine(PG_FUNCTION_ARGS) if (state2->N > 0) { state1->N += state2->N; - -#ifdef HAVE_INT128 - state1->sumX += state2->sumX; -#else - /* The rest of this needs to work in the aggregate context */ - old_context = MemoryContextSwitchTo(agg_context); - - /* Accumulate sums */ - accum_sum_combine(&state1->sumX, &state2->sumX); - - MemoryContextSwitchTo(old_context); -#endif - + int128_add_int128(&state1->sumX, state2->sumX); } PG_RETURN_POINTER(state1); } /* * int8_avg_serialize - * Serialize PolyNumAggState into bytea using the standard - * recv-function infrastructure. + * Serialize Int128AggState into bytea for aggregate functions which + * don't require sumX2. */ Datum int8_avg_serialize(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; StringInfoData buf; bytea *result; - NumericVar tmp_var; /* Ensure we disallow calling when not in aggregate context */ if (!AggCheckCallContext(fcinfo, NULL)) elog(ERROR, "aggregate function called in non-aggregate context"); - state = (PolyNumAggState *) PG_GETARG_POINTER(0); - - /* - * If the platform supports int128 then sumX will be a 128 integer type. - * Here we'll convert that into a numeric type so that the combine state - * is in the same format for both int128 enabled machines and machines - * which don't support that type. The logic here is that one day we might - * like to send these over to another server for further processing and we - * want a standard format to work with. 
- */ - - init_var(&tmp_var); + state = (Int128AggState *) PG_GETARG_POINTER(0); pq_begintypsend(&buf); @@ -6022,39 +5918,30 @@ int8_avg_serialize(PG_FUNCTION_ARGS) pq_sendint64(&buf, state->N); /* sumX */ -#ifdef HAVE_INT128 - int128_to_numericvar(state->sumX, &tmp_var); -#else - accum_sum_final(&state->sumX, &tmp_var); -#endif - numericvar_serialize(&buf, &tmp_var); + int128_serialize(&buf, state->sumX); result = pq_endtypsend(&buf); - free_var(&tmp_var); - PG_RETURN_BYTEA_P(result); } /* * int8_avg_deserialize - * Deserialize bytea back into PolyNumAggState. + * Deserialize Int128AggState from bytea for aggregate functions which + * don't require sumX2. */ Datum int8_avg_deserialize(PG_FUNCTION_ARGS) { bytea *sstate; - PolyNumAggState *result; + Int128AggState *result; StringInfoData buf; - NumericVar tmp_var; if (!AggCheckCallContext(fcinfo, NULL)) elog(ERROR, "aggregate function called in non-aggregate context"); sstate = PG_GETARG_BYTEA_PP(0); - init_var(&tmp_var); - /* * Initialize a StringInfo so that we can "receive" it using the standard * recv-function infrastructure. @@ -6062,23 +5949,16 @@ int8_avg_deserialize(PG_FUNCTION_ARGS) initReadOnlyStringInfo(&buf, VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate)); - result = makePolyNumAggStateCurrentContext(false); + result = makeInt128AggStateCurrentContext(false); /* N */ result->N = pq_getmsgint64(&buf); /* sumX */ - numericvar_deserialize(&buf, &tmp_var); -#ifdef HAVE_INT128 - numericvar_to_int128(&tmp_var, &result->sumX); -#else - accum_sum_add(&result->sumX, &tmp_var); -#endif + result->sumX = int128_deserialize(&buf); pq_getmsgend(&buf); - free_var(&tmp_var); - PG_RETURN_POINTER(result); } @@ -6089,24 +5969,16 @@ int8_avg_deserialize(PG_FUNCTION_ARGS) Datum int2_accum_inv(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* Should not get here with no state */ if (state == NULL) elog(ERROR, "int2_accum_inv called with NULL state"); if (!PG_ARGISNULL(1)) - { -#ifdef HAVE_INT128 - do_int128_discard(state, (int128) PG_GETARG_INT16(1)); -#else - /* Should never fail, all inputs have dscale 0 */ - if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT16(1)))) - elog(ERROR, "do_numeric_discard failed unexpectedly"); -#endif - } + do_int128_discard(state, PG_GETARG_INT16(1)); PG_RETURN_POINTER(state); } @@ -6114,24 +5986,16 @@ int2_accum_inv(PG_FUNCTION_ARGS) Datum int4_accum_inv(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* Should not get here with no state */ if (state == NULL) elog(ERROR, "int4_accum_inv called with NULL state"); if (!PG_ARGISNULL(1)) - { -#ifdef HAVE_INT128 - do_int128_discard(state, (int128) PG_GETARG_INT32(1)); -#else - /* Should never fail, all inputs have dscale 0 */ - if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT32(1)))) - elog(ERROR, "do_numeric_discard failed unexpectedly"); -#endif - } + do_int128_discard(state, PG_GETARG_INT32(1)); PG_RETURN_POINTER(state); } @@ -6160,24 +6024,16 @@ int8_accum_inv(PG_FUNCTION_ARGS) Datum int8_avg_accum_inv(PG_FUNCTION_ARGS) { - PolyNumAggState *state; + Int128AggState *state; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? 
NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* Should not get here with no state */ if (state == NULL) elog(ERROR, "int8_avg_accum_inv called with NULL state"); if (!PG_ARGISNULL(1)) - { -#ifdef HAVE_INT128 - do_int128_discard(state, (int128) PG_GETARG_INT64(1)); -#else - /* Should never fail, all inputs have dscale 0 */ - if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT64(1)))) - elog(ERROR, "do_numeric_discard failed unexpectedly"); -#endif - } + do_int128_discard(state, PG_GETARG_INT64(1)); PG_RETURN_POINTER(state); } @@ -6185,12 +6041,11 @@ int8_avg_accum_inv(PG_FUNCTION_ARGS) Datum numeric_poly_sum(PG_FUNCTION_ARGS) { -#ifdef HAVE_INT128 - PolyNumAggState *state; + Int128AggState *state; Numeric res; NumericVar result; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* If there were no non-null inputs, return NULL */ if (state == NULL || state->N == 0) @@ -6205,21 +6060,17 @@ numeric_poly_sum(PG_FUNCTION_ARGS) free_var(&result); PG_RETURN_NUMERIC(res); -#else - return numeric_sum(fcinfo); -#endif } Datum numeric_poly_avg(PG_FUNCTION_ARGS) { -#ifdef HAVE_INT128 - PolyNumAggState *state; + Int128AggState *state; NumericVar result; Datum countd, sumd; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); /* If there were no non-null inputs, return NULL */ if (state == NULL || state->N == 0) @@ -6235,9 +6086,6 @@ numeric_poly_avg(PG_FUNCTION_ARGS) free_var(&result); PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd)); -#else - return numeric_avg(fcinfo); -#endif } Datum @@ -6470,7 +6318,6 @@ numeric_stddev_pop(PG_FUNCTION_ARGS) PG_RETURN_NUMERIC(res); } -#ifdef HAVE_INT128 static Numeric numeric_poly_stddev_internal(Int128AggState *state, bool variance, bool sample, @@ -6514,17 +6361,15 @@ numeric_poly_stddev_internal(Int128AggState *state, return res; } -#endif Datum numeric_poly_var_samp(PG_FUNCTION_ARGS) { -#ifdef HAVE_INT128 - PolyNumAggState *state; + Int128AggState *state; Numeric res; bool is_null; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); res = numeric_poly_stddev_internal(state, true, true, &is_null); @@ -6532,20 +6377,16 @@ numeric_poly_var_samp(PG_FUNCTION_ARGS) PG_RETURN_NULL(); else PG_RETURN_NUMERIC(res); -#else - return numeric_var_samp(fcinfo); -#endif } Datum numeric_poly_stddev_samp(PG_FUNCTION_ARGS) { -#ifdef HAVE_INT128 - PolyNumAggState *state; + Int128AggState *state; Numeric res; bool is_null; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); res = numeric_poly_stddev_internal(state, false, true, &is_null); @@ -6553,20 +6394,16 @@ numeric_poly_stddev_samp(PG_FUNCTION_ARGS) PG_RETURN_NULL(); else PG_RETURN_NUMERIC(res); -#else - return numeric_stddev_samp(fcinfo); -#endif } Datum numeric_poly_var_pop(PG_FUNCTION_ARGS) { -#ifdef HAVE_INT128 - PolyNumAggState *state; + Int128AggState *state; Numeric res; bool is_null; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? 
NULL : (Int128AggState *) PG_GETARG_POINTER(0); res = numeric_poly_stddev_internal(state, true, false, &is_null); @@ -6574,20 +6411,16 @@ numeric_poly_var_pop(PG_FUNCTION_ARGS) PG_RETURN_NULL(); else PG_RETURN_NUMERIC(res); -#else - return numeric_var_pop(fcinfo); -#endif } Datum numeric_poly_stddev_pop(PG_FUNCTION_ARGS) { -#ifdef HAVE_INT128 - PolyNumAggState *state; + Int128AggState *state; Numeric res; bool is_null; - state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0); + state = PG_ARGISNULL(0) ? NULL : (Int128AggState *) PG_GETARG_POINTER(0); res = numeric_poly_stddev_internal(state, false, false, &is_null); @@ -6595,9 +6428,6 @@ numeric_poly_stddev_pop(PG_FUNCTION_ARGS) PG_RETURN_NULL(); else PG_RETURN_NUMERIC(res); -#else - return numeric_stddev_pop(fcinfo); -#endif } /* @@ -8330,105 +8160,23 @@ numericvar_to_uint64(const NumericVar *var, uint64 *result) return true; } -#ifdef HAVE_INT128 -/* - * Convert numeric to int128, rounding if needed. - * - * If overflow, return false (no error is raised). Return true if okay. - */ -static bool -numericvar_to_int128(const NumericVar *var, int128 *result) -{ - NumericDigit *digits; - int ndigits; - int weight; - int i; - int128 val, - oldval; - bool neg; - NumericVar rounded; - - /* Round to nearest integer */ - init_var(&rounded); - set_var_from_var(var, &rounded); - round_var(&rounded, 0); - - /* Check for zero input */ - strip_var(&rounded); - ndigits = rounded.ndigits; - if (ndigits == 0) - { - *result = 0; - free_var(&rounded); - return true; - } - - /* - * For input like 10000000000, we must treat stripped digits as real. So - * the loop assumes there are weight+1 digits before the decimal point. - */ - weight = rounded.weight; - Assert(weight >= 0 && ndigits <= weight + 1); - - /* Construct the result */ - digits = rounded.digits; - neg = (rounded.sign == NUMERIC_NEG); - val = digits[0]; - for (i = 1; i <= weight; i++) - { - oldval = val; - val *= NBASE; - if (i < ndigits) - val += digits[i]; - - /* - * The overflow check is a bit tricky because we want to accept - * INT128_MIN, which will overflow the positive accumulator. We can - * detect this case easily though because INT128_MIN is the only - * nonzero value for which -val == val (on a two's complement machine, - * anyway). - */ - if ((val / NBASE) != oldval) /* possible overflow? */ - { - if (!neg || (-val) != val || val == 0 || oldval < 0) - { - free_var(&rounded); - return false; - } - } - } - - free_var(&rounded); - - *result = neg ? -val : val; - return true; -} - /* * Convert 128 bit integer to numeric. */ static void -int128_to_numericvar(int128 val, NumericVar *var) +int128_to_numericvar(INT128 val, NumericVar *var) { - uint128 uval, - newuval; + int sign; NumericDigit *ptr; int ndigits; + int32 dig; /* int128 can require at most 39 decimal digits; add one for safety */ alloc_var(var, 40 / DEC_DIGITS); - if (val < 0) - { - var->sign = NUMERIC_NEG; - uval = -val; - } - else - { - var->sign = NUMERIC_POS; - uval = val; - } + sign = int128_sign(val); + var->sign = sign < 0 ? 
NUMERIC_NEG : NUMERIC_POS; var->dscale = 0; - if (val == 0) + if (sign == 0) { var->ndigits = 0; var->weight = 0; @@ -8440,15 +8188,13 @@ int128_to_numericvar(int128 val, NumericVar *var) { ptr--; ndigits++; - newuval = uval / NBASE; - *ptr = uval - newuval * NBASE; - uval = newuval; - } while (uval); + int128_div_mod_int32(&val, NBASE, &dig); + *ptr = (NumericDigit) abs(dig); + } while (!int128_is_zero(val)); var->digits = ptr; var->ndigits = ndigits; var->weight = ndigits - 1; } -#endif /* * Convert a NumericVar to float8; if out of range, return +/- HUGE_VAL diff --git a/src/include/common/int128.h b/src/include/common/int128.h index addef99bfa5d1..62aae1bc6a7bb 100644 --- a/src/include/common/int128.h +++ b/src/include/common/int128.h @@ -37,11 +37,18 @@ * that a native int128 type would (probably) have. This makes no difference * for ordinary use of INT128, but allows union'ing INT128 with int128 for * testing purposes. + * + * PG_INT128_HI_INT64 and PG_INT128_LO_UINT64 allow the (signed) high and + * (unsigned) low 64-bit integer parts to be extracted portably on all + * platforms. */ #if USE_NATIVE_INT128 typedef int128 INT128; +#define PG_INT128_HI_INT64(i128) ((int64) ((i128) >> 64)) +#define PG_INT128_LO_UINT64(i128) ((uint64) (i128)) + #else typedef struct @@ -55,7 +62,28 @@ typedef struct #endif } INT128; +#define PG_INT128_HI_INT64(i128) ((i128).hi) +#define PG_INT128_LO_UINT64(i128) ((i128).lo) + +#endif + +/* + * Construct an INT128 from (signed) high and (unsigned) low 64-bit integer + * parts. + */ +static inline INT128 +make_int128(int64 hi, uint64 lo) +{ +#if USE_NATIVE_INT128 + return (((int128) hi) << 64) + lo; +#else + INT128 val; + + val.hi = hi; + val.lo = lo; + return val; #endif +} /* * Add an unsigned int64 value into an INT128 variable. @@ -109,6 +137,58 @@ int128_add_int64(INT128 *i128, int64 v) #endif } +/* + * Add an INT128 value into an INT128 variable. + */ +static inline void +int128_add_int128(INT128 *i128, INT128 v) +{ +#if USE_NATIVE_INT128 + *i128 += v; +#else + int128_add_uint64(i128, v.lo); + i128->hi += v.hi; +#endif +} + +/* + * Subtract an unsigned int64 value from an INT128 variable. + */ +static inline void +int128_sub_uint64(INT128 *i128, uint64 v) +{ +#if USE_NATIVE_INT128 + *i128 -= v; +#else + /* + * This is like int128_add_uint64(), except we must propagate a borrow to + * (subtract 1 from) the .hi part if the new .lo part is greater than the + * old .lo part. + */ + uint64 oldlo = i128->lo; + + i128->lo -= v; + i128->hi -= (i128->lo > oldlo); +#endif +} + +/* + * Subtract a signed int64 value from an INT128 variable. + */ +static inline void +int128_sub_int64(INT128 *i128, int64 v) +{ +#if USE_NATIVE_INT128 + *i128 -= v; +#else + /* Like int128_add_int64() with the sign of v inverted */ + uint64 oldlo = i128->lo; + + i128->lo -= v; + i128->hi -= (i128->lo > oldlo) + (v >> 63); +#endif +} + /* * INT64_HI_INT32 extracts the most significant 32 bits of int64 as int32. * INT64_LO_UINT32 extracts the least significant 32 bits as uint32. @@ -179,6 +259,165 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y) #endif } +/* + * Subtract the 128-bit product of two int64 values from an INT128 variable. 
+ */
+static inline void
+int128_sub_int64_mul_int64(INT128 *i128, int64 x, int64 y)
+{
+#if USE_NATIVE_INT128
+	*i128 -= (int128) x * (int128) y;
+#else
+	/* As above, except subtract the 128-bit product */
+	if (x != 0 && y != 0)
+	{
+		int32		x_hi = INT64_HI_INT32(x);
+		uint32		x_lo = INT64_LO_UINT32(x);
+		int32		y_hi = INT64_HI_INT32(y);
+		uint32		y_lo = INT64_LO_UINT32(y);
+		int64		tmp;
+
+		/* the first term */
+		i128->hi -= (int64) x_hi * (int64) y_hi;
+
+		/* the second term: sign-extended with the sign of x */
+		tmp = (int64) x_hi * (int64) y_lo;
+		i128->hi -= INT64_HI_INT32(tmp);
+		int128_sub_uint64(i128, ((uint64) INT64_LO_UINT32(tmp)) << 32);
+
+		/* the third term: sign-extended with the sign of y */
+		tmp = (int64) x_lo * (int64) y_hi;
+		i128->hi -= INT64_HI_INT32(tmp);
+		int128_sub_uint64(i128, ((uint64) INT64_LO_UINT32(tmp)) << 32);
+
+		/* the fourth term: always unsigned */
+		int128_sub_uint64(i128, (uint64) x_lo * (uint64) y_lo);
+	}
+#endif
+}
+
+/*
+ * Divide an INT128 variable by a signed int32 value, returning the quotient
+ * and remainder. The remainder will have the same sign as *i128.
+ *
+ * Note: This provides no protection against dividing by 0, or dividing
+ * INT128_MIN by -1, which overflows. It is the caller's responsibility to
+ * guard against those.
+ */
+static inline void
+int128_div_mod_int32(INT128 *i128, int32 v, int32 *remainder)
+{
+#if USE_NATIVE_INT128
+	int128		old_i128 = *i128;
+
+	*i128 /= v;
+	*remainder = (int32) (old_i128 - *i128 * v);
+#else
+	/*
+	 * To avoid any intermediate values overflowing (as happens if INT64_MIN
+	 * is divided by -1), we first compute the quotient abs(*i128) / abs(v)
+	 * using unsigned 64-bit arithmetic, and then fix the signs up at the end.
+	 *
+	 * The quotient is computed using the short division algorithm described
+	 * in Knuth volume 2, section 4.3.1 exercise 16 (cf. div_var_int() in
+	 * numeric.c). Since the absolute value of the divisor is known to be at
+	 * most 2^31, the remainder carried from one digit to the next is at most
+	 * 2^31 - 1, and so there is no danger of overflow when this is combined
+	 * with the next digit (a 32-bit unsigned integer).
+	 */
+	uint64		n_hi;
+	uint64		n_lo;
+	uint32		d;
+	uint64		q;
+	uint64		r;
+	uint64		tmp;
+
+	/* numerator: absolute value of *i128 */
+	if (i128->hi < 0)
+	{
+		n_hi = 0 - ((uint64) i128->hi);
+		n_lo = 0 - i128->lo;
+		if (n_lo != 0)
+			n_hi--;
+	}
+	else
+	{
+		n_hi = i128->hi;
+		n_lo = i128->lo;
+	}
+
+	/* denominator: absolute value of v */
+	d = abs(v);
+
+	/* quotient and remainder of high 64 bits */
+	q = n_hi / d;
+	r = n_hi % d;
+	n_hi = q;
+
+	/* quotient and remainder of next 32 bits (upper half of n_lo) */
+	tmp = (r << 32) + (n_lo >> 32);
+	q = tmp / d;
+	r = tmp % d;
+
+	/* quotient and remainder of last 32 bits (lower half of n_lo) */
+	tmp = (r << 32) + (uint32) n_lo;
+	n_lo = q << 32;
+	q = tmp / d;
+	r = tmp % d;
+	n_lo += q;
+
+	/* final remainder should have the same sign as *i128 */
+	*remainder = i128->hi < 0 ? (int32) (0 - r) : (int32) r;
+
+	/* store the quotient in *i128, negating it if necessary */
+	if ((i128->hi < 0) != (v < 0))
+	{
+		n_hi = 0 - n_hi;
+		n_lo = 0 - n_lo;
+		if (n_lo != 0)
+			n_hi--;
+	}
+	i128->hi = (int64) n_hi;
+	i128->lo = n_lo;
+#endif
+}
+
+/*
+ * Test if an INT128 value is zero.
+ */
+static inline bool
+int128_is_zero(INT128 x)
+{
+#if USE_NATIVE_INT128
+	return x == 0;
+#else
+	return x.hi == 0 && x.lo == 0;
+#endif
+}
+
+/*
+ * Return the sign of an INT128 value (returns -1, 0, or +1).
+ */ +static inline int +int128_sign(INT128 x) +{ +#if USE_NATIVE_INT128 + if (x < 0) + return -1; + if (x > 0) + return 1; + return 0; +#else + if (x.hi < 0) + return -1; + if (x.hi > 0) + return 1; + if (x.lo > 0) + return 1; + return 0; +#endif +} + /* * Compare two INT128 values, return -1, 0, or +1. */ diff --git a/src/test/modules/test_int128/test_int128.c b/src/test/modules/test_int128/test_int128.c index caa06541a1fe3..c9c17a73a4e85 100644 --- a/src/test/modules/test_int128/test_int128.c +++ b/src/test/modules/test_int128/test_int128.c @@ -93,8 +93,13 @@ main(int argc, char **argv) int64 x = pg_prng_uint64(&pg_global_prng_state); int64 y = pg_prng_uint64(&pg_global_prng_state); int64 z = pg_prng_uint64(&pg_global_prng_state); + int64 w = pg_prng_uint64(&pg_global_prng_state); + int32 z32 = (int32) z; test128 t1; test128 t2; + test128 t3; + int32 r1; + int32 r2; /* check unsigned addition */ t1.hl.hi = x; @@ -126,25 +131,111 @@ main(int argc, char **argv) return 1; } - /* check multiplication */ - t1.i128 = (int128) x * (int128) y; + /* check 128-bit signed addition */ + t1.hl.hi = x; + t1.hl.lo = y; + t2 = t1; + t3.hl.hi = z; + t3.hl.lo = w; + t1.i128 += t3.i128; + int128_add_int128(&t2.I128, t3.I128); - t2.hl.hi = t2.hl.lo = 0; - int128_add_int64_mul_int64(&t2.I128, x, y); + if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) + { + printf(INT128_HEX_FORMAT " + " INT128_HEX_FORMAT "\n", x, y, z, w); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); + return 1; + } + + /* check unsigned subtraction */ + t1.hl.hi = x; + t1.hl.lo = y; + t2 = t1; + t1.i128 -= (int128) (uint64) z; + int128_sub_uint64(&t2.I128, (uint64) z); if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) { - printf("%016" PRIx64 " * %016" PRIx64 "\n", x, y); + printf(INT128_HEX_FORMAT " - unsigned %016" PRIx64 "\n", x, y, z); printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); return 1; } + /* check signed subtraction */ + t1.hl.hi = x; + t1.hl.lo = y; + t2 = t1; + t1.i128 -= (int128) z; + int128_sub_int64(&t2.I128, z); + + if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) + { + printf(INT128_HEX_FORMAT " - signed %016" PRIx64 "\n", x, y, z); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); + return 1; + } + + /* check 64x64-bit multiply-add */ + t1.hl.hi = x; + t1.hl.lo = y; + t2 = t1; + t1.i128 += (int128) z * (int128) w; + int128_add_int64_mul_int64(&t2.I128, z, w); + + if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) + { + printf(INT128_HEX_FORMAT " + %016" PRIx64 " * %016" PRIx64 "\n", x, y, z, w); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); + return 1; + } + + /* check 64x64-bit multiply-subtract */ + t1.hl.hi = x; + t1.hl.lo = y; + t2 = t1; + t1.i128 -= (int128) z * (int128) w; + int128_sub_int64_mul_int64(&t2.I128, z, w); + + if (t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) + { + printf(INT128_HEX_FORMAT " - %016" PRIx64 " * %016" PRIx64 "\n", x, y, z, w); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); + return 1; + } + + /* check 128/32-bit division */ + t3.hl.hi = x; + t3.hl.lo = y; + t1.i128 = t3.i128 / z32; + r1 = (int32) (t3.i128 % z32); + t2 = t3; + int128_div_mod_int32(&t2.I128, z32, &r2); + + if 
(t1.hl.hi != t2.hl.hi || t1.hl.lo != t2.hl.lo) + { + printf(INT128_HEX_FORMAT " / signed %08X\n", t3.hl.hi, t3.hl.lo, z32); + printf("native = " INT128_HEX_FORMAT "\n", t1.hl.hi, t1.hl.lo); + printf("result = " INT128_HEX_FORMAT "\n", t2.hl.hi, t2.hl.lo); + return 1; + } + if (r1 != r2) + { + printf(INT128_HEX_FORMAT " %% signed %08X\n", t3.hl.hi, t3.hl.lo, z32); + printf("native = %08X\n", r1); + printf("result = %08X\n", r2); + return 1; + } + /* check comparison */ t1.hl.hi = x; t1.hl.lo = y; t2.hl.hi = z; - t2.hl.lo = pg_prng_uint64(&pg_global_prng_state); + t2.hl.lo = w; if (my_int128_compare(t1.i128, t2.i128) != int128_compare(t1.I128, t2.I128)) diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1f1ce2380af68..7319945ffe325 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -680,6 +680,25 @@ SELECT sum2(q1,q2) FROM int8_tbl; 18271560493827981 (1 row) +-- sanity checks +SELECT sum(q1+q2), sum(q1)+sum(q2) FROM int8_tbl; + sum | ?column? +-------------------+------------------- + 18271560493827981 | 18271560493827981 +(1 row) + +SELECT sum(q1-q2), sum(q2-q1), sum(q1)-sum(q2) FROM int8_tbl; + sum | sum | ?column? +------------------+-------------------+------------------ + 9135780246913245 | -9135780246913245 | 9135780246913245 +(1 row) + +SELECT sum(q1*2000), sum(-q1*2000), 2000*sum(q1) FROM int8_tbl; + sum | sum | ?column? +----------------------+-----------------------+---------------------- + 27407340740741226000 | -27407340740741226000 | 27407340740741226000 +(1 row) + -- test for outer-level aggregates -- this should work select ten, sum(distinct four) from onek a diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 277b4b198ccc4..dde85d0dfb235 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -182,6 +182,11 @@ SELECT newcnt(*) AS cnt_1000 FROM onek; SELECT oldcnt(*) AS cnt_1000 FROM onek; SELECT sum2(q1,q2) FROM int8_tbl; +-- sanity checks +SELECT sum(q1+q2), sum(q1)+sum(q2) FROM int8_tbl; +SELECT sum(q1-q2), sum(q2-q1), sum(q1)-sum(q2) FROM int8_tbl; +SELECT sum(q1*2000), sum(-q1*2000), 2000*sum(q1) FROM int8_tbl; + -- test for outer-level aggregates -- this should work From 04b7ff3cd33c0b9574eed3b662c16d810a903bd4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 7 Aug 2025 18:04:45 -0400 Subject: [PATCH 465/602] doc: add float as an alias for double precision. Although the "Floating-Point Types" section says that "float" data type is taken to mean "double precision", this information was not reflected in the data type table that lists all data type aliases. 
Reported-by: alexander.kjall@hafslund.no
Author: Euler Taveira
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/175456294638.800.12038559679827947313@wrigleys.postgresql.org
Backpatch-through: 13
---
 doc/src/sgml/datatype.sgml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 0994e08931155..7458f216e5094 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -117,7 +117,7 @@
 double precision
- float8
+ float, float8
 double precision floating-point number (8 bytes)

From 4ae03be5473417ba5850bb0c1f0e60c1a0998dc9 Mon Sep 17 00:00:00 2001
From: Jacob Champion
Date: Thu, 7 Aug 2025 15:31:28 -0700
Subject: [PATCH 466/602] meson: Fix install-quiet after clean

libpq-oauth was missing from the installed_targets list, so

    $ ninja clean && ninja install-quiet

failed with the error message

    ERROR: File 'src/interfaces/libpq-oauth/libpq-oauth.a' could not be found

It seems a little odd to have to tell Meson what's missing, since it
clearly knows how to build that file during regular installation. But the
"quiet" variant we've created must use --no-rebuild, to avoid spawning
concurrent ninja processes that would step on each other.

Reported-by: Andres Freund
Backpatch-through: 18
Discussion: https://postgr.es/m/hbpqdwxkfnqijaxzgdpvdtp57s7gwxa5d6sbxswovjrournlk6%404jnb2gzan4em
---
 meson.build | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/meson.build b/meson.build
index ca423dc8e12f3..431fe9ccc9baf 100644
--- a/meson.build
+++ b/meson.build
@@ -3472,6 +3472,13 @@ installed_targets = [
   ecpg_targets,
 ]
 
+if oauth_flow_supported
+  installed_targets += [
+    libpq_oauth_so,
+    libpq_oauth_st,
+  ]
+endif
+
 # all targets that require building code
 all_built = [
   installed_targets,

From 84b32fd2283090b2ab87dc38445d487e98e9e231 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 8 Aug 2025 09:07:10 +0900
Subject: [PATCH 467/602] Add information about "generation" when dropping a
 pgstats entry twice

Dropping a pgstats entry twice should not happen, and the error report
generated was missing the "generation" counter (tracking when an entry
is reused) that was added in 818119afccd3.

Like d92573adcb02, back-patch down to v15, where this information is
useful for gathering more detail from instances where the problem shows
up. A report has shown that this error path has been reached on a standby
based on 17.3, for a relation stats entry and an OID close to wraparound.
Author: Bertrand Drouvot
Discussion: https://postgr.es/m/CAN4RuQvYth942J2+FcLmJKgdpq6fE5eqyFvb_PuskxF2eL=Wzg@mail.gmail.com
Backpatch-through: 15
---
 src/backend/utils/activity/pgstat_shmem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 53e7d534270ac..cca4277f2343a 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -874,11 +874,12 @@ pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
 	 */
 	if (shent->dropped)
 		elog(ERROR,
-			 "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u",
+			 "trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
 			 pgstat_get_kind_info(shent->key.kind)->name,
 			 shent->key.dboid,
 			 shent->key.objid,
-			 pg_atomic_read_u32(&shent->refcount));
+			 pg_atomic_read_u32(&shent->refcount),
+			 pg_atomic_read_u32(&shent->generation));
 
 	shent->dropped = true;
 
 	/* release refcount marking entry as not dropped */

From 62a1211d3389095b685a4f80f7b864794cf114f6 Mon Sep 17 00:00:00 2001
From: Etsuro Fujita
Date: Fri, 8 Aug 2025 10:50:00 +0900
Subject: [PATCH 468/602] Disallow collecting transition tuples from child
 foreign tables.

Commit 9e6104c66 disallowed transition tables on foreign tables, but
failed to account for cases where a foreign table is a child of a
partitioned/inherited table on which transition tables exist. As a
result, incorrect transition tuples were collected from such foreign
tables when queries on the parent table triggered transition capture.
This occurred not only for inherited UPDATE/DELETE but also for
partitioned INSERT, later supported by commit 3d956d956, which should
have handled at least the INSERT case but did not.

To fix, modify ExecAR*Triggers to throw an error if the given relation
is a foreign table requesting transition capture. Also, this commit
fixes make_modifytable so that, when an inherited UPDATE/DELETE triggers
transition capture, FDWs choose normal operations to modify child
foreign tables rather than DirectModify; otherwise they would skip the
calls to ExecAR*Triggers at execution, causing unexpected behavior.

Author: Etsuro Fujita
Reviewed-by: Amit Langote
Discussion: https://postgr.es/m/CAPmGK14QJYikKzBDCe3jMbpGENnQ7popFmbEgm-XTNuk55oyHg%40mail.gmail.com
Backpatch-through: 13
---
 .../postgres_fdw/expected/postgres_fdw.out | 113 ++++++++++++++++++
 contrib/postgres_fdw/sql/postgres_fdw.sql  |  78 ++++++++++++
 src/backend/commands/trigger.c             |  28 +++++
 src/backend/optimizer/plan/createplan.c    |  24 +++-
 src/backend/optimizer/util/plancat.c       |  54 +++++++++
 src/include/optimizer/plancat.h            |   2 +
 6 files changed, 295 insertions(+), 4 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 4b6e49a5d950d..a434eb1395e03 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8233,6 +8233,119 @@ DELETE FROM rem1; -- can't be pushed down
 (5 rows)
 
 DROP TRIGGER trig_row_after_delete ON rem1;
+-- We are allowed to create transition-table triggers on both kinds of
+-- inheritance even if they contain foreign tables as children, but currently
+-- collecting transition tuples from such foreign tables is not supported.
+CREATE TABLE local_tbl (a text, b int); +CREATE FOREIGN TABLE foreign_tbl (a text, b int) + SERVER loopback OPTIONS (table_name 'local_tbl'); +INSERT INTO foreign_tbl VALUES ('AAA', 42); +-- Test case for partition hierarchy +CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a); +ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA'); +CREATE TRIGGER parent_tbl_insert_trig + AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +CREATE TRIGGER parent_tbl_update_trig + AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +CREATE TRIGGER parent_tbl_delete_trig + AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +INSERT INTO parent_tbl VALUES ('AAA', 42); +ERROR: cannot collect transition tuples from child foreign tables +COPY parent_tbl (a, b) FROM stdin; +ERROR: cannot collect transition tuples from child foreign tables +CONTEXT: COPY parent_tbl, line 1: "AAA 42" +ALTER SERVER loopback OPTIONS (ADD batch_size '10'); +INSERT INTO parent_tbl VALUES ('AAA', 42); +ERROR: cannot collect transition tuples from child foreign tables +COPY parent_tbl (a, b) FROM stdin; +ERROR: cannot collect transition tuples from child foreign tables +CONTEXT: COPY parent_tbl, line 1: "AAA 42" +ALTER SERVER loopback OPTIONS (DROP batch_size); +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE parent_tbl SET b = b + 1; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Update on public.parent_tbl + Foreign Update on public.foreign_tbl parent_tbl_1 + Remote SQL: UPDATE public.local_tbl SET b = $2 WHERE ctid = $1 + -> Foreign Scan on public.foreign_tbl parent_tbl_1 + Output: (parent_tbl_1.b + 1), parent_tbl_1.tableoid, parent_tbl_1.ctid, parent_tbl_1.* + Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE +(6 rows) + +UPDATE parent_tbl SET b = b + 1; +ERROR: cannot collect transition tuples from child foreign tables +EXPLAIN (VERBOSE, COSTS OFF) +DELETE FROM parent_tbl; + QUERY PLAN +------------------------------------------------------------------ + Delete on public.parent_tbl + Foreign Delete on public.foreign_tbl parent_tbl_1 + Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1 + -> Foreign Scan on public.foreign_tbl parent_tbl_1 + Output: parent_tbl_1.tableoid, parent_tbl_1.ctid + Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE +(6 rows) + +DELETE FROM parent_tbl; +ERROR: cannot collect transition tuples from child foreign tables +ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl; +DROP TABLE parent_tbl; +-- Test case for non-partition hierarchy +CREATE TABLE parent_tbl (a text, b int); +ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl; +CREATE TRIGGER parent_tbl_update_trig + AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +CREATE TRIGGER parent_tbl_delete_trig + AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE parent_tbl SET b = b + 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Update on public.parent_tbl + Update on public.parent_tbl parent_tbl_1 + Foreign Update on public.foreign_tbl parent_tbl_2 + Remote SQL: UPDATE 
public.local_tbl SET b = $2 WHERE ctid = $1 + -> Result + Output: (parent_tbl.b + 1), parent_tbl.tableoid, parent_tbl.ctid, (NULL::record) + -> Append + -> Seq Scan on public.parent_tbl parent_tbl_1 + Output: parent_tbl_1.b, parent_tbl_1.tableoid, parent_tbl_1.ctid, NULL::record + -> Foreign Scan on public.foreign_tbl parent_tbl_2 + Output: parent_tbl_2.b, parent_tbl_2.tableoid, parent_tbl_2.ctid, parent_tbl_2.* + Remote SQL: SELECT a, b, ctid FROM public.local_tbl FOR UPDATE +(12 rows) + +UPDATE parent_tbl SET b = b + 1; +ERROR: cannot collect transition tuples from child foreign tables +EXPLAIN (VERBOSE, COSTS OFF) +DELETE FROM parent_tbl; + QUERY PLAN +------------------------------------------------------------------------ + Delete on public.parent_tbl + Delete on public.parent_tbl parent_tbl_1 + Foreign Delete on public.foreign_tbl parent_tbl_2 + Remote SQL: DELETE FROM public.local_tbl WHERE ctid = $1 + -> Append + -> Seq Scan on public.parent_tbl parent_tbl_1 + Output: parent_tbl_1.tableoid, parent_tbl_1.ctid + -> Foreign Scan on public.foreign_tbl parent_tbl_2 + Output: parent_tbl_2.tableoid, parent_tbl_2.ctid + Remote SQL: SELECT ctid FROM public.local_tbl FOR UPDATE +(10 rows) + +DELETE FROM parent_tbl; +ERROR: cannot collect transition tuples from child foreign tables +ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl; +DROP TABLE parent_tbl; +-- Cleanup +DROP FOREIGN TABLE foreign_tbl; +DROP TABLE local_tbl; -- =================================================================== -- test inheritance features -- =================================================================== diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 31b6c685b551b..d9bed565c81fd 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -2281,6 +2281,84 @@ EXPLAIN (verbose, costs off) DELETE FROM rem1; -- can't be pushed down DROP TRIGGER trig_row_after_delete ON rem1; + +-- We are allowed to create transition-table triggers on both kinds of +-- inheritance even if they contain foreign tables as children, but currently +-- collecting transition tuples from such foreign tables is not supported. + +CREATE TABLE local_tbl (a text, b int); +CREATE FOREIGN TABLE foreign_tbl (a text, b int) + SERVER loopback OPTIONS (table_name 'local_tbl'); + +INSERT INTO foreign_tbl VALUES ('AAA', 42); + +-- Test case for partition hierarchy +CREATE TABLE parent_tbl (a text, b int) PARTITION BY LIST (a); +ALTER TABLE parent_tbl ATTACH PARTITION foreign_tbl FOR VALUES IN ('AAA'); + +CREATE TRIGGER parent_tbl_insert_trig + AFTER INSERT ON parent_tbl REFERENCING NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +CREATE TRIGGER parent_tbl_update_trig + AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +CREATE TRIGGER parent_tbl_delete_trig + AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); + +INSERT INTO parent_tbl VALUES ('AAA', 42); + +COPY parent_tbl (a, b) FROM stdin; +AAA 42 +\. + +ALTER SERVER loopback OPTIONS (ADD batch_size '10'); + +INSERT INTO parent_tbl VALUES ('AAA', 42); + +COPY parent_tbl (a, b) FROM stdin; +AAA 42 +\. 
+ +ALTER SERVER loopback OPTIONS (DROP batch_size); + +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE parent_tbl SET b = b + 1; +UPDATE parent_tbl SET b = b + 1; + +EXPLAIN (VERBOSE, COSTS OFF) +DELETE FROM parent_tbl; +DELETE FROM parent_tbl; + +ALTER TABLE parent_tbl DETACH PARTITION foreign_tbl; +DROP TABLE parent_tbl; + +-- Test case for non-partition hierarchy +CREATE TABLE parent_tbl (a text, b int); +ALTER FOREIGN TABLE foreign_tbl INHERIT parent_tbl; + +CREATE TRIGGER parent_tbl_update_trig + AFTER UPDATE ON parent_tbl REFERENCING OLD TABLE AS old_table NEW TABLE AS new_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); +CREATE TRIGGER parent_tbl_delete_trig + AFTER DELETE ON parent_tbl REFERENCING OLD TABLE AS old_table + FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func(); + +EXPLAIN (VERBOSE, COSTS OFF) +UPDATE parent_tbl SET b = b + 1; +UPDATE parent_tbl SET b = b + 1; + +EXPLAIN (VERBOSE, COSTS OFF) +DELETE FROM parent_tbl; +DELETE FROM parent_tbl; + +ALTER FOREIGN TABLE foreign_tbl NO INHERIT parent_tbl; +DROP TABLE parent_tbl; + +-- Cleanup +DROP FOREIGN TABLE foreign_tbl; +DROP TABLE local_tbl; + -- =================================================================== -- test inheritance features -- =================================================================== diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7dc121f73f17e..93b7ac1a44216 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2545,6 +2545,15 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + if (relinfo->ri_FdwRoutine && transition_capture && + transition_capture->tcs_insert_new_table) + { + Assert(relinfo->ri_RootResultRelInfo); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot collect transition tuples from child foreign tables"))); + } + if ((trigdesc && trigdesc->trig_insert_after_row) || (transition_capture && transition_capture->tcs_insert_new_table)) AfterTriggerSaveEvent(estate, relinfo, NULL, NULL, @@ -2797,6 +2806,15 @@ ExecARDeleteTriggers(EState *estate, { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + if (relinfo->ri_FdwRoutine && transition_capture && + transition_capture->tcs_delete_old_table) + { + Assert(relinfo->ri_RootResultRelInfo); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot collect transition tuples from child foreign tables"))); + } + if ((trigdesc && trigdesc->trig_delete_after_row) || (transition_capture && transition_capture->tcs_delete_old_table)) { @@ -3134,6 +3152,16 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + if (relinfo->ri_FdwRoutine && transition_capture && + (transition_capture->tcs_update_old_table || + transition_capture->tcs_update_new_table)) + { + Assert(relinfo->ri_RootResultRelInfo); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot collect transition tuples from child foreign tables"))); + } + if ((trigdesc && trigdesc->trig_update_after_row) || (transition_capture && (transition_capture->tcs_update_old_table || diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index bfefc7dbea106..9fd5c31edf228 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -7233,6 +7233,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan, ModifyTable *node = makeNode(ModifyTable); bool returning_old_or_new = false; 
bool returning_old_or_new_valid = false; + bool transition_tables = false; + bool transition_tables_valid = false; List *fdw_private_list; Bitmapset *direct_modify_plans; ListCell *lc; @@ -7379,8 +7381,8 @@ make_modifytable(PlannerInfo *root, Plan *subplan, * callback functions needed for that and (2) there are no local * structures that need to be run for each modified row: row-level * triggers on the foreign table, stored generated columns, WITH CHECK - * OPTIONs from parent views, or Vars returning OLD/NEW in the - * RETURNING list. + * OPTIONs from parent views, Vars returning OLD/NEW in the RETURNING + * list, or transition tables on the named relation. */ direct_modify = false; if (fdwroutine != NULL && @@ -7392,7 +7394,10 @@ make_modifytable(PlannerInfo *root, Plan *subplan, !has_row_triggers(root, rti, operation) && !has_stored_generated_columns(root, rti)) { - /* returning_old_or_new is the same for all result relations */ + /* + * returning_old_or_new and transition_tables are the same for all + * result relations, respectively + */ if (!returning_old_or_new_valid) { returning_old_or_new = @@ -7401,7 +7406,18 @@ make_modifytable(PlannerInfo *root, Plan *subplan, returning_old_or_new_valid = true; } if (!returning_old_or_new) - direct_modify = fdwroutine->PlanDirectModify(root, node, rti, i); + { + if (!transition_tables_valid) + { + transition_tables = has_transition_tables(root, + nominalRelation, + operation); + transition_tables_valid = true; + } + if (!transition_tables) + direct_modify = fdwroutine->PlanDirectModify(root, node, + rti, i); + } } if (direct_modify) direct_modify_plans = bms_add_member(direct_modify_plans, i); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index c6a58afc5e506..6ce4efea11845 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2388,6 +2388,60 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event) return result; } +/* + * has_transition_tables + * + * Detect whether the specified relation has any transition tables for event. 
+ */ +bool +has_transition_tables(PlannerInfo *root, Index rti, CmdType event) +{ + RangeTblEntry *rte = planner_rt_fetch(rti, root); + Relation relation; + TriggerDesc *trigDesc; + bool result = false; + + Assert(rte->rtekind == RTE_RELATION); + + /* Currently foreign tables cannot have transition tables */ + if (rte->relkind == RELKIND_FOREIGN_TABLE) + return result; + + /* Assume we already have adequate lock */ + relation = table_open(rte->relid, NoLock); + + trigDesc = relation->trigdesc; + switch (event) + { + case CMD_INSERT: + if (trigDesc && + trigDesc->trig_insert_new_table) + result = true; + break; + case CMD_UPDATE: + if (trigDesc && + (trigDesc->trig_update_old_table || + trigDesc->trig_update_new_table)) + result = true; + break; + case CMD_DELETE: + if (trigDesc && + trigDesc->trig_delete_old_table) + result = true; + break; + /* There is no separate event for MERGE, only INSERT/UPDATE/DELETE */ + case CMD_MERGE: + result = false; + break; + default: + elog(ERROR, "unrecognized CmdType: %d", (int) event); + break; + } + + table_close(relation, NoLock); + return result; +} + /* * has_stored_generated_columns * diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index d6f6f4ad2d788..dd8f2cd157f6f 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -76,6 +76,8 @@ extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node); extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event); +extern bool has_transition_tables(PlannerInfo *root, Index rti, CmdType event); + extern bool has_stored_generated_columns(PlannerInfo *root, Index rti); extern Bitmapset *get_dependent_generated_columns(PlannerInfo *root, Index rti, From 85ccd7e30a6dd378e025a918916f7b1d72004001 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Fri, 8 Aug 2025 14:36:39 +0900 Subject: [PATCH 469/602] pg_dump: Fix incorrect parsing of object types in pg_dump --filter. Previously, pg_dump --filter could misinterpret invalid object types in the filter file as valid ones. For example, the invalid object type "table-data" (likely a typo for the valid "table_data") could be mistakenly recognized as "table", causing pg_dump to succeed when it should have failed. This happened because pg_dump identified keywords as sequences of ASCII alphabetic characters, treating non-alphabetic characters (like hyphens) as keyword boundaries. As a result, "table-data" was parsed as "table". To fix this, pg_dump --filter now treats keywords as strings of non-whitespace characters, ensuring invalid types like "table-data" are correctly rejected. Back-patch to v17, where the --filter option was introduced. Author: Fujii Masao Reviewed-by: Xuneng Zhou Reviewed-by: Srinath Reddy Reviewed-by: Daniel Gustafsson Discussion: https://postgr.es/m/CAHGQGwFzPKUwiV5C-NLBqz1oK1+z9K8cgrF+LcxFem-p3_Ftug@mail.gmail.com Backpatch-through: 17 --- src/bin/pg_dump/filter.c | 13 ++++++++----- src/bin/pg_dump/t/005_pg_dump_filterfile.pl | 14 ++++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/bin/pg_dump/filter.c b/src/bin/pg_dump/filter.c index 7214d51413771..e3cdcf4097563 100644 --- a/src/bin/pg_dump/filter.c +++ b/src/bin/pg_dump/filter.c @@ -171,9 +171,8 @@ pg_log_filter_error(FilterStateData *fstate, const char *fmt,...) /* * filter_get_keyword - read the next filter keyword from buffer * - * Search for keywords (limited to ascii alphabetic characters) in - * the passed in line buffer. 
Returns NULL when the buffer is empty or the first
- * char is not alpha. The char '_' is allowed, except as the first character.
+ * Search for keywords (strings of non-whitespace characters) in the passed
+ * in line buffer. Returns NULL when the buffer is empty or no keyword exists.
  * The length of the found keyword is returned in the size parameter.
  */
 static const char *
@@ -182,6 +181,9 @@ filter_get_keyword(const char **line, int *size)
 	const char *ptr = *line;
 	const char *result = NULL;
 
+	/* The passed buffer must not be NULL */
+	Assert(*line != NULL);
+
 	/* Set returned length preemptively in case no keyword is found */
 	*size = 0;
 
@@ -189,11 +191,12 @@
 	while (isspace((unsigned char) *ptr))
 		ptr++;
 
-	if (isalpha((unsigned char) *ptr))
+	/* Grab one keyword that's the string of non-whitespace characters */
+	if (*ptr != '\0' && !isspace((unsigned char) *ptr))
 	{
 		result = ptr++;
 
-		while (isalpha((unsigned char) *ptr) || *ptr == '_')
+		while (*ptr != '\0' && !isspace((unsigned char) *ptr))
 			ptr++;
 
 		*size = ptr - result;

diff --git a/src/bin/pg_dump/t/005_pg_dump_filterfile.pl b/src/bin/pg_dump/t/005_pg_dump_filterfile.pl
index f05e8a20e0559..5c69ec31c393f 100644
--- a/src/bin/pg_dump/t/005_pg_dump_filterfile.pl
+++ b/src/bin/pg_dump/t/005_pg_dump_filterfile.pl
@@ -418,10 +418,16 @@
 	qr/invalid filter command/,
 	"invalid syntax: incorrect filter command");
 
-# Test invalid object type
+# Test invalid object type.
+#
+# This test also verifies that keywords are correctly recognized as strings of
+# non-whitespace characters. If the parser incorrectly treats non-whitespace
+# delimiters (like hyphens) as keyword boundaries, "table-data" might be
+# misread as the valid object type "table". To catch such issues,
+# "table-data" is used here as an intentionally invalid object type.
 open $inputfile, '>', "$tempdir/inputfile.txt"
   or die "unable to open filterfile for writing";
-print $inputfile "include xxx";
+print $inputfile "exclude table-data one";
 close $inputfile;
 
 command_fails_like(
@@ -432,8 +438,8 @@
 		'--filter' => "$tempdir/inputfile.txt",
 		'postgres'
 	],
-	qr/unsupported filter object type: "xxx"/,
-	"invalid syntax: invalid object type specified, should be table, schema, foreign_data or data"
+	qr/unsupported filter object type: "table-data"/,
+	"invalid syntax: invalid object type specified"
 );
 
 # Test missing object identifier pattern

From 9e63f83a7e7f8211581bfe9e9d05b72695261f4f Mon Sep 17 00:00:00 2001
From: Etsuro Fujita
Date: Fri, 8 Aug 2025 17:35:00 +0900
Subject: [PATCH 470/602] Fix oversight in FindTriggerIncompatibleWithInheritance.

This function is called from ATExecAttachPartition/ATExecAddInherit,
which prevent tables with row-level triggers with transition tables from
becoming partitions or inheritance children, to check whether there is
such a trigger on the given table. However, it failed to check whether a
found trigger is row-level, causing the caller functions to needlessly
prevent a table with only a statement-level trigger with transition
tables from becoming a partition or inheritance child. Repair.

Oversight in commit 501ed02cf.
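For illustration, the net effect of the fix can be condensed as follows. This
is an editorial sketch, not code from the patch: it assumes the
TriggerDesc/Trigger layout used by trigger.c (including the numtriggers
field, which does not itself appear in the hunk below), and simply shows the
corrected scan order -- skip statement-level triggers first, then test for
transition tables.

	for (int i = 0; i < trigdesc->numtriggers; i++)
	{
		Trigger    *trigger = &trigdesc->triggers[i];

		/* Statement-level triggers never make a table incompatible. */
		if (!TRIGGER_FOR_ROW(trigger->tgtype))
			continue;

		/* A row-level trigger with transition tables does. */
		if (trigger->tgoldtable != NULL || trigger->tgnewtable != NULL)
			return trigger->tgname;
	}
	return NULL;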
Author: Etsuro Fujita Discussion: https://postgr.es/m/CAPmGK167mXzwzzmJ_0YZ3EZrbwiCxtM1vogH_8drqsE6PtxRYw%40mail.gmail.com Backpatch-through: 13 --- src/backend/commands/trigger.c | 2 ++ src/test/regress/expected/triggers.out | 8 ++++++++ src/test/regress/sql/triggers.sql | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 93b7ac1a44216..71014c65b4950 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2285,6 +2285,8 @@ FindTriggerIncompatibleWithInheritance(TriggerDesc *trigdesc) { Trigger *trigger = &trigdesc->triggers[i]; + if (!TRIGGER_FOR_ROW(trigger->tgtype)) + continue; if (trigger->tgoldtable != NULL || trigger->tgnewtable != NULL) return trigger->tgname; } diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out index 872b9100e1a01..1eb8fba095370 100644 --- a/src/test/regress/expected/triggers.out +++ b/src/test/regress/expected/triggers.out @@ -2769,6 +2769,10 @@ NOTICE: trigger = child3_delete_trig, old table = (42,CCC) -- copy into parent sees parent-format tuples copy parent (a, b) from stdin; NOTICE: trigger = parent_insert_trig, new table = (AAA,42), (BBB,42), (CCC,42) +-- check detach/reattach behavior; statement triggers with transition tables +-- should not prevent a table from becoming a partition again +alter table parent detach partition child1; +alter table parent attach partition child1 for values in ('AAA'); -- DML affecting parent sees tuples collected from children even if -- there is no transition table trigger on the children drop trigger child1_insert_trig on child1; @@ -2966,6 +2970,10 @@ NOTICE: trigger = parent_insert_trig, new table = (AAA,42), (BBB,42), (CCC,42) create index on parent(b); copy parent (a, b) from stdin; NOTICE: trigger = parent_insert_trig, new table = (DDD,42) +-- check disinherit/reinherit behavior; statement triggers with transition +-- tables should not prevent a table from becoming an inheritance child again +alter table child1 no inherit parent; +alter table child1 inherit parent; -- DML affecting parent sees tuples collected from children even if -- there is no transition table trigger on the children drop trigger child1_insert_trig on child1; diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql index d674b25c83be4..5f7f75d7ba5d9 100644 --- a/src/test/regress/sql/triggers.sql +++ b/src/test/regress/sql/triggers.sql @@ -1935,6 +1935,11 @@ BBB 42 CCC 42 \. +-- check detach/reattach behavior; statement triggers with transition tables +-- should not prevent a table from becoming a partition again +alter table parent detach partition child1; +alter table parent attach partition child1 for values in ('AAA'); + -- DML affecting parent sees tuples collected from children even if -- there is no transition table trigger on the children drop trigger child1_insert_trig on child1; @@ -2154,6 +2159,11 @@ copy parent (a, b) from stdin; DDD 42 \. 
+-- check disinherit/reinherit behavior; statement triggers with transition +-- tables should not prevent a table from becoming an inheritance child again +alter table child1 no inherit parent; +alter table child1 inherit parent; + -- DML affecting parent sees tuples collected from children even if -- there is no transition table trigger on the children drop trigger child1_insert_trig on child1; From fd2ab03fea23ad6183fe694e750c901c6ff38479 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 8 Aug 2025 12:06:06 +0200 Subject: [PATCH 471/602] Fix incorrect lack of Datum conversion in _int_matchsel() The code used return (Selectivity) 0.0; where PG_RETURN_FLOAT8(0.0); would be correct. On 64-bit systems, these are pretty much equivalent, but on 32-bit systems, PG_RETURN_FLOAT8() correctly produces a pointer, but the old wrong code would return a null pointer, possibly leading to a crash elsewhere. We think this code is actually not reachable because bqarr_in won't accept an empty query, and there is no other function that will create query_int values. But better be safe and not let such incorrect code lie around. Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/8246d7ff-f4b7-4363-913e-827dadfeb145%40eisentraut.org --- contrib/intarray/_int_selfuncs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c index 6c3b7ace146aa..9bf6448624254 100644 --- a/contrib/intarray/_int_selfuncs.c +++ b/contrib/intarray/_int_selfuncs.c @@ -177,7 +177,7 @@ _int_matchsel(PG_FUNCTION_ARGS) if (query->size == 0) { ReleaseVariableStats(vardata); - return (Selectivity) 0.0; + PG_RETURN_FLOAT8(0.0); } /* From b5cd74612c26ec3f96dfe3689acd634db9385d2e Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sat, 9 Aug 2025 01:40:17 +1200 Subject: [PATCH 472/602] Remove obsolete comment. Remove a comment about potential for AIO in StartReadBuffersImpl(), because that change happened. --- src/backend/storage/buffer/bufmgr.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 67431208e7f5f..6c9ae83a7a869 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1484,11 +1484,6 @@ StartReadBuffersImpl(ReadBuffersOperation *operation, * buffers must remain valid until WaitReadBuffers() is called, and any * forwarded buffers must also be preserved for a continuing call unless * they are explicitly released. - * - * Currently the I/O is only started with optional operating system advice if - * requested by the caller with READ_BUFFERS_ISSUE_ADVICE, and the real I/O - * happens synchronously in WaitReadBuffers(). In future work, true I/O could - * be initiated here. */ bool StartReadBuffers(ReadBuffersOperation *operation, From ff5b0824b3b0c58bbb322719628c42b7a4751dd9 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 8 Aug 2025 08:44:37 -0700 Subject: [PATCH 473/602] oauth: Remove stale events from the kqueue multiplexer If a socket is added to the kqueue, becomes readable/writable, and subsequently becomes non-readable/writable again, the kqueue itself will remain readable until either the socket registration is removed, or the stale event is cleared via a call to kevent(). In many simple cases, Curl itself will remove the socket registration quickly, but in real-world usage, this is not guaranteed to happen. 
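(As an editorial aside, the "cleared via a call to kevent()" step above is
the crux and is worth seeing in isolation. The following minimal sketch is
not code from the patch; it assumes "kq" is a kqueue descriptor holding only
level-triggered registrations. A zero-timeout kevent() forces the kernel to
walk the queue and silently discard conditions that no longer hold, which is
essentially what the comb_multiplexer() function introduced below does.)

	#include <sys/types.h>
	#include <sys/event.h>
	#include <sys/time.h>

	static void
	drain_stale_events(int kq)
	{
		struct timespec timeout = {0};	/* zero timeout: poll, never block */
		struct kevent ev;

		/*
		 * Any event returned here still holds, and remains queued because
		 * the registrations are level-triggered; stale events are discarded
		 * as the kernel scans the queue, so the kqueue descriptor stops
		 * polling as readable once no real work remains.
		 */
		(void) kevent(kq, NULL, 0, &ev, 1, &timeout);
	}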
The kqueue can then remain stuck in a permanently readable state until the request ends, which results in pointless wakeups for the client and wasted CPU time. Implement comb_multiplexer() to call kevent() and unstick any stale events that would cause unnecessary callbacks. This is called right after drive_request(), before we return control to the client to wait. Suggested-by: Thomas Munro Co-authored-by: Thomas Munro Reviewed-by: Thomas Munro Backpatch-through: 18 Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com --- src/interfaces/libpq-oauth/oauth-curl.c | 67 ++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c index dba9a684fa8a5..433135cb86d59 100644 --- a/src/interfaces/libpq-oauth/oauth-curl.c +++ b/src/interfaces/libpq-oauth/oauth-curl.c @@ -1376,6 +1376,53 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx, #endif } +/* + * If there is no work to do on any of the descriptors in the multiplexer, then + * this function must ensure that the multiplexer is not readable. + * + * Unlike epoll descriptors, kqueue descriptors only transition from readable to + * unreadable when kevent() is called and finds nothing, after removing + * level-triggered conditions that have gone away. We therefore need a dummy + * kevent() call after operations might have been performed on the monitored + * sockets or timer_fd. Any event returned is ignored here, but it also remains + * queued (being level-triggered) and leaves the descriptor readable. This is a + * no-op for epoll descriptors. + */ +static bool +comb_multiplexer(struct async_ctx *actx) +{ +#if defined(HAVE_SYS_EPOLL_H) + /* The epoll implementation doesn't hold onto stale events. */ + return true; +#elif defined(HAVE_SYS_EVENT_H) + struct timespec timeout = {0}; + struct kevent ev; + + /* + * Try to read a single pending event. We can actually ignore the result: + * either we found an event to process, in which case the multiplexer is + * correctly readable for that event at minimum, and it doesn't matter if + * there are any stale events; or we didn't find any, in which case the + * kernel will have discarded any stale events as it traveled to the end + * of the queue. + * + * Note that this depends on our registrations being level-triggered -- + * even the timer, so we use a chained kqueue for that instead of an + * EVFILT_TIMER on the top-level mux. If we used edge-triggered events, + * this call would improperly discard them. + */ + if (kevent(actx->mux, NULL, 0, &ev, 1, &timeout) < 0) + { + actx_error(actx, "could not comb kqueue: %m"); + return false; + } + + return true; +#else +#error comb_multiplexer is not implemented on this platform +#endif +} + /* * Enables or disables the timer in the multiplexer set. The timeout value is * in milliseconds (negative values disable the timer). @@ -2755,13 +2802,21 @@ pg_fe_run_oauth_flow_impl(PGconn *conn) if (status == PGRES_POLLING_FAILED) goto error_return; - else if (status != PGRES_POLLING_OK) - { - /* not done yet */ - return status; - } + else if (status == PGRES_POLLING_OK) + break; /* done! */ - break; + /* + * This request is still running. + * + * Make sure that stale events don't cause us to come back + * early. (Currently, this can occur only with kqueue.) If + * this is forgotten, the multiplexer can get stuck in a + * signaled state and we'll burn CPU cycles pointlessly. 
+ */ + if (!comb_multiplexer(actx)) + goto error_return; + + return status; } case OAUTH_STEP_WAIT_INTERVAL: From 3d9c03429a82c199a77563ae5d57c4c9cefa3d41 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 8 Aug 2025 08:44:46 -0700 Subject: [PATCH 474/602] oauth: Ensure unused socket registrations are removed If Curl needs to switch the direction of a socket's registration (e.g. from CURL_POLL_IN to CURL_POLL_OUT), it expects the old registration to be discarded. For epoll, this happened via EPOLL_CTL_MOD, but for kqueue, the old registration would remain if it was not explicitly removed by Curl. Explicitly remove the opposite-direction event during registrations. (If that event doesn't exist, we'll just get an ENOENT, which will be ignored by the same code that handles CURL_POLL_REMOVE.) A few assertions are also added to strengthen the relationship between the number of events added, the number of events pulled off the queue, and the lengths of the kevent arrays. Reviewed-by: Thomas Munro Backpatch-through: 18 Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com --- src/interfaces/libpq-oauth/oauth-curl.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c index 433135cb86d59..97c33299a79ac 100644 --- a/src/interfaces/libpq-oauth/oauth-curl.c +++ b/src/interfaces/libpq-oauth/oauth-curl.c @@ -1291,22 +1291,31 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx, return 0; #elif defined(HAVE_SYS_EVENT_H) - struct kevent ev[2] = {0}; + struct kevent ev[2]; struct kevent ev_out[2]; struct timespec timeout = {0}; int nev = 0; int res; + /* + * We don't know which of the events is currently registered, perhaps + * both, so we always try to remove unneeded events. This means we need to + * tolerate ENOENT below. + */ switch (what) { case CURL_POLL_IN: EV_SET(&ev[nev], socket, EVFILT_READ, EV_ADD | EV_RECEIPT, 0, 0, 0); nev++; + EV_SET(&ev[nev], socket, EVFILT_WRITE, EV_DELETE | EV_RECEIPT, 0, 0, 0); + nev++; break; case CURL_POLL_OUT: EV_SET(&ev[nev], socket, EVFILT_WRITE, EV_ADD | EV_RECEIPT, 0, 0, 0); nev++; + EV_SET(&ev[nev], socket, EVFILT_READ, EV_DELETE | EV_RECEIPT, 0, 0, 0); + nev++; break; case CURL_POLL_INOUT: @@ -1317,12 +1326,6 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx, break; case CURL_POLL_REMOVE: - - /* - * We don't know which of these is currently registered, perhaps - * both, so we try to remove both. This means we need to tolerate - * ENOENT below. - */ EV_SET(&ev[nev], socket, EVFILT_READ, EV_DELETE | EV_RECEIPT, 0, 0, 0); nev++; EV_SET(&ev[nev], socket, EVFILT_WRITE, EV_DELETE | EV_RECEIPT, 0, 0, 0); @@ -1334,7 +1337,10 @@ register_socket(CURL *curl, curl_socket_t socket, int what, void *ctx, return -1; } - res = kevent(actx->mux, ev, nev, ev_out, lengthof(ev_out), &timeout); + Assert(nev <= lengthof(ev)); + Assert(nev <= lengthof(ev_out)); + + res = kevent(actx->mux, ev, nev, ev_out, nev, &timeout); if (res < 0) { actx_error(actx, "could not modify kqueue: %m"); From 1749a12f0d2005ba23236089f0a32e6c6c1533f0 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 8 Aug 2025 08:44:52 -0700 Subject: [PATCH 475/602] oauth: Remove expired timers from the multiplexer In a case similar to the previous commit, an expired timer can remain permanently readable if Curl does not remove the timeout itself. 
Since that removal isn't guaranteed to happen in real-world situations, implement drain_timer_events() to reset the timer before calling into drive_request(). Moving to drain_timer_events() happens to fix a logic bug in the previous caller of timer_expired(), which treated an error condition as if the timer were expired instead of bailing out. The previous implementation of timer_expired() gave differing results for epoll and kqueue if the timer was reset. (For epoll, a reset timer was considered to be expired, and for kqueue it was not.) This didn't previously cause problems, since timer_expired() was only called while the timer was known to be set, but both implementations now use the kqueue logic. Reviewed-by: Thomas Munro Backpatch-through: 18 Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com --- src/interfaces/libpq-oauth/oauth-curl.c | 108 +++++++++++++++--------- 1 file changed, 68 insertions(+), 40 deletions(-) diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c index 97c33299a79ac..aa5d2bfd96c05 100644 --- a/src/interfaces/libpq-oauth/oauth-curl.c +++ b/src/interfaces/libpq-oauth/oauth-curl.c @@ -1536,40 +1536,20 @@ set_timer(struct async_ctx *actx, long timeout) /* * Returns 1 if the timeout in the multiplexer set has expired since the last - * call to set_timer(), 0 if the timer is still running, or -1 (with an - * actx_error() report) if the timer cannot be queried. + * call to set_timer(), 0 if the timer is either still running or disarmed, or + * -1 (with an actx_error() report) if the timer cannot be queried. */ static int timer_expired(struct async_ctx *actx) { -#if defined(HAVE_SYS_EPOLL_H) - struct itimerspec spec = {0}; - - if (timerfd_gettime(actx->timerfd, &spec) < 0) - { - actx_error(actx, "getting timerfd value: %m"); - return -1; - } - - /* - * This implementation assumes we're using single-shot timers. If you - * change to using intervals, you'll need to reimplement this function - * too, possibly with the read() or select() interfaces for timerfd. - */ - Assert(spec.it_interval.tv_sec == 0 - && spec.it_interval.tv_nsec == 0); - - /* If the remaining time to expiration is zero, we're done. */ - return (spec.it_value.tv_sec == 0 - && spec.it_value.tv_nsec == 0); -#elif defined(HAVE_SYS_EVENT_H) +#if defined(HAVE_SYS_EPOLL_H) || defined(HAVE_SYS_EVENT_H) int res; - /* Is the timer queue ready? */ + /* Is the timer ready? */ res = PQsocketPoll(actx->timerfd, 1 /* forRead */ , 0, 0); if (res < 0) { - actx_error(actx, "checking kqueue for timeout: %m"); + actx_error(actx, "checking timer expiration: %m"); return -1; } @@ -1601,6 +1581,36 @@ register_timer(CURLM *curlm, long timeout, void *ctx) return 0; } +/* + * Removes any expired-timer event from the multiplexer. If was_expired is not + * NULL, it will contain whether or not the timer was expired at time of call. + */ +static bool +drain_timer_events(struct async_ctx *actx, bool *was_expired) +{ + int res; + + res = timer_expired(actx); + if (res < 0) + return false; + + if (res > 0) + { + /* + * Timer is expired. We could drain the event manually from the + * timerfd, but it's easier to simply disable it; that keeps the + * platform-specific code in set_timer(). + */ + if (!set_timer(actx, -1)) + return false; + } + + if (was_expired) + *was_expired = (res > 0); + + return true; +} + /* * Prints Curl request debugging information to stderr. 
* @@ -2804,6 +2814,22 @@ pg_fe_run_oauth_flow_impl(PGconn *conn) { PostgresPollingStatusType status; + /* + * Clear any expired timeout before calling back into + * Curl. Curl is not guaranteed to do this for us, because + * its API expects us to use single-shot (i.e. + * edge-triggered) timeouts, and ours are level-triggered + * via the mux. + * + * This can't be combined with the comb_multiplexer() call + * below: we might accidentally clear a short timeout that + * was both set and expired during the call to + * drive_request(). + */ + if (!drain_timer_events(actx, NULL)) + goto error_return; + + /* Move the request forward. */ status = drive_request(actx); if (status == PGRES_POLLING_FAILED) @@ -2826,24 +2852,26 @@ pg_fe_run_oauth_flow_impl(PGconn *conn) } case OAUTH_STEP_WAIT_INTERVAL: - - /* - * The client application is supposed to wait until our timer - * expires before calling PQconnectPoll() again, but that - * might not happen. To avoid sending a token request early, - * check the timer before continuing. - */ - if (!timer_expired(actx)) { - set_conn_altsock(conn, actx->timerfd); - return PGRES_POLLING_READING; - } + bool expired; - /* Disable the expired timer. */ - if (!set_timer(actx, -1)) - goto error_return; + /* + * The client application is supposed to wait until our + * timer expires before calling PQconnectPoll() again, but + * that might not happen. To avoid sending a token request + * early, check the timer before continuing. + */ + if (!drain_timer_events(actx, &expired)) + goto error_return; - break; + if (!expired) + { + set_conn_altsock(conn, actx->timerfd); + return PGRES_POLLING_READING; + } + + break; + } } /* From 3e311664e4974ea0eef708d54256aacbb0239c95 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 8 Aug 2025 08:44:56 -0700 Subject: [PATCH 476/602] oauth: Track total call count during a client flow Tracking down the bugs that led to the addition of comb_multiplexer() and drain_timer_events() was difficult, because an inefficient flow is not visibly different from one that is working properly. To help maintainers notice when something has gone wrong, track the number of calls into the flow as part of debug mode, and print the total when the flow finishes. A new test makes sure the total count is less than 100. (We expect something on the order of 10.) This isn't foolproof, but it is able to catch several regressions in the logic of the prior two commits, and future work to add TLS support to the oauth_validator test server should strengthen it as well. Backpatch-through: 18 Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com --- src/interfaces/libpq-oauth/oauth-curl.c | 22 +++++++++++++ .../modules/oauth_validator/t/001_server.pl | 31 ++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c index aa5d2bfd96c05..aa50b00d05383 100644 --- a/src/interfaces/libpq-oauth/oauth-curl.c +++ b/src/interfaces/libpq-oauth/oauth-curl.c @@ -278,6 +278,7 @@ struct async_ctx bool user_prompted; /* have we already sent the authz prompt? */ bool used_basic_auth; /* did we send a client secret? */ bool debugging; /* can we give unsafe developer assistance? */ + int dbg_num_calls; /* (debug mode) how many times were we called? 
*/ }; /* @@ -3021,6 +3022,8 @@ PostgresPollingStatusType pg_fe_run_oauth_flow(PGconn *conn) { PostgresPollingStatusType result; + fe_oauth_state *state = conn_sasl_state(conn); + struct async_ctx *actx; #ifndef WIN32 sigset_t osigset; bool sigpipe_pending; @@ -3049,6 +3052,25 @@ pg_fe_run_oauth_flow(PGconn *conn) result = pg_fe_run_oauth_flow_impl(conn); + /* + * To assist with finding bugs in comb_multiplexer() and + * drain_timer_events(), when we're in debug mode, track the total number + * of calls to this function and print that at the end of the flow. + * + * Be careful that state->async_ctx could be NULL if early initialization + * fails during the first call. + */ + actx = state->async_ctx; + Assert(actx || result == PGRES_POLLING_FAILED); + + if (actx && actx->debugging) + { + actx->dbg_num_calls++; + if (result == PGRES_POLLING_OK || result == PGRES_POLLING_FAILED) + fprintf(stderr, "[libpq] total number of polls: %d\n", + actx->dbg_num_calls); + } + #ifndef WIN32 if (masked) { diff --git a/src/test/modules/oauth_validator/t/001_server.pl b/src/test/modules/oauth_validator/t/001_server.pl index 41672ebd5c6dc..c0dafb8be7642 100644 --- a/src/test/modules/oauth_validator/t/001_server.pl +++ b/src/test/modules/oauth_validator/t/001_server.pl @@ -418,6 +418,35 @@ sub connstr qr/failed to obtain access token: mutual TLS required for client \(invalid_client\)/ ); +# Count the number of calls to the internal flow when multiple retries are +# triggered. The exact number depends on many things -- the TCP stack, the +# version of Curl in use, random chance -- but a ridiculously high number +# suggests something is wrong with our ability to clear multiplexer events after +# they're no longer applicable. +my ($ret, $stdout, $stderr) = $node->psql( + 'postgres', + "SELECT 'connected for call count'", + extra_params => ['-w'], + connstr => connstr(stage => 'token', retries => 2), + on_error_stop => 0); + +is($ret, 0, "call count connection succeeds"); +like( + $stderr, + qr@Visit https://example\.com/ and enter the code: postgresuser@, + "call count: stderr matches"); + +my $count_pattern = qr/\[libpq\] total number of polls: (\d+)/; +if (like($stderr, $count_pattern, "call count: count is printed")) +{ + # For reference, a typical flow with two retries might take between 5-15 + # calls to the client implementation. And while this will probably continue + # to change across OSes and Curl updates, we're likely in trouble if we see + # hundreds or thousands of calls. + $stderr =~ $count_pattern; + cmp_ok($1, '<', 100, "call count is reasonably small"); +} + # Stress test: make sure our builtin flow operates correctly even if the client # application isn't respecting PGRES_POLLING_READING/WRITING signals returned # from PQconnectPoll(). @@ -428,7 +457,7 @@ sub connstr connstr(stage => 'all', retries => 1, interval => 1)); note "running '" . join("' '", @cmd) . "'"; -my ($stdout, $stderr) = run_command(\@cmd); +($stdout, $stderr) = run_command(\@cmd); like($stdout, qr/connection succeeded/, "stress-async: stdout matches"); unlike( From 1443b6c0eaa2b464affc0c3aacb3c3bf09efcd6d Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 8 Aug 2025 08:45:01 -0700 Subject: [PATCH 477/602] oauth: Add unit tests for multiplexer handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To better record the internal behaviors of oauth-curl.c, add a unit test suite for the socket and timer handling code. 
This is all based on TAP and driven by our existing Test::More infrastructure. Reviewed-by: Dagfinn Ilmari Mannsåker Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com --- src/interfaces/libpq-oauth/Makefile | 14 + src/interfaces/libpq-oauth/meson.build | 35 ++ src/interfaces/libpq-oauth/t/001_oauth.pl | 24 + src/interfaces/libpq-oauth/test-oauth-curl.c | 527 +++++++++++++++++++ 4 files changed, 600 insertions(+) create mode 100644 src/interfaces/libpq-oauth/t/001_oauth.pl create mode 100644 src/interfaces/libpq-oauth/test-oauth-curl.c diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index 682f17413b3a4..e73573694b9a9 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -79,5 +79,19 @@ uninstall: rm -f '$(DESTDIR)$(libdir)/$(stlib)' rm -f '$(DESTDIR)$(libdir)/$(shlib)' +.PHONY: all-tests +all-tests: oauth_tests$(X) + +oauth_tests$(X): test-oauth-curl.o oauth-utils.o $(WIN32RES) | submake-libpgport submake-libpq + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(SHLIB_LINK) -o $@ + +check: all-tests + $(prove_check) + +installcheck: all-tests + $(prove_installcheck) + clean distclean: clean-lib rm -f $(OBJS) $(OBJS_STATIC) $(OBJS_SHLIB) + rm -f test-oauth-curl.o oauth_tests$(X) + rm -rf tmp_check diff --git a/src/interfaces/libpq-oauth/meson.build b/src/interfaces/libpq-oauth/meson.build index df064c59a4070..505e1671b8637 100644 --- a/src/interfaces/libpq-oauth/meson.build +++ b/src/interfaces/libpq-oauth/meson.build @@ -47,3 +47,38 @@ libpq_oauth_so = shared_module(libpq_oauth_name, link_args: export_fmt.format(export_file.full_path()), kwargs: default_lib_args, ) + +libpq_oauth_test_deps = [] + +oauth_test_sources = files('test-oauth-curl.c') + libpq_oauth_so_sources + +if host_system == 'windows' + oauth_test_sources += rc_bin_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'oauth_tests', + '--FILEDESC', 'OAuth unit test program',]) +endif + +libpq_oauth_test_deps += executable('oauth_tests', + oauth_test_sources, + dependencies: [frontend_shlib_code, libpq, libpq_oauth_deps], + kwargs: default_bin_args + { + 'c_args': default_bin_args.get('c_args', []) + libpq_oauth_so_c_args, + 'c_pch': pch_postgres_fe_h, + 'include_directories': [libpq_inc, postgres_inc], + 'install': false, + } +) + +testprep_targets += libpq_oauth_test_deps + +tests += { + 'name': 'libpq-oauth', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_oauth.pl', + ], + 'deps': libpq_oauth_test_deps, + }, +} diff --git a/src/interfaces/libpq-oauth/t/001_oauth.pl b/src/interfaces/libpq-oauth/t/001_oauth.pl new file mode 100644 index 0000000000000..5af6c860768a6 --- /dev/null +++ b/src/interfaces/libpq-oauth/t/001_oauth.pl @@ -0,0 +1,24 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Utils; +use Test::More; + +# Defer entirely to the oauth_tests executable. stdout/err is routed through +# Test::More so that our logging infrastructure can handle it correctly. Using +# IPC::Run::new_chunker seems to help interleave the two streams a little better +# than without. +# +# TODO: prove can also deal with native executables itself, which we could +# probably make use of via PROVE_TESTS on the Makefile side. But the Meson setup +# calls Perl directly, which would require more code to work around... and +# there's still the matter of logging. 
+my $builder = Test::More->builder; +my $out = $builder->output; +my $err = $builder->failure_output; + +IPC::Run::run ['oauth_tests'], + '>', IPC::Run::new_chunker, sub { $out->print($_[0]) }, + '2>', IPC::Run::new_chunker, sub { $err->print($_[0]) } + or die "oauth_tests returned $?"; diff --git a/src/interfaces/libpq-oauth/test-oauth-curl.c b/src/interfaces/libpq-oauth/test-oauth-curl.c new file mode 100644 index 0000000000000..8263aff2f4ad4 --- /dev/null +++ b/src/interfaces/libpq-oauth/test-oauth-curl.c @@ -0,0 +1,527 @@ +/* + * test-oauth-curl.c + * + * A unit test driver for libpq-oauth. This #includes oauth-curl.c, which lets + * the tests reference static functions and other internals. + * + * USE_ASSERT_CHECKING is required, to make it easy for tests to wrap + * must-succeed code as part of test setup. + * + * Copyright (c) 2025, PostgreSQL Global Development Group + */ + +#include "oauth-curl.c" + +#include + +#ifdef USE_ASSERT_CHECKING + +/* + * TAP Helpers + */ + +static int num_tests = 0; + +/* + * Reports ok/not ok to the TAP stream on stdout. + */ +#define ok(OK, TEST) \ + ok_impl(OK, TEST, #OK, __FILE__, __LINE__) + +static bool +ok_impl(bool ok, const char *test, const char *teststr, const char *file, int line) +{ + printf("%sok %d - %s\n", ok ? "" : "not ", ++num_tests, test); + + if (!ok) + { + printf("# at %s:%d:\n", file, line); + printf("# expression is false: %s\n", teststr); + } + + return ok; +} + +/* + * Like ok(this == that), but with more diagnostics on failure. + * + * Only works on ints, but luckily that's all we need here. Note that the much + * simpler-looking macro implementation + * + * is_diag(ok(THIS == THAT, TEST), THIS, #THIS, THAT, #THAT) + * + * suffers from multiple evaluation of the macro arguments... + */ +#define is(THIS, THAT, TEST) \ + do { \ + int this_ = (THIS), \ + that_ = (THAT); \ + is_diag( \ + ok_impl(this_ == that_, TEST, #THIS " == " #THAT, __FILE__, __LINE__), \ + this_, #THIS, that_, #THAT \ + ); \ + } while (0) + +static bool +is_diag(bool ok, int this, const char *thisstr, int that, const char *thatstr) +{ + if (!ok) + printf("# %s = %d; %s = %d\n", thisstr, this, thatstr, that); + + return ok; +} + +/* + * Utilities + */ + +/* + * Creates a partially-initialized async_ctx for the purposes of testing. Free + * with free_test_actx(). + */ +static struct async_ctx * +init_test_actx(void) +{ + struct async_ctx *actx; + + actx = calloc(1, sizeof(*actx)); + Assert(actx); + + actx->mux = PGINVALID_SOCKET; + actx->timerfd = -1; + actx->debugging = true; + + initPQExpBuffer(&actx->errbuf); + + Assert(setup_multiplexer(actx)); + + return actx; +} + +static void +free_test_actx(struct async_ctx *actx) +{ + termPQExpBuffer(&actx->errbuf); + + if (actx->mux != PGINVALID_SOCKET) + close(actx->mux); + if (actx->timerfd >= 0) + close(actx->timerfd); + + free(actx); +} + +static char dummy_buf[4 * 1024]; /* for fill_pipe/drain_pipe */ + +/* + * Writes to the write side of a pipe until it won't take any more data. Returns + * the amount written. + */ +static ssize_t +fill_pipe(int fd) +{ + int mode; + ssize_t written = 0; + + /* Don't block. */ + Assert((mode = fcntl(fd, F_GETFL)) != -1); + Assert(fcntl(fd, F_SETFL, mode | O_NONBLOCK) == 0); + + while (true) + { + ssize_t w; + + w = write(fd, dummy_buf, sizeof(dummy_buf)); + if (w < 0) + { + if (errno != EAGAIN && errno != EWOULDBLOCK) + { + perror("write to pipe"); + written = -1; + } + break; + } + + written += w; + } + + /* Reset the descriptor flags. 
*/ + Assert(fcntl(fd, F_SETFD, mode) == 0); + + return written; +} + +/* + * Drains the requested amount of data from the read side of a pipe. + */ +static bool +drain_pipe(int fd, ssize_t n) +{ + Assert(n > 0); + + while (n) + { + size_t to_read = (n <= sizeof(dummy_buf)) ? n : sizeof(dummy_buf); + ssize_t drained; + + drained = read(fd, dummy_buf, to_read); + if (drained < 0) + { + perror("read from pipe"); + return false; + } + + n -= drained; + } + + return true; +} + +/* + * Tests whether the multiplexer is marked ready by the deadline. This is a + * macro so that file/line information makes sense during failures. + * + * NB: our current multiplexer implementations (epoll/kqueue) are *readable* + * when the underlying libcurl sockets are *writable*. This behavior is pinned + * here to record that expectation; PGRES_POLLING_READING is hardcoded + * throughout the flow and would need to be changed if a new multiplexer does + * something different. + */ +#define mux_is_ready(MUX, DEADLINE, TEST) \ + do { \ + int res_ = PQsocketPoll(MUX, 1, 0, DEADLINE); \ + Assert(res_ != -1); \ + ok(res_ > 0, "multiplexer is ready " TEST); \ + } while (0) + +/* + * The opposite of mux_is_ready(). + */ +#define mux_is_not_ready(MUX, TEST) \ + do { \ + int res_ = PQsocketPoll(MUX, 1, 0, 0); \ + Assert(res_ != -1); \ + is(res_, 0, "multiplexer is not ready " TEST); \ + } while (0) + +/* + * Test Suites + */ + +/* Per-suite timeout. Set via the PG_TEST_TIMEOUT_DEFAULT envvar. */ +static pg_usec_time_t timeout_us = 180 * 1000 * 1000; + +static void +test_set_timer(void) +{ + struct async_ctx *actx = init_test_actx(); + const pg_usec_time_t deadline = PQgetCurrentTimeUSec() + timeout_us; + + printf("# test_set_timer\n"); + + /* A zero-duration timer should result in a near-immediate ready signal. */ + Assert(set_timer(actx, 0)); + mux_is_ready(actx->mux, deadline, "when timer expires"); + is(timer_expired(actx), 1, "timer_expired() returns 1 when timer expires"); + + /* Resetting the timer far in the future should unset the ready signal. */ + Assert(set_timer(actx, INT_MAX)); + mux_is_not_ready(actx->mux, "when timer is reset to the future"); + is(timer_expired(actx), 0, "timer_expired() returns 0 with unexpired timer"); + + /* Setting another zero-duration timer should override the previous one. */ + Assert(set_timer(actx, 0)); + mux_is_ready(actx->mux, deadline, "when timer is re-expired"); + is(timer_expired(actx), 1, "timer_expired() returns 1 when timer is re-expired"); + + /* And disabling that timer should once again unset the ready signal. */ + Assert(set_timer(actx, -1)); + mux_is_not_ready(actx->mux, "when timer is unset"); + is(timer_expired(actx), 0, "timer_expired() returns 0 when timer is unset"); + + { + bool expired; + + /* Make sure drain_timer_events() functions correctly as well. */ + Assert(set_timer(actx, 0)); + mux_is_ready(actx->mux, deadline, "when timer is re-expired (drain_timer_events)"); + + Assert(drain_timer_events(actx, &expired)); + mux_is_not_ready(actx->mux, "when timer is drained after expiring"); + is(expired, 1, "drain_timer_events() reports expiration"); + is(timer_expired(actx), 0, "timer_expired() returns 0 after timer is drained"); + + /* A second drain should do nothing. 
*/ + Assert(drain_timer_events(actx, &expired)); + mux_is_not_ready(actx->mux, "when timer is drained a second time"); + is(expired, 0, "drain_timer_events() reports no expiration"); + is(timer_expired(actx), 0, "timer_expired() still returns 0"); + } + + free_test_actx(actx); +} + +static void +test_register_socket(void) +{ + struct async_ctx *actx = init_test_actx(); + int pipefd[2]; + int rfd, + wfd; + bool bidirectional; + + /* Create a local pipe for communication. */ + Assert(pipe(pipefd) == 0); + rfd = pipefd[0]; + wfd = pipefd[1]; + + /* + * Some platforms (FreeBSD) implement bidirectional pipes, affecting the + * behavior of some of these tests. Store that knowledge for later. + */ + bidirectional = PQsocketPoll(rfd /* read */ , 0, 1 /* write */ , 0) > 0; + + /* + * This suite runs twice -- once using CURL_POLL_IN/CURL_POLL_OUT for + * read/write operations, respectively, and once using CURL_POLL_INOUT for + * both sides. + */ + for (int inout = 0; inout < 2; inout++) + { + const int in_event = inout ? CURL_POLL_INOUT : CURL_POLL_IN; + const int out_event = inout ? CURL_POLL_INOUT : CURL_POLL_OUT; + const pg_usec_time_t deadline = PQgetCurrentTimeUSec() + timeout_us; + size_t bidi_pipe_size = 0; /* silence compiler warnings */ + + printf("# test_register_socket %s\n", inout ? "(INOUT)" : ""); + + /* + * At the start of the test, the read side should be blocked and the + * write side should be open. (There's a mistake at the end of this + * loop otherwise.) + */ + Assert(PQsocketPoll(rfd, 1, 0, 0) == 0); + Assert(PQsocketPoll(wfd, 0, 1, 0) > 0); + + /* + * For bidirectional systems, emulate unidirectional behavior here by + * filling up the "read side" of the pipe. + */ + if (bidirectional) + Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); + + /* Listen on the read side. The multiplexer shouldn't be ready yet. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when fd is not readable"); + + /* Writing to the pipe should result in a read-ready multiplexer. */ + Assert(write(wfd, "x", 1) == 1); + mux_is_ready(actx->mux, deadline, "when fd is readable"); + + /* + * Update the registration to wait on write events instead. The + * multiplexer should be unset. + */ + Assert(register_socket(NULL, rfd, CURL_POLL_OUT, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when waiting for writes on readable fd"); + + /* Re-register for read events. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when waiting for reads again"); + + /* Stop listening. The multiplexer should be unset. */ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when readable fd is removed"); + + /* Listen again. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when readable fd is re-added"); + + /* + * Draining the pipe should unset the multiplexer again, once the old + * event is cleared. + */ + Assert(drain_pipe(rfd, 1)); + Assert(comb_multiplexer(actx)); + mux_is_not_ready(actx->mux, "when fd is drained"); + + /* Undo any unidirectional emulation. */ + if (bidirectional) + Assert(drain_pipe(wfd, bidi_pipe_size)); + + /* Listen on the write side. An empty buffer should be writable. 
*/ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + Assert(register_socket(NULL, wfd, out_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when fd is writable"); + + /* As above, wait on read events instead. */ + Assert(register_socket(NULL, wfd, CURL_POLL_IN, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when waiting for reads on writable fd"); + + /* Re-register for write events. */ + Assert(register_socket(NULL, wfd, out_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when waiting for writes again"); + + { + ssize_t written; + + /* + * Fill the pipe. Once the old writable event is cleared, the mux + * should not be ready. + */ + Assert((written = fill_pipe(wfd)) > 0); + printf("# pipe buffer is full at %zd bytes\n", written); + + Assert(comb_multiplexer(actx)); + mux_is_not_ready(actx->mux, "when fd buffer is full"); + + /* Drain the pipe again. */ + Assert(drain_pipe(rfd, written)); + mux_is_ready(actx->mux, deadline, "when fd buffer is drained"); + } + + /* Stop listening. */ + Assert(register_socket(NULL, wfd, CURL_POLL_REMOVE, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when fd is removed"); + + /* Make sure an expired timer doesn't interfere with event draining. */ + { + bool expired; + + /* Make the rfd appear unidirectional if necessary. */ + if (bidirectional) + Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); + + /* Set the timer and wait for it to expire. */ + Assert(set_timer(actx, 0)); + Assert(PQsocketPoll(actx->timerfd, 1, 0, deadline) > 0); + is(timer_expired(actx), 1, "timer is expired"); + + /* Register for read events and make the fd readable. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + Assert(write(wfd, "x", 1) == 1); + mux_is_ready(actx->mux, deadline, "when fd is readable and timer expired"); + + /* + * Draining the pipe should unset the multiplexer again, once the + * old event is drained and the timer is reset. + * + * Order matters, since comb_multiplexer() doesn't have to remove + * stale events when active events exist. Follow the call sequence + * used in the code: drain the timer expiration, drain the pipe, + * then clear the stale events. + */ + Assert(drain_timer_events(actx, &expired)); + Assert(drain_pipe(rfd, 1)); + Assert(comb_multiplexer(actx)); + + is(expired, 1, "drain_timer_events() reports expiration"); + is(timer_expired(actx), 0, "timer is no longer expired"); + mux_is_not_ready(actx->mux, "when fd is drained and timer reset"); + + /* Stop listening. */ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + + /* Undo any unidirectional emulation. */ + if (bidirectional) + Assert(drain_pipe(wfd, bidi_pipe_size)); + } + + /* Ensure comb_multiplexer() can handle multiple stale events. */ + { + int rfd2, + wfd2; + + /* Create a second local pipe. */ + Assert(pipe(pipefd) == 0); + rfd2 = pipefd[0]; + wfd2 = pipefd[1]; + + /* Make both rfds appear unidirectional if necessary. */ + if (bidirectional) + { + Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); + Assert(fill_pipe(rfd2) == bidi_pipe_size); + } + + /* Register for read events on both fds, and make them readable. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + Assert(register_socket(NULL, rfd2, in_event, actx, NULL) == 0); + + Assert(write(wfd, "x", 1) == 1); + Assert(write(wfd2, "x", 1) == 1); + + mux_is_ready(actx->mux, deadline, "when two fds are readable"); + + /* + * Drain both fds. comb_multiplexer() should then ensure that the + * mux is no longer readable. 
+ */ + Assert(drain_pipe(rfd, 1)); + Assert(drain_pipe(rfd2, 1)); + Assert(comb_multiplexer(actx)); + mux_is_not_ready(actx->mux, "when two fds are drained"); + + /* Stop listening. */ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + Assert(register_socket(NULL, rfd2, CURL_POLL_REMOVE, actx, NULL) == 0); + + /* Undo any unidirectional emulation. */ + if (bidirectional) + { + Assert(drain_pipe(wfd, bidi_pipe_size)); + Assert(drain_pipe(wfd2, bidi_pipe_size)); + } + + close(rfd2); + close(wfd2); + } + } + + close(rfd); + close(wfd); + free_test_actx(actx); +} + +int +main(int argc, char *argv[]) +{ + const char *timeout; + + /* Grab the default timeout. */ + timeout = getenv("PG_TEST_TIMEOUT_DEFAULT"); + if (timeout) + { + int timeout_s = atoi(timeout); + + if (timeout_s > 0) + timeout_us = timeout_s * 1000 * 1000; + } + + /* + * Set up line buffering for our output, to let stderr interleave in the + * log files. + */ + setvbuf(stdout, NULL, PG_IOLBF, 0); + + test_set_timer(); + test_register_socket(); + + printf("1..%d\n", num_tests); + return 0; +} + +#else /* !USE_ASSERT_CHECKING */ + +/* + * Skip the test suite when we don't have assertions. + */ +int +main(int argc, char *argv[]) +{ + printf("1..0 # skip: cassert is not enabled\n"); + + return 0; +} + +#endif /* USE_ASSERT_CHECKING */ From ebaaf386adb133010c2024256521b993c5e53e98 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 8 Aug 2025 10:16:37 -0700 Subject: [PATCH 478/602] Revert "oauth: Add unit tests for multiplexer handling" Commit 1443b6c0e introduced buildfarm breakage for Autoconf animals, which expect to be able to run `make installcheck` on the libpq-oauth directory even if libcurl support is disabled. Some other Meson animals complained of a missing -lm link as well. Since this is the day before a freeze, revert for now and come back later. 
Discussion: https://postgr.es/m/CAOYmi%2BnCkoh3zB%2BGkZad44%3DFNskwUg6F1kmuxqQZzng7Zgj5tw%40mail.gmail.com --- src/interfaces/libpq-oauth/Makefile | 14 - src/interfaces/libpq-oauth/meson.build | 35 -- src/interfaces/libpq-oauth/t/001_oauth.pl | 24 - src/interfaces/libpq-oauth/test-oauth-curl.c | 527 ------------------- 4 files changed, 600 deletions(-) delete mode 100644 src/interfaces/libpq-oauth/t/001_oauth.pl delete mode 100644 src/interfaces/libpq-oauth/test-oauth-curl.c diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index e73573694b9a9..682f17413b3a4 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -79,19 +79,5 @@ uninstall: rm -f '$(DESTDIR)$(libdir)/$(stlib)' rm -f '$(DESTDIR)$(libdir)/$(shlib)' -.PHONY: all-tests -all-tests: oauth_tests$(X) - -oauth_tests$(X): test-oauth-curl.o oauth-utils.o $(WIN32RES) | submake-libpgport submake-libpq - $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(SHLIB_LINK) -o $@ - -check: all-tests - $(prove_check) - -installcheck: all-tests - $(prove_installcheck) - clean distclean: clean-lib rm -f $(OBJS) $(OBJS_STATIC) $(OBJS_SHLIB) - rm -f test-oauth-curl.o oauth_tests$(X) - rm -rf tmp_check diff --git a/src/interfaces/libpq-oauth/meson.build b/src/interfaces/libpq-oauth/meson.build index 505e1671b8637..df064c59a4070 100644 --- a/src/interfaces/libpq-oauth/meson.build +++ b/src/interfaces/libpq-oauth/meson.build @@ -47,38 +47,3 @@ libpq_oauth_so = shared_module(libpq_oauth_name, link_args: export_fmt.format(export_file.full_path()), kwargs: default_lib_args, ) - -libpq_oauth_test_deps = [] - -oauth_test_sources = files('test-oauth-curl.c') + libpq_oauth_so_sources - -if host_system == 'windows' - oauth_test_sources += rc_bin_gen.process(win32ver_rc, extra_args: [ - '--NAME', 'oauth_tests', - '--FILEDESC', 'OAuth unit test program',]) -endif - -libpq_oauth_test_deps += executable('oauth_tests', - oauth_test_sources, - dependencies: [frontend_shlib_code, libpq, libpq_oauth_deps], - kwargs: default_bin_args + { - 'c_args': default_bin_args.get('c_args', []) + libpq_oauth_so_c_args, - 'c_pch': pch_postgres_fe_h, - 'include_directories': [libpq_inc, postgres_inc], - 'install': false, - } -) - -testprep_targets += libpq_oauth_test_deps - -tests += { - 'name': 'libpq-oauth', - 'sd': meson.current_source_dir(), - 'bd': meson.current_build_dir(), - 'tap': { - 'tests': [ - 't/001_oauth.pl', - ], - 'deps': libpq_oauth_test_deps, - }, -} diff --git a/src/interfaces/libpq-oauth/t/001_oauth.pl b/src/interfaces/libpq-oauth/t/001_oauth.pl deleted file mode 100644 index 5af6c860768a6..0000000000000 --- a/src/interfaces/libpq-oauth/t/001_oauth.pl +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group -use strict; -use warnings FATAL => 'all'; - -use PostgreSQL::Test::Utils; -use Test::More; - -# Defer entirely to the oauth_tests executable. stdout/err is routed through -# Test::More so that our logging infrastructure can handle it correctly. Using -# IPC::Run::new_chunker seems to help interleave the two streams a little better -# than without. -# -# TODO: prove can also deal with native executables itself, which we could -# probably make use of via PROVE_TESTS on the Makefile side. But the Meson setup -# calls Perl directly, which would require more code to work around... and -# there's still the matter of logging. 
-my $builder = Test::More->builder; -my $out = $builder->output; -my $err = $builder->failure_output; - -IPC::Run::run ['oauth_tests'], - '>', IPC::Run::new_chunker, sub { $out->print($_[0]) }, - '2>', IPC::Run::new_chunker, sub { $err->print($_[0]) } - or die "oauth_tests returned $?"; diff --git a/src/interfaces/libpq-oauth/test-oauth-curl.c b/src/interfaces/libpq-oauth/test-oauth-curl.c deleted file mode 100644 index 8263aff2f4ad4..0000000000000 --- a/src/interfaces/libpq-oauth/test-oauth-curl.c +++ /dev/null @@ -1,527 +0,0 @@ -/* - * test-oauth-curl.c - * - * A unit test driver for libpq-oauth. This #includes oauth-curl.c, which lets - * the tests reference static functions and other internals. - * - * USE_ASSERT_CHECKING is required, to make it easy for tests to wrap - * must-succeed code as part of test setup. - * - * Copyright (c) 2025, PostgreSQL Global Development Group - */ - -#include "oauth-curl.c" - -#include - -#ifdef USE_ASSERT_CHECKING - -/* - * TAP Helpers - */ - -static int num_tests = 0; - -/* - * Reports ok/not ok to the TAP stream on stdout. - */ -#define ok(OK, TEST) \ - ok_impl(OK, TEST, #OK, __FILE__, __LINE__) - -static bool -ok_impl(bool ok, const char *test, const char *teststr, const char *file, int line) -{ - printf("%sok %d - %s\n", ok ? "" : "not ", ++num_tests, test); - - if (!ok) - { - printf("# at %s:%d:\n", file, line); - printf("# expression is false: %s\n", teststr); - } - - return ok; -} - -/* - * Like ok(this == that), but with more diagnostics on failure. - * - * Only works on ints, but luckily that's all we need here. Note that the much - * simpler-looking macro implementation - * - * is_diag(ok(THIS == THAT, TEST), THIS, #THIS, THAT, #THAT) - * - * suffers from multiple evaluation of the macro arguments... - */ -#define is(THIS, THAT, TEST) \ - do { \ - int this_ = (THIS), \ - that_ = (THAT); \ - is_diag( \ - ok_impl(this_ == that_, TEST, #THIS " == " #THAT, __FILE__, __LINE__), \ - this_, #THIS, that_, #THAT \ - ); \ - } while (0) - -static bool -is_diag(bool ok, int this, const char *thisstr, int that, const char *thatstr) -{ - if (!ok) - printf("# %s = %d; %s = %d\n", thisstr, this, thatstr, that); - - return ok; -} - -/* - * Utilities - */ - -/* - * Creates a partially-initialized async_ctx for the purposes of testing. Free - * with free_test_actx(). - */ -static struct async_ctx * -init_test_actx(void) -{ - struct async_ctx *actx; - - actx = calloc(1, sizeof(*actx)); - Assert(actx); - - actx->mux = PGINVALID_SOCKET; - actx->timerfd = -1; - actx->debugging = true; - - initPQExpBuffer(&actx->errbuf); - - Assert(setup_multiplexer(actx)); - - return actx; -} - -static void -free_test_actx(struct async_ctx *actx) -{ - termPQExpBuffer(&actx->errbuf); - - if (actx->mux != PGINVALID_SOCKET) - close(actx->mux); - if (actx->timerfd >= 0) - close(actx->timerfd); - - free(actx); -} - -static char dummy_buf[4 * 1024]; /* for fill_pipe/drain_pipe */ - -/* - * Writes to the write side of a pipe until it won't take any more data. Returns - * the amount written. - */ -static ssize_t -fill_pipe(int fd) -{ - int mode; - ssize_t written = 0; - - /* Don't block. */ - Assert((mode = fcntl(fd, F_GETFL)) != -1); - Assert(fcntl(fd, F_SETFL, mode | O_NONBLOCK) == 0); - - while (true) - { - ssize_t w; - - w = write(fd, dummy_buf, sizeof(dummy_buf)); - if (w < 0) - { - if (errno != EAGAIN && errno != EWOULDBLOCK) - { - perror("write to pipe"); - written = -1; - } - break; - } - - written += w; - } - - /* Reset the descriptor flags. 
*/ - Assert(fcntl(fd, F_SETFD, mode) == 0); - - return written; -} - -/* - * Drains the requested amount of data from the read side of a pipe. - */ -static bool -drain_pipe(int fd, ssize_t n) -{ - Assert(n > 0); - - while (n) - { - size_t to_read = (n <= sizeof(dummy_buf)) ? n : sizeof(dummy_buf); - ssize_t drained; - - drained = read(fd, dummy_buf, to_read); - if (drained < 0) - { - perror("read from pipe"); - return false; - } - - n -= drained; - } - - return true; -} - -/* - * Tests whether the multiplexer is marked ready by the deadline. This is a - * macro so that file/line information makes sense during failures. - * - * NB: our current multiplexer implementations (epoll/kqueue) are *readable* - * when the underlying libcurl sockets are *writable*. This behavior is pinned - * here to record that expectation; PGRES_POLLING_READING is hardcoded - * throughout the flow and would need to be changed if a new multiplexer does - * something different. - */ -#define mux_is_ready(MUX, DEADLINE, TEST) \ - do { \ - int res_ = PQsocketPoll(MUX, 1, 0, DEADLINE); \ - Assert(res_ != -1); \ - ok(res_ > 0, "multiplexer is ready " TEST); \ - } while (0) - -/* - * The opposite of mux_is_ready(). - */ -#define mux_is_not_ready(MUX, TEST) \ - do { \ - int res_ = PQsocketPoll(MUX, 1, 0, 0); \ - Assert(res_ != -1); \ - is(res_, 0, "multiplexer is not ready " TEST); \ - } while (0) - -/* - * Test Suites - */ - -/* Per-suite timeout. Set via the PG_TEST_TIMEOUT_DEFAULT envvar. */ -static pg_usec_time_t timeout_us = 180 * 1000 * 1000; - -static void -test_set_timer(void) -{ - struct async_ctx *actx = init_test_actx(); - const pg_usec_time_t deadline = PQgetCurrentTimeUSec() + timeout_us; - - printf("# test_set_timer\n"); - - /* A zero-duration timer should result in a near-immediate ready signal. */ - Assert(set_timer(actx, 0)); - mux_is_ready(actx->mux, deadline, "when timer expires"); - is(timer_expired(actx), 1, "timer_expired() returns 1 when timer expires"); - - /* Resetting the timer far in the future should unset the ready signal. */ - Assert(set_timer(actx, INT_MAX)); - mux_is_not_ready(actx->mux, "when timer is reset to the future"); - is(timer_expired(actx), 0, "timer_expired() returns 0 with unexpired timer"); - - /* Setting another zero-duration timer should override the previous one. */ - Assert(set_timer(actx, 0)); - mux_is_ready(actx->mux, deadline, "when timer is re-expired"); - is(timer_expired(actx), 1, "timer_expired() returns 1 when timer is re-expired"); - - /* And disabling that timer should once again unset the ready signal. */ - Assert(set_timer(actx, -1)); - mux_is_not_ready(actx->mux, "when timer is unset"); - is(timer_expired(actx), 0, "timer_expired() returns 0 when timer is unset"); - - { - bool expired; - - /* Make sure drain_timer_events() functions correctly as well. */ - Assert(set_timer(actx, 0)); - mux_is_ready(actx->mux, deadline, "when timer is re-expired (drain_timer_events)"); - - Assert(drain_timer_events(actx, &expired)); - mux_is_not_ready(actx->mux, "when timer is drained after expiring"); - is(expired, 1, "drain_timer_events() reports expiration"); - is(timer_expired(actx), 0, "timer_expired() returns 0 after timer is drained"); - - /* A second drain should do nothing. 
*/ - Assert(drain_timer_events(actx, &expired)); - mux_is_not_ready(actx->mux, "when timer is drained a second time"); - is(expired, 0, "drain_timer_events() reports no expiration"); - is(timer_expired(actx), 0, "timer_expired() still returns 0"); - } - - free_test_actx(actx); -} - -static void -test_register_socket(void) -{ - struct async_ctx *actx = init_test_actx(); - int pipefd[2]; - int rfd, - wfd; - bool bidirectional; - - /* Create a local pipe for communication. */ - Assert(pipe(pipefd) == 0); - rfd = pipefd[0]; - wfd = pipefd[1]; - - /* - * Some platforms (FreeBSD) implement bidirectional pipes, affecting the - * behavior of some of these tests. Store that knowledge for later. - */ - bidirectional = PQsocketPoll(rfd /* read */ , 0, 1 /* write */ , 0) > 0; - - /* - * This suite runs twice -- once using CURL_POLL_IN/CURL_POLL_OUT for - * read/write operations, respectively, and once using CURL_POLL_INOUT for - * both sides. - */ - for (int inout = 0; inout < 2; inout++) - { - const int in_event = inout ? CURL_POLL_INOUT : CURL_POLL_IN; - const int out_event = inout ? CURL_POLL_INOUT : CURL_POLL_OUT; - const pg_usec_time_t deadline = PQgetCurrentTimeUSec() + timeout_us; - size_t bidi_pipe_size = 0; /* silence compiler warnings */ - - printf("# test_register_socket %s\n", inout ? "(INOUT)" : ""); - - /* - * At the start of the test, the read side should be blocked and the - * write side should be open. (There's a mistake at the end of this - * loop otherwise.) - */ - Assert(PQsocketPoll(rfd, 1, 0, 0) == 0); - Assert(PQsocketPoll(wfd, 0, 1, 0) > 0); - - /* - * For bidirectional systems, emulate unidirectional behavior here by - * filling up the "read side" of the pipe. - */ - if (bidirectional) - Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); - - /* Listen on the read side. The multiplexer shouldn't be ready yet. */ - Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); - mux_is_not_ready(actx->mux, "when fd is not readable"); - - /* Writing to the pipe should result in a read-ready multiplexer. */ - Assert(write(wfd, "x", 1) == 1); - mux_is_ready(actx->mux, deadline, "when fd is readable"); - - /* - * Update the registration to wait on write events instead. The - * multiplexer should be unset. - */ - Assert(register_socket(NULL, rfd, CURL_POLL_OUT, actx, NULL) == 0); - mux_is_not_ready(actx->mux, "when waiting for writes on readable fd"); - - /* Re-register for read events. */ - Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); - mux_is_ready(actx->mux, deadline, "when waiting for reads again"); - - /* Stop listening. The multiplexer should be unset. */ - Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); - mux_is_not_ready(actx->mux, "when readable fd is removed"); - - /* Listen again. */ - Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); - mux_is_ready(actx->mux, deadline, "when readable fd is re-added"); - - /* - * Draining the pipe should unset the multiplexer again, once the old - * event is cleared. - */ - Assert(drain_pipe(rfd, 1)); - Assert(comb_multiplexer(actx)); - mux_is_not_ready(actx->mux, "when fd is drained"); - - /* Undo any unidirectional emulation. */ - if (bidirectional) - Assert(drain_pipe(wfd, bidi_pipe_size)); - - /* Listen on the write side. An empty buffer should be writable. 
*/ - Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); - Assert(register_socket(NULL, wfd, out_event, actx, NULL) == 0); - mux_is_ready(actx->mux, deadline, "when fd is writable"); - - /* As above, wait on read events instead. */ - Assert(register_socket(NULL, wfd, CURL_POLL_IN, actx, NULL) == 0); - mux_is_not_ready(actx->mux, "when waiting for reads on writable fd"); - - /* Re-register for write events. */ - Assert(register_socket(NULL, wfd, out_event, actx, NULL) == 0); - mux_is_ready(actx->mux, deadline, "when waiting for writes again"); - - { - ssize_t written; - - /* - * Fill the pipe. Once the old writable event is cleared, the mux - * should not be ready. - */ - Assert((written = fill_pipe(wfd)) > 0); - printf("# pipe buffer is full at %zd bytes\n", written); - - Assert(comb_multiplexer(actx)); - mux_is_not_ready(actx->mux, "when fd buffer is full"); - - /* Drain the pipe again. */ - Assert(drain_pipe(rfd, written)); - mux_is_ready(actx->mux, deadline, "when fd buffer is drained"); - } - - /* Stop listening. */ - Assert(register_socket(NULL, wfd, CURL_POLL_REMOVE, actx, NULL) == 0); - mux_is_not_ready(actx->mux, "when fd is removed"); - - /* Make sure an expired timer doesn't interfere with event draining. */ - { - bool expired; - - /* Make the rfd appear unidirectional if necessary. */ - if (bidirectional) - Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); - - /* Set the timer and wait for it to expire. */ - Assert(set_timer(actx, 0)); - Assert(PQsocketPoll(actx->timerfd, 1, 0, deadline) > 0); - is(timer_expired(actx), 1, "timer is expired"); - - /* Register for read events and make the fd readable. */ - Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); - Assert(write(wfd, "x", 1) == 1); - mux_is_ready(actx->mux, deadline, "when fd is readable and timer expired"); - - /* - * Draining the pipe should unset the multiplexer again, once the - * old event is drained and the timer is reset. - * - * Order matters, since comb_multiplexer() doesn't have to remove - * stale events when active events exist. Follow the call sequence - * used in the code: drain the timer expiration, drain the pipe, - * then clear the stale events. - */ - Assert(drain_timer_events(actx, &expired)); - Assert(drain_pipe(rfd, 1)); - Assert(comb_multiplexer(actx)); - - is(expired, 1, "drain_timer_events() reports expiration"); - is(timer_expired(actx), 0, "timer is no longer expired"); - mux_is_not_ready(actx->mux, "when fd is drained and timer reset"); - - /* Stop listening. */ - Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); - - /* Undo any unidirectional emulation. */ - if (bidirectional) - Assert(drain_pipe(wfd, bidi_pipe_size)); - } - - /* Ensure comb_multiplexer() can handle multiple stale events. */ - { - int rfd2, - wfd2; - - /* Create a second local pipe. */ - Assert(pipe(pipefd) == 0); - rfd2 = pipefd[0]; - wfd2 = pipefd[1]; - - /* Make both rfds appear unidirectional if necessary. */ - if (bidirectional) - { - Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); - Assert(fill_pipe(rfd2) == bidi_pipe_size); - } - - /* Register for read events on both fds, and make them readable. */ - Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); - Assert(register_socket(NULL, rfd2, in_event, actx, NULL) == 0); - - Assert(write(wfd, "x", 1) == 1); - Assert(write(wfd2, "x", 1) == 1); - - mux_is_ready(actx->mux, deadline, "when two fds are readable"); - - /* - * Drain both fds. comb_multiplexer() should then ensure that the - * mux is no longer readable. 
- */
-		Assert(drain_pipe(rfd, 1));
-		Assert(drain_pipe(rfd2, 1));
-		Assert(comb_multiplexer(actx));
-		mux_is_not_ready(actx->mux, "when two fds are drained");
-
-		/* Stop listening. */
-		Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0);
-		Assert(register_socket(NULL, rfd2, CURL_POLL_REMOVE, actx, NULL) == 0);
-
-		/* Undo any unidirectional emulation. */
-		if (bidirectional)
-		{
-			Assert(drain_pipe(wfd, bidi_pipe_size));
-			Assert(drain_pipe(wfd2, bidi_pipe_size));
-		}
-
-		close(rfd2);
-		close(wfd2);
-	}
-	}
-
-	close(rfd);
-	close(wfd);
-	free_test_actx(actx);
-}
-
-int
-main(int argc, char *argv[])
-{
-	const char *timeout;
-
-	/* Grab the default timeout. */
-	timeout = getenv("PG_TEST_TIMEOUT_DEFAULT");
-	if (timeout)
-	{
-		int			timeout_s = atoi(timeout);
-
-		if (timeout_s > 0)
-			timeout_us = timeout_s * 1000 * 1000;
-	}
-
-	/*
-	 * Set up line buffering for our output, to let stderr interleave in the
-	 * log files.
-	 */
-	setvbuf(stdout, NULL, PG_IOLBF, 0);
-
-	test_set_timer();
-	test_register_socket();
-
-	printf("1..%d\n", num_tests);
-	return 0;
-}
-
-#else							/* !USE_ASSERT_CHECKING */
-
-/*
- * Skip the test suite when we don't have assertions.
- */
-int
-main(int argc, char *argv[])
-{
-	printf("1..0 # skip: cassert is not enabled\n");
-
-	return 0;
-}
-
-#endif							/* USE_ASSERT_CHECKING */

From 138750dde4a8b939a30a6d650c6c8146192b39cb Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 8 Aug 2025 19:34:31 +0200
Subject: [PATCH 479/602] postgres_fdw and dblink should check if backend has
 MyProcPort before checking ->has_scram_keys.

MyProcPort is NULL in background workers.  So this could crash, for
example, if a background worker accessed a suitably configured foreign
table.

Author: Alexander Pyhalov
Reviewed-by: Peter Eisentraut
Reviewed-by: Matheus Alcantara
Discussion: https://www.postgresql.org/message-id/flat/27b29a35-9b96-46a9-bc1a-914140869dac%40gmail.com
---
 contrib/dblink/dblink.c           | 8 ++++----
 contrib/postgres_fdw/connection.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c
index f98805fb5f735..0cf4c27f2e967 100644
--- a/contrib/dblink/dblink.c
+++ b/contrib/dblink/dblink.c
@@ -2643,7 +2643,7 @@ dblink_connstr_has_required_scram_options(const char *connstr)
 		PQconninfoFree(options);
 	}
 
-	has_scram_keys = has_scram_client_key && has_scram_server_key && MyProcPort->has_scram_keys;
+	has_scram_keys = has_scram_client_key && has_scram_server_key && MyProcPort != NULL && MyProcPort->has_scram_keys;
 
 	return (has_scram_keys && has_require_auth);
 }
@@ -2676,7 +2676,7 @@ dblink_security_check(PGconn *conn, const char *connname, const char *connstr)
 	 * only added if UseScramPassthrough is set, and the user is not allowed
 	 * to add the SCRAM keys on fdw and user mapping options.
 	 */
-	if (MyProcPort->has_scram_keys && dblink_connstr_has_required_scram_options(connstr))
+	if (MyProcPort != NULL && MyProcPort->has_scram_keys && dblink_connstr_has_required_scram_options(connstr))
 		return;
 
 #ifdef ENABLE_GSS
@@ -2749,7 +2749,7 @@ dblink_connstr_check(const char *connstr)
 	if (dblink_connstr_has_pw(connstr))
 		return;
 
-	if (MyProcPort->has_scram_keys && dblink_connstr_has_required_scram_options(connstr))
+	if (MyProcPort != NULL && MyProcPort->has_scram_keys && dblink_connstr_has_required_scram_options(connstr))
 		return;
 
 #ifdef ENABLE_GSS
@@ -2896,7 +2896,7 @@ get_connect_string(const char *servername)
 	 * the user overwrites these options we can ereport on
 	 * dblink_connstr_check and dblink_security_check.
*/ - if (MyProcPort->has_scram_keys && UseScramPassthrough(foreign_server, user_mapping)) + if (MyProcPort != NULL && MyProcPort->has_scram_keys && UseScramPassthrough(foreign_server, user_mapping)) appendSCRAMKeysInfo(&buf); foreach(cell, fdw->options) diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index e8148f2c5a223..4fbb6c182b82c 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -464,7 +464,7 @@ pgfdw_security_check(const char **keywords, const char **values, UserMapping *us * assume that UseScramPassthrough is also true since SCRAM options are * only set when UseScramPassthrough is enabled. */ - if (MyProcPort->has_scram_keys && pgfdw_has_required_scram_options(keywords, values)) + if (MyProcPort != NULL && MyProcPort->has_scram_keys && pgfdw_has_required_scram_options(keywords, values)) return; ereport(ERROR, @@ -570,7 +570,7 @@ connect_pg_server(ForeignServer *server, UserMapping *user) n++; /* Add required SCRAM pass-through connection options if it's enabled. */ - if (MyProcPort->has_scram_keys && UseScramPassthrough(server, user)) + if (MyProcPort != NULL && MyProcPort->has_scram_keys && UseScramPassthrough(server, user)) { int len; int encoded_len; @@ -748,7 +748,7 @@ check_conn_params(const char **keywords, const char **values, UserMapping *user) * assume that UseScramPassthrough is also true since SCRAM options are * only set when UseScramPassthrough is enabled. */ - if (MyProcPort->has_scram_keys && pgfdw_has_required_scram_options(keywords, values)) + if (MyProcPort != NULL && MyProcPort->has_scram_keys && pgfdw_has_required_scram_options(keywords, values)) return; ereport(ERROR, @@ -2559,7 +2559,7 @@ pgfdw_has_required_scram_options(const char **keywords, const char **values) } } - has_scram_keys = has_scram_client_key && has_scram_server_key && MyProcPort->has_scram_keys; + has_scram_keys = has_scram_client_key && has_scram_server_key && MyProcPort != NULL && MyProcPort->has_scram_keys; return (has_scram_keys && has_require_auth); } From dcfc0f891273eeeb85ce6e723decf5cc37f9b1c3 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 8 Aug 2025 22:05:05 +0200 Subject: [PATCH 480/602] Remove useless/superfluous Datum conversions Remove useless DatumGetFoo() and FooGetDatum() calls. These are places where no conversion from or to Datum was actually happening. We think these extra calls covered here were harmless. Some actual bugs that were discovered during this process have been committed separately (80c758a2e1d, 2242b26ce47). 
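For illustration, a minimal before/after sketch of the pattern being
removed, taken from the alter.c hunk below (ownerId is already a plain
Oid, so the extra DatumGetObjectId() converted nothing):

    ownerId = DatumGetObjectId(datum);    /* real conversion, kept */

    /* before: ownerId is not a Datum, so this call is a no-op */
    if (!has_privs_of_role(GetUserId(), DatumGetObjectId(ownerId)))

    /* after: pass the Oid through directly */
    if (!has_privs_of_role(GetUserId(), ownerId))
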
Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/8246d7ff-f4b7-4363-913e-827dadfeb145%40eisentraut.org --- src/backend/commands/alter.c | 2 +- src/backend/executor/execExprInterp.c | 6 +++--- src/backend/rewrite/rewriteDefine.c | 5 ++--- src/backend/statistics/extended_stats.c | 2 +- src/backend/tsearch/ts_parse.c | 4 ++-- src/backend/utils/activity/pgstat.c | 2 +- src/backend/utils/adt/json.c | 2 +- src/backend/utils/adt/multirangetypes.c | 7 +++---- src/backend/utils/adt/rangetypes.c | 4 ++-- src/backend/utils/adt/varlena.c | 5 ++--- src/backend/utils/cache/relcache.c | 2 +- 11 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index c801c869c1cfc..cb75e11fced62 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -220,7 +220,7 @@ AlterObjectRename_internal(Relation rel, Oid objectId, const char *new_name) Assert(!isnull); ownerId = DatumGetObjectId(datum); - if (!has_privs_of_role(GetUserId(), DatumGetObjectId(ownerId))) + if (!has_privs_of_role(GetUserId(), ownerId)) aclcheck_error(ACLCHECK_NOT_OWNER, get_object_type(classId, objectId), old_name); diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 1a37737d4a235..a5cfe246e6380 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -2815,7 +2815,7 @@ ExecJustHashVarImpl(ExprState *state, TupleTableSlot *slot, bool *isnull) *isnull = false; if (!fcinfo->args[0].isnull) - return DatumGetUInt32(hashop->d.hashdatum.fn_addr(fcinfo)); + return hashop->d.hashdatum.fn_addr(fcinfo); else return (Datum) 0; } @@ -2849,7 +2849,7 @@ ExecJustHashVarVirtImpl(ExprState *state, TupleTableSlot *slot, bool *isnull) *isnull = false; if (!fcinfo->args[0].isnull) - return DatumGetUInt32(hashop->d.hashdatum.fn_addr(fcinfo)); + return hashop->d.hashdatum.fn_addr(fcinfo); else return (Datum) 0; } @@ -2892,7 +2892,7 @@ ExecJustHashOuterVarStrict(ExprState *state, ExprContext *econtext, if (!fcinfo->args[0].isnull) { *isnull = false; - return DatumGetUInt32(hashop->d.hashdatum.fn_addr(fcinfo)); + return hashop->d.hashdatum.fn_addr(fcinfo); } else { diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 8aa90b0d6fb75..a96fbdc1ddd64 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -725,10 +725,9 @@ EnableDisableRule(Relation rel, const char *rulename, /* * Change ev_enabled if it is different from the desired new state. 
*/ - if (DatumGetChar(ruleform->ev_enabled) != - fires_when) + if (ruleform->ev_enabled != fires_when) { - ruleform->ev_enabled = CharGetDatum(fires_when); + ruleform->ev_enabled = fires_when; CatalogTupleUpdate(pg_rewrite_desc, &ruletup->t_self, ruletup); changed = true; diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index a8b63ec0884a9..3e031cf831a26 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -2618,7 +2618,7 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, } else { - result->values[idx][i] = (Datum) datum; + result->values[idx][i] = datum; result->nulls[idx][i] = false; } diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c index e5da6cf17ec19..cba421892bf45 100644 --- a/src/backend/tsearch/ts_parse.c +++ b/src/backend/tsearch/ts_parse.c @@ -218,7 +218,7 @@ LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) * position and go to multiword mode */ - ld->curDictId = DatumGetObjectId(map->dictIds[i]); + ld->curDictId = map->dictIds[i]; ld->posDict = i + 1; ld->curSub = curVal->next; if (res) @@ -275,7 +275,7 @@ LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) * dictionaries ? */ for (i = 0; i < map->len && !dictExists; i++) - if (ld->curDictId == DatumGetObjectId(map->dictIds[i])) + if (ld->curDictId == map->dictIds[i]) dictExists = true; if (!dictExists) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 6bc91ce0dadda..ffb5b8cce3441 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -821,7 +821,7 @@ pgstat_force_next_flush(void) static bool match_db_entries(PgStatShared_HashEntry *entry, Datum match_data) { - return entry->key.dboid == DatumGetObjectId(MyDatabaseId); + return entry->key.dboid == MyDatabaseId; } /* diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 51452755f5868..e9d370cb3da8e 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -904,7 +904,7 @@ json_unique_hash(const void *key, Size keysize) hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len); - return DatumGetUInt32(hash); + return hash; } static int diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c index 46f2ec0c29fbd..e64feeaeccb7c 100644 --- a/src/backend/utils/adt/multirangetypes.c +++ b/src/backend/utils/adt/multirangetypes.c @@ -2082,15 +2082,14 @@ range_overleft_multirange_internal(TypeCacheEntry *rangetyp, bool empty; if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) - PG_RETURN_BOOL(false); - + return false; range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &lower2, &upper2); - PG_RETURN_BOOL(range_cmp_bounds(rangetyp, &upper1, &upper2) <= 0); + return (range_cmp_bounds(rangetyp, &upper1, &upper2) <= 0); } Datum @@ -2167,7 +2166,7 @@ range_overright_multirange_internal(TypeCacheEntry *rangetyp, bool empty; if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) - PG_RETURN_BOOL(false); + return false; range_deserialize(rangetyp, r, &lower1, &upper1, &empty); Assert(!empty); diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index c83b239b3bb28..254b8f65c21a6 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -1075,8 +1075,8 @@ range_union_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2, 
return r1; if (strict && - !DatumGetBool(range_overlaps_internal(typcache, r1, r2)) && - !DatumGetBool(range_adjacent_internal(typcache, r1, r2))) + !range_overlaps_internal(typcache, r1, r2) && + !range_adjacent_internal(typcache, r1, r2)) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("result of range union would not be contiguous"))); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index ffae8c23abfaf..11b442a5941c9 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -408,13 +408,12 @@ text_length(Datum str) { /* fastpath when max encoding length is one */ if (pg_database_encoding_max_length() == 1) - PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); + return (toast_raw_datum_size(str) - VARHDRSZ); else { text *t = DatumGetTextPP(str); - PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t), - VARSIZE_ANY_EXHDR(t))); + return (pg_mbstrlen_with_len(VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t))); } } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 559ba9cdb2cde..153d2fde6fd3d 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3184,7 +3184,7 @@ AssertPendingSyncs_RelationCache(void) if ((LockTagType) locallock->tag.lock.locktag_type != LOCKTAG_RELATION) continue; - relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2); + relid = locallock->tag.lock.locktag_field2; r = RelationIdGetRelation(relid); if (!RelationIsValid(r)) continue; From ff89e182d42048380dba32fee1b491893c7b4bec Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 8 Aug 2025 22:05:05 +0200 Subject: [PATCH 481/602] Add missing Datum conversions Add various missing conversions from and to Datum. The previous code mostly relied on implicit conversions or its own explicit casts instead of using the correct DatumGet*() or *GetDatum() functions. We think these omissions are harmless. Some actual bugs that were discovered during this process have been committed separately (80c758a2e1d, fd2ab03fea2). 
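For illustration, a minimal before/after sketch of the pattern being
added, modeled on the btree_gist hunks below (entry->key is a Datum
holding a pointer, so the dereference needs an explicit
DatumGetPointer() rather than an implicit cast):

    /* before: relies on an implicit Datum-to-pointer cast */
    datum = Int32GetDatum(*(int32 *) entry->key);

    /* after: the conversion out of Datum is spelled out */
    datum = Int32GetDatum(*(int32 *) DatumGetPointer(entry->key));
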
Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/8246d7ff-f4b7-4363-913e-827dadfeb145%40eisentraut.org --- contrib/btree_gist/btree_enum.c | 4 ++-- contrib/btree_gist/btree_numeric.c | 2 +- contrib/btree_gist/btree_utils_num.c | 22 ++++++++++----------- contrib/intarray/_int_op.c | 2 +- contrib/pageinspect/heapfuncs.c | 2 +- contrib/pgrowlocks/pgrowlocks.c | 4 ++-- contrib/seg/seg.c | 4 ++-- src/backend/access/brin/brin.c | 4 ++-- src/backend/access/brin/brin_bloom.c | 2 +- src/backend/access/brin/brin_minmax.c | 10 +++++----- src/backend/access/brin/brin_minmax_multi.c | 6 +++--- src/backend/access/common/heaptuple.c | 2 +- src/backend/access/common/toast_internals.c | 4 ++-- src/backend/catalog/objectaddress.c | 4 ++-- src/backend/catalog/pg_proc.c | 2 +- src/backend/catalog/pg_publication.c | 2 +- src/backend/catalog/pg_shdepend.c | 12 +++++------ src/backend/commands/event_trigger.c | 4 ++-- src/backend/commands/subscriptioncmds.c | 8 ++++---- src/backend/commands/tablecmds.c | 2 +- src/backend/commands/trigger.c | 2 +- src/backend/commands/tsearchcmds.c | 8 ++++---- src/backend/commands/user.c | 6 +++--- src/backend/executor/execExprInterp.c | 2 +- src/backend/statistics/attribute_stats.c | 6 +++--- src/backend/storage/aio/aio_funcs.c | 2 +- src/backend/storage/buffer/bufmgr.c | 4 ++-- src/backend/storage/ipc/shmem.c | 2 +- src/backend/storage/lmgr/lock.c | 4 ++-- src/backend/utils/adt/datum.c | 6 +++--- src/backend/utils/adt/jsonpath_exec.c | 2 +- src/backend/utils/adt/lockfuncs.c | 8 ++++---- src/backend/utils/adt/multirangetypes.c | 12 +++++------ src/backend/utils/adt/rangetypes.c | 8 ++++---- src/backend/utils/adt/rangetypes_spgist.c | 2 +- src/backend/utils/adt/rowtypes.c | 4 ++-- src/backend/utils/adt/waitfuncs.c | 2 +- src/backend/utils/cache/attoptcache.c | 2 +- src/backend/utils/cache/lsyscache.c | 2 +- src/backend/utils/cache/relcache.c | 2 +- src/backend/utils/cache/syscache.c | 6 +++--- src/backend/utils/sort/sortsupport.c | 2 +- src/backend/utils/sort/tuplesortvariants.c | 6 +++--- src/pl/plperl/plperl.c | 10 +++++----- src/test/regress/regress.c | 2 +- 45 files changed, 107 insertions(+), 107 deletions(-) diff --git a/contrib/btree_gist/btree_enum.c b/contrib/btree_gist/btree_enum.c index 83c95c7bb0401..c54cf3c7bae87 100644 --- a/contrib/btree_gist/btree_enum.c +++ b/contrib/btree_gist/btree_enum.c @@ -193,8 +193,8 @@ gbt_enum_ssup_cmp(Datum x, Datum y, SortSupport ssup) return DatumGetInt32(CallerFInfoFunctionCall2(enum_cmp, ssup->ssup_extra, InvalidOid, - arg1->lower, - arg2->lower)); + ObjectIdGetDatum(arg1->lower), + ObjectIdGetDatum(arg2->lower))); } Datum diff --git a/contrib/btree_gist/btree_numeric.c b/contrib/btree_gist/btree_numeric.c index a39c05d9da1cf..052f27b07949e 100644 --- a/contrib/btree_gist/btree_numeric.c +++ b/contrib/btree_gist/btree_numeric.c @@ -192,7 +192,7 @@ gbt_numeric_penalty(PG_FUNCTION_ARGS) *result = 0.0; - if (DirectFunctionCall2(numeric_gt, NumericGetDatum(ds), NumericGetDatum(nul))) + if (DatumGetBool(DirectFunctionCall2(numeric_gt, NumericGetDatum(ds), NumericGetDatum(nul)))) { *result += FLT_MIN; os = DatumGetNumeric(DirectFunctionCall2(numeric_div, diff --git a/contrib/btree_gist/btree_utils_num.c b/contrib/btree_gist/btree_utils_num.c index 346ee837d75f4..446fa930b92c4 100644 --- a/contrib/btree_gist/btree_utils_num.c +++ b/contrib/btree_gist/btree_utils_num.c @@ -119,38 +119,38 @@ gbt_num_fetch(GISTENTRY *entry, const gbtree_ninfo *tinfo) switch (tinfo->t) { case gbt_t_bool: - datum = 
BoolGetDatum(*(bool *) entry->key); + datum = BoolGetDatum(*(bool *) DatumGetPointer(entry->key)); break; case gbt_t_int2: - datum = Int16GetDatum(*(int16 *) entry->key); + datum = Int16GetDatum(*(int16 *) DatumGetPointer(entry->key)); break; case gbt_t_int4: - datum = Int32GetDatum(*(int32 *) entry->key); + datum = Int32GetDatum(*(int32 *) DatumGetPointer(entry->key)); break; case gbt_t_int8: - datum = Int64GetDatum(*(int64 *) entry->key); + datum = Int64GetDatum(*(int64 *) DatumGetPointer(entry->key)); break; case gbt_t_oid: case gbt_t_enum: - datum = ObjectIdGetDatum(*(Oid *) entry->key); + datum = ObjectIdGetDatum(*(Oid *) DatumGetPointer(entry->key)); break; case gbt_t_float4: - datum = Float4GetDatum(*(float4 *) entry->key); + datum = Float4GetDatum(*(float4 *) DatumGetPointer(entry->key)); break; case gbt_t_float8: - datum = Float8GetDatum(*(float8 *) entry->key); + datum = Float8GetDatum(*(float8 *) DatumGetPointer(entry->key)); break; case gbt_t_date: - datum = DateADTGetDatum(*(DateADT *) entry->key); + datum = DateADTGetDatum(*(DateADT *) DatumGetPointer(entry->key)); break; case gbt_t_time: - datum = TimeADTGetDatum(*(TimeADT *) entry->key); + datum = TimeADTGetDatum(*(TimeADT *) DatumGetPointer(entry->key)); break; case gbt_t_ts: - datum = TimestampGetDatum(*(Timestamp *) entry->key); + datum = TimestampGetDatum(*(Timestamp *) DatumGetPointer(entry->key)); break; case gbt_t_cash: - datum = CashGetDatum(*(Cash *) entry->key); + datum = CashGetDatum(*(Cash *) DatumGetPointer(entry->key)); break; default: datum = entry->key; diff --git a/contrib/intarray/_int_op.c b/contrib/intarray/_int_op.c index ba6d0a99995ed..a706e353c6f94 100644 --- a/contrib/intarray/_int_op.c +++ b/contrib/intarray/_int_op.c @@ -108,7 +108,7 @@ _int_overlap(PG_FUNCTION_ARGS) CHECKARRVALID(a); CHECKARRVALID(b); if (ARRISEMPTY(a) || ARRISEMPTY(b)) - return false; + PG_RETURN_BOOL(false); SORT(a); SORT(b); diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index 377ae30d1fa71..c13f07655c441 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -256,7 +256,7 @@ heap_page_items(PG_FUNCTION_ARGS) nulls[11] = true; if (tuphdr->t_infomask & HEAP_HASOID_OLD) - values[12] = HeapTupleHeaderGetOidOld(tuphdr); + values[12] = ObjectIdGetDatum(HeapTupleHeaderGetOidOld(tuphdr)); else nulls[12] = true; diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index b75d80fa7a9c2..f88269332b6be 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -141,8 +141,8 @@ pgrowlocks(PG_FUNCTION_ARGS) */ if (htsu == TM_BeingModified) { - values[Atnum_tid] = (char *) DirectFunctionCall1(tidout, - PointerGetDatum(&tuple->t_self)); + values[Atnum_tid] = DatumGetCString(DirectFunctionCall1(tidout, + PointerGetDatum(&tuple->t_self))); values[Atnum_xmax] = palloc(NCHARS * sizeof(char)); snprintf(values[Atnum_xmax], NCHARS, "%u", xmax); diff --git a/contrib/seg/seg.c b/contrib/seg/seg.c index 151cbb954b9a1..b5de2a5e1be3f 100644 --- a/contrib/seg/seg.c +++ b/contrib/seg/seg.c @@ -417,7 +417,7 @@ gseg_same(PG_FUNCTION_ARGS) { bool *result = (bool *) PG_GETARG_POINTER(2); - if (DirectFunctionCall2(seg_same, PG_GETARG_DATUM(0), PG_GETARG_DATUM(1))) + if (DatumGetBool(DirectFunctionCall2(seg_same, PG_GETARG_DATUM(0), PG_GETARG_DATUM(1)))) *result = true; else *result = false; @@ -470,7 +470,7 @@ gseg_leaf_consistent(Datum key, Datum query, StrategyNumber strategy) retval = DirectFunctionCall2(seg_contained, key, query); break; 
default: - retval = false; + retval = BoolGetDatum(false); } PG_RETURN_DATUM(retval); diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 4204088fa0d7d..7ff7467e462b0 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -1608,7 +1608,7 @@ brin_build_desc(Relation rel) opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO); opcinfo[keyno] = (BrinOpcInfo *) - DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid)); + DatumGetPointer(FunctionCall1(opcInfoFn, ObjectIdGetDatum(attr->atttypid))); totalstored += opcinfo[keyno]->oi_nstored; } @@ -2262,7 +2262,7 @@ add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup, PointerGetDatum(bdesc), PointerGetDatum(bval), values[keyno], - nulls[keyno]); + BoolGetDatum(nulls[keyno])); /* if that returned true, we need to insert the updated tuple */ modified |= DatumGetBool(result); diff --git a/src/backend/access/brin/brin_bloom.c b/src/backend/access/brin/brin_bloom.c index 82b425ce37daa..7c3f7d454fc25 100644 --- a/src/backend/access/brin/brin_bloom.c +++ b/src/backend/access/brin/brin_bloom.c @@ -540,7 +540,7 @@ brin_bloom_add_value(PG_FUNCTION_ARGS) BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); Datum newval = PG_GETARG_DATUM(2); - bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3); + bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3); BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS(); Oid colloid = PG_GET_COLLATION(); FmgrInfo *hashFn; diff --git a/src/backend/access/brin/brin_minmax.c b/src/backend/access/brin/brin_minmax.c index d21ab3a668cce..79c5a0aa18578 100644 --- a/src/backend/access/brin/brin_minmax.c +++ b/src/backend/access/brin/brin_minmax.c @@ -66,7 +66,7 @@ brin_minmax_add_value(PG_FUNCTION_ARGS) BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); Datum newval = PG_GETARG_DATUM(2); - bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3); + bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3); Oid colloid = PG_GET_COLLATION(); FmgrInfo *cmpFn; Datum compar; @@ -225,8 +225,8 @@ brin_minmax_union(PG_FUNCTION_ARGS) /* Adjust minimum, if B's min is less than A's min */ finfo = minmax_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTLessStrategyNumber); - needsadj = FunctionCall2Coll(finfo, colloid, col_b->bv_values[0], - col_a->bv_values[0]); + needsadj = DatumGetBool(FunctionCall2Coll(finfo, colloid, col_b->bv_values[0], + col_a->bv_values[0])); if (needsadj) { if (!attr->attbyval) @@ -238,8 +238,8 @@ brin_minmax_union(PG_FUNCTION_ARGS) /* Adjust maximum, if B's max is greater than A's max */ finfo = minmax_get_strategy_procinfo(bdesc, attno, attr->atttypid, BTGreaterStrategyNumber); - needsadj = FunctionCall2Coll(finfo, colloid, col_b->bv_values[1], - col_a->bv_values[1]); + needsadj = DatumGetBool(FunctionCall2Coll(finfo, colloid, col_b->bv_values[1], + col_a->bv_values[1])); if (needsadj) { if (!attr->attbyval) diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c index a5a414182caa6..c87f1b9cd7eb2 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -1992,8 +1992,8 @@ brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS) double da1, da2; - ItemPointer pa1 = (ItemPointer) PG_GETARG_DATUM(0); - ItemPointer pa2 = (ItemPointer) PG_GETARG_DATUM(1); + ItemPointer pa1 = (ItemPointer) 
PG_GETARG_POINTER(0); + ItemPointer pa2 = (ItemPointer) PG_GETARG_POINTER(1); /* * We know the values are range boundaries, but the range may be collapsed @@ -2414,7 +2414,7 @@ brin_minmax_multi_add_value(PG_FUNCTION_ARGS) BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); Datum newval = PG_GETARG_DATUM(2); - bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3); + bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_BOOL(3); MinMaxMultiOptions *opts = (MinMaxMultiOptions *) PG_GET_OPCLASS_OPTIONS(); Oid colloid = PG_GET_COLLATION(); bool modified = false; diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index a410b5eb99b99..1173a6d81b5ed 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -105,7 +105,7 @@ missing_hash(const void *key, Size keysize) { const missing_cache_key *entry = (missing_cache_key *) key; - return hash_bytes((const unsigned char *) entry->value, entry->len); + return hash_bytes((const unsigned char *) DatumGetPointer(entry->value), entry->len); } static int diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 196e06115e936..a1d0eed8953ba 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -64,11 +64,11 @@ toast_compress_datum(Datum value, char cmethod) switch (cmethod) { case TOAST_PGLZ_COMPRESSION: - tmp = pglz_compress_datum((const struct varlena *) value); + tmp = pglz_compress_datum((const struct varlena *) DatumGetPointer(value)); cmid = TOAST_PGLZ_COMPRESSION_ID; break; case TOAST_LZ4_COMPRESSION: - tmp = lz4_compress_datum((const struct varlena *) value); + tmp = lz4_compress_datum((const struct varlena *) DatumGetPointer(value)); cmid = TOAST_LZ4_COMPRESSION_ID; break; default: diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index b63fd57dc04bb..0102c9984e7db 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -4283,8 +4283,8 @@ pg_identify_object(PG_FUNCTION_ARGS) nspAttnum = get_object_attnum_namespace(address.classId); if (nspAttnum != InvalidAttrNumber) { - schema_oid = heap_getattr(objtup, nspAttnum, - RelationGetDescr(catalog), &isnull); + schema_oid = DatumGetObjectId(heap_getattr(objtup, nspAttnum, + RelationGetDescr(catalog), &isnull)); if (isnull) elog(ERROR, "invalid null namespace in object %u/%u/%d", address.classId, address.objectId, address.objectSubId); diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c index 5fdcf24d5f8de..75b17fed15e5b 100644 --- a/src/backend/catalog/pg_proc.c +++ b/src/backend/catalog/pg_proc.c @@ -1212,6 +1212,6 @@ oid_array_to_list(Datum datum) deconstruct_array_builtin(array, OIDOID, &values, NULL, &nelems); for (i = 0; i < nelems; i++) - result = lappend_oid(result, values[i]); + result = lappend_oid(result, DatumGetObjectId(values[i])); return result; } diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index d6f94db5d999b..b911efcf9cb6a 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -1001,7 +1001,7 @@ GetSchemaPublicationRelations(Oid schemaid, PublicationPartOpt pub_partopt) ScanKeyInit(&key[0], Anum_pg_class_relnamespace, BTEqualStrategyNumber, F_OIDEQ, - schemaid); + ObjectIdGetDatum(schemaid)); /* get all the relations present in the specified schema */ scan = 
table_beginscan_catalog(classRel, 1, key); diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 536191284e803..32e544da28a13 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -956,12 +956,12 @@ copyTemplateDependencies(Oid templateDbId, Oid newDbId) shdep = (Form_pg_shdepend) GETSTRUCT(tup); slot[slot_stored_count]->tts_values[Anum_pg_shdepend_dbid - 1] = ObjectIdGetDatum(newDbId); - slot[slot_stored_count]->tts_values[Anum_pg_shdepend_classid - 1] = shdep->classid; - slot[slot_stored_count]->tts_values[Anum_pg_shdepend_objid - 1] = shdep->objid; - slot[slot_stored_count]->tts_values[Anum_pg_shdepend_objsubid - 1] = shdep->objsubid; - slot[slot_stored_count]->tts_values[Anum_pg_shdepend_refclassid - 1] = shdep->refclassid; - slot[slot_stored_count]->tts_values[Anum_pg_shdepend_refobjid - 1] = shdep->refobjid; - slot[slot_stored_count]->tts_values[Anum_pg_shdepend_deptype - 1] = shdep->deptype; + slot[slot_stored_count]->tts_values[Anum_pg_shdepend_classid - 1] = ObjectIdGetDatum(shdep->classid); + slot[slot_stored_count]->tts_values[Anum_pg_shdepend_objid - 1] = ObjectIdGetDatum(shdep->objid); + slot[slot_stored_count]->tts_values[Anum_pg_shdepend_objsubid - 1] = Int32GetDatum(shdep->objsubid); + slot[slot_stored_count]->tts_values[Anum_pg_shdepend_refclassid - 1] = ObjectIdGetDatum(shdep->refclassid); + slot[slot_stored_count]->tts_values[Anum_pg_shdepend_refobjid - 1] = ObjectIdGetDatum(shdep->refobjid); + slot[slot_stored_count]->tts_values[Anum_pg_shdepend_deptype - 1] = CharGetDatum(shdep->deptype); ExecStoreVirtualTuple(slot[slot_stored_count]); slot_stored_count++; diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index edc2c988e2934..631fb0525f1e7 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -2021,8 +2021,8 @@ pg_event_trigger_ddl_commands(PG_FUNCTION_ARGS) elog(ERROR, "cache lookup failed for object %u/%u", addr.classId, addr.objectId); schema_oid = - heap_getattr(objtup, nspAttnum, - RelationGetDescr(catalog), &isnull); + DatumGetObjectId(heap_getattr(objtup, nspAttnum, + RelationGetDescr(catalog), &isnull)); if (isnull) elog(ERROR, "invalid null namespace in object %u/%u/%d", diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index cd6c3684482f9..faa3650d287de 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -638,7 +638,7 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, /* Check if name is used */ subid = GetSysCacheOid2(SUBSCRIPTIONNAME, Anum_pg_subscription_oid, - MyDatabaseId, CStringGetDatum(stmt->subname)); + ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(stmt->subname)); if (OidIsValid(subid)) { ereport(ERROR, @@ -1185,7 +1185,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, rel = table_open(SubscriptionRelationId, RowExclusiveLock); /* Fetch the existing tuple. 
*/ - tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(stmt->subname)); if (!HeapTupleIsValid(tup)) @@ -1808,7 +1808,7 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) */ rel = table_open(SubscriptionRelationId, AccessExclusiveLock); - tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId, + tup = SearchSysCache2(SUBSCRIPTIONNAME, ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(stmt->subname)); if (!HeapTupleIsValid(tup)) @@ -2193,7 +2193,7 @@ AlterSubscriptionOwner(const char *name, Oid newOwnerId) rel = table_open(SubscriptionRelationId, RowExclusiveLock); - tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(name)); if (!HeapTupleIsValid(tup)) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index cb811520c2959..c6dd2e020da23 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8985,7 +8985,7 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa memset(repl_null, false, sizeof(repl_null)); memset(repl_repl, false, sizeof(repl_repl)); if (!newtarget_default) - repl_val[Anum_pg_attribute_attstattarget - 1] = newtarget; + repl_val[Anum_pg_attribute_attstattarget - 1] = Int16GetDatum(newtarget); else repl_null[Anum_pg_attribute_attstattarget - 1] = true; repl_repl[Anum_pg_attribute_attstattarget - 1] = true; diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 71014c65b4950..235533ac17f78 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -872,7 +872,7 @@ CreateTriggerFiringOn(CreateTrigStmt *stmt, const char *queryString, CStringGetDatum(trigname)); values[Anum_pg_trigger_tgfoid - 1] = ObjectIdGetDatum(funcoid); values[Anum_pg_trigger_tgtype - 1] = Int16GetDatum(tgtype); - values[Anum_pg_trigger_tgenabled - 1] = trigger_fires_when; + values[Anum_pg_trigger_tgenabled - 1] = CharGetDatum(trigger_fires_when); values[Anum_pg_trigger_tgisinternal - 1] = BoolGetDatum(isInternal); values[Anum_pg_trigger_tgconstrrelid - 1] = ObjectIdGetDatum(constrrelid); values[Anum_pg_trigger_tgconstrindid - 1] = ObjectIdGetDatum(indexOid); diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index ab16d42ad56ba..dc7df736fb826 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -1058,10 +1058,10 @@ DefineTSConfiguration(List *names, List *parameters, ObjectAddress *copied) memset(slot[slot_stored_count]->tts_isnull, false, slot[slot_stored_count]->tts_tupleDescriptor->natts * sizeof(bool)); - slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapcfg - 1] = cfgOid; - slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = cfgmap->maptokentype; - slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapseqno - 1] = cfgmap->mapseqno; - slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapdict - 1] = cfgmap->mapdict; + slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgOid); + slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(cfgmap->maptokentype); + slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(cfgmap->mapseqno); + slot[slot_stored_count]->tts_values[Anum_pg_ts_config_map_mapdict - 1] = 
ObjectIdGetDatum(cfgmap->mapdict); ExecStoreVirtualTuple(slot[slot_stored_count]); slot_stored_count++; diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 0d638e29d0066..1e3d4ab0e20e7 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -1924,7 +1924,7 @@ AddRoleMems(Oid currentUserId, const char *rolename, Oid roleid, */ if ((popt->specified & GRANT_ROLE_SPECIFIED_INHERIT) != 0) new_record[Anum_pg_auth_members_inherit_option - 1] = - popt->inherit; + BoolGetDatum(popt->inherit); else { HeapTuple mrtup; @@ -1935,14 +1935,14 @@ AddRoleMems(Oid currentUserId, const char *rolename, Oid roleid, elog(ERROR, "cache lookup failed for role %u", memberid); mrform = (Form_pg_authid) GETSTRUCT(mrtup); new_record[Anum_pg_auth_members_inherit_option - 1] = - mrform->rolinherit; + BoolGetDatum(mrform->rolinherit); ReleaseSysCache(mrtup); } /* get an OID for the new row and insert it */ objectId = GetNewOidWithIndex(pg_authmem_rel, AuthMemOidIndexId, Anum_pg_auth_members_oid); - new_record[Anum_pg_auth_members_oid - 1] = objectId; + new_record[Anum_pg_auth_members_oid - 1] = ObjectIdGetDatum(objectId); tuple = heap_form_tuple(pg_authmem_dsc, new_record, new_record_nulls); CatalogTupleInsert(pg_authmem_rel, tuple); diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index a5cfe246e6380..0e1a74976f7d3 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4393,7 +4393,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco * is the equality function and we need not-equals. */ if (!inclause) - result = !result; + result = BoolGetDatum(!DatumGetBool(result)); } } diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c index ab198076401b0..e8241926d2c67 100644 --- a/src/backend/statistics/attribute_stats.c +++ b/src/backend/statistics/attribute_stats.c @@ -339,7 +339,7 @@ attribute_statistics_update(FunctionCallInfo fcinfo) starel = table_open(StatisticRelationId, RowExclusiveLock); - statup = SearchSysCache3(STATRELATTINH, reloid, attnum, inherited); + statup = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(reloid), Int16GetDatum(attnum), BoolGetDatum(inherited)); /* initialize from existing tuple if exists */ if (HeapTupleIsValid(statup)) @@ -895,9 +895,9 @@ init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, { values[Anum_pg_statistic_stakind1 + slotnum - 1] = (Datum) 0; nulls[Anum_pg_statistic_stakind1 + slotnum - 1] = false; - values[Anum_pg_statistic_staop1 + slotnum - 1] = InvalidOid; + values[Anum_pg_statistic_staop1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid); nulls[Anum_pg_statistic_staop1 + slotnum - 1] = false; - values[Anum_pg_statistic_stacoll1 + slotnum - 1] = InvalidOid; + values[Anum_pg_statistic_stacoll1 + slotnum - 1] = ObjectIdGetDatum(InvalidOid); nulls[Anum_pg_statistic_stacoll1 + slotnum - 1] = false; } } diff --git a/src/backend/storage/aio/aio_funcs.c b/src/backend/storage/aio/aio_funcs.c index 584e683371a31..905ea129c813d 100644 --- a/src/backend/storage/aio/aio_funcs.c +++ b/src/backend/storage/aio/aio_funcs.c @@ -152,7 +152,7 @@ pg_get_aios(PG_FUNCTION_ARGS) nulls[0] = false; /* column: IO's id */ - values[1] = ioh_id; + values[1] = UInt32GetDatum(ioh_id); /* column: IO's generation */ values[2] = Int64GetDatum(start_generation); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6c9ae83a7a869..fd7e21d96d31f 100644 --- 
a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -6365,8 +6365,8 @@ ckpt_buforder_comparator(const CkptSortItem *a, const CkptSortItem *b) static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg) { - CkptTsStatus *sa = (CkptTsStatus *) a; - CkptTsStatus *sb = (CkptTsStatus *) b; + CkptTsStatus *sa = (CkptTsStatus *) DatumGetPointer(a); + CkptTsStatus *sb = (CkptTsStatus *) DatumGetPointer(b); /* we want a min-heap, so return 1 for the a < b */ if (sa->progress < sb->progress) diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index ca3656fc76f43..d12a3ca06842f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -714,7 +714,7 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS) for (i = 0; i <= max_nodes; i++) { values[0] = CStringGetTextDatum(ent->key); - values[1] = i; + values[1] = Int32GetDatum(i); values[2] = Int64GetDatum(nodes[i] * os_page_size); tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 62f3471448ebc..f8c88147160e9 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -589,7 +589,7 @@ proclock_hash(const void *key, Size keysize) * intermediate variable to suppress cast-pointer-to-int warnings. */ procptr = PointerGetDatum(proclocktag->myProc); - lockhash ^= ((uint32) procptr) << LOG2_NUM_LOCK_PARTITIONS; + lockhash ^= DatumGetUInt32(procptr) << LOG2_NUM_LOCK_PARTITIONS; return lockhash; } @@ -610,7 +610,7 @@ ProcLockHashCode(const PROCLOCKTAG *proclocktag, uint32 hashcode) * This must match proclock_hash()! */ procptr = PointerGetDatum(proclocktag->myProc); - lockhash ^= ((uint32) procptr) << LOG2_NUM_LOCK_PARTITIONS; + lockhash ^= DatumGetUInt32(procptr) << LOG2_NUM_LOCK_PARTITIONS; return lockhash; } diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index fcd5b1653dd3e..614644a4e2a06 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -299,9 +299,9 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) len1 - VARHDRSZ) == 0); /* Only free memory if it's a copy made here. */ - if ((Pointer) arg1val != (Pointer) value1) + if ((Pointer) arg1val != DatumGetPointer(value1)) pfree(arg1val); - if ((Pointer) arg2val != (Pointer) value2) + if ((Pointer) arg2val != DatumGetPointer(value2)) pfree(arg2val); } } @@ -355,7 +355,7 @@ datum_image_hash(Datum value, bool typByVal, int typLen) result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); /* Only free memory if it's a copy made here. 
*/ - if ((Pointer) val != (Pointer) value) + if ((Pointer) val != DatumGetPointer(value)) pfree(val); } else if (typLen == -2) diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c index 407041b14a177..5a56253522357 100644 --- a/src/backend/utils/adt/jsonpath_exec.c +++ b/src/backend/utils/adt/jsonpath_exec.c @@ -1517,7 +1517,7 @@ executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp, /* Convert numstr to Numeric with typmod */ Assert(numstr != NULL); noerr = DirectInputFunctionCallSafe(numeric_in, numstr, - InvalidOid, dtypmod, + InvalidOid, DatumGetInt32(dtypmod), (Node *) &escontext, &numdatum); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 00e67fb46d074..df938812dd368 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -398,15 +398,15 @@ pg_lock_status(PG_FUNCTION_ARGS) values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]); /* lock target */ - values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag); - values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag); + values[1] = ObjectIdGetDatum(GET_PREDICATELOCKTARGETTAG_DB(*predTag)); + values[2] = ObjectIdGetDatum(GET_PREDICATELOCKTARGETTAG_RELATION(*predTag)); if (lockType == PREDLOCKTAG_TUPLE) - values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag); + values[4] = UInt16GetDatum(GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag)); else nulls[4] = true; if ((lockType == PREDLOCKTAG_TUPLE) || (lockType == PREDLOCKTAG_PAGE)) - values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag); + values[3] = UInt32GetDatum(GET_PREDICATELOCKTARGETTAG_PAGE(*predTag)); else nulls[3] = true; diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c index e64feeaeccb7c..84733dc50195b 100644 --- a/src/backend/utils/adt/multirangetypes.c +++ b/src/backend/utils/adt/multirangetypes.c @@ -2523,7 +2523,7 @@ multirange_adjacent_range(PG_FUNCTION_ARGS) TypeCacheEntry *typcache; if (RangeIsEmpty(r) || MultirangeIsEmpty(mr)) - return false; + PG_RETURN_BOOL(false); typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr)); @@ -2544,7 +2544,7 @@ multirange_adjacent_multirange(PG_FUNCTION_ARGS) upper2; if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2)) - return false; + PG_RETURN_BOOL(false); typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1)); @@ -2639,7 +2639,7 @@ multirange_cmp(PG_FUNCTION_ARGS) Datum multirange_lt(PG_FUNCTION_ARGS) { - int cmp = multirange_cmp(fcinfo); + int cmp = DatumGetInt32(multirange_cmp(fcinfo)); PG_RETURN_BOOL(cmp < 0); } @@ -2647,7 +2647,7 @@ multirange_lt(PG_FUNCTION_ARGS) Datum multirange_le(PG_FUNCTION_ARGS) { - int cmp = multirange_cmp(fcinfo); + int cmp = DatumGetInt32(multirange_cmp(fcinfo)); PG_RETURN_BOOL(cmp <= 0); } @@ -2655,7 +2655,7 @@ multirange_le(PG_FUNCTION_ARGS) Datum multirange_ge(PG_FUNCTION_ARGS) { - int cmp = multirange_cmp(fcinfo); + int cmp = DatumGetInt32(multirange_cmp(fcinfo)); PG_RETURN_BOOL(cmp >= 0); } @@ -2663,7 +2663,7 @@ multirange_ge(PG_FUNCTION_ARGS) Datum multirange_gt(PG_FUNCTION_ARGS) { - int cmp = multirange_cmp(fcinfo); + int cmp = DatumGetInt32(multirange_cmp(fcinfo)); PG_RETURN_BOOL(cmp > 0); } diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index 254b8f65c21a6..15398c72ea004 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -1356,7 +1356,7 @@ range_fast_cmp(Datum a, Datum b, SortSupport ssup) Datum 
range_lt(PG_FUNCTION_ARGS) { - int cmp = range_cmp(fcinfo); + int cmp = DatumGetInt32(range_cmp(fcinfo)); PG_RETURN_BOOL(cmp < 0); } @@ -1364,7 +1364,7 @@ range_lt(PG_FUNCTION_ARGS) Datum range_le(PG_FUNCTION_ARGS) { - int cmp = range_cmp(fcinfo); + int cmp = DatumGetInt32(range_cmp(fcinfo)); PG_RETURN_BOOL(cmp <= 0); } @@ -1372,7 +1372,7 @@ range_le(PG_FUNCTION_ARGS) Datum range_ge(PG_FUNCTION_ARGS) { - int cmp = range_cmp(fcinfo); + int cmp = DatumGetInt32(range_cmp(fcinfo)); PG_RETURN_BOOL(cmp >= 0); } @@ -1380,7 +1380,7 @@ range_ge(PG_FUNCTION_ARGS) Datum range_gt(PG_FUNCTION_ARGS) { - int cmp = range_cmp(fcinfo); + int cmp = DatumGetInt32(range_cmp(fcinfo)); PG_RETURN_BOOL(cmp > 0); } diff --git a/src/backend/utils/adt/rangetypes_spgist.c b/src/backend/utils/adt/rangetypes_spgist.c index 9b6d7061a1812..be51965488088 100644 --- a/src/backend/utils/adt/rangetypes_spgist.c +++ b/src/backend/utils/adt/rangetypes_spgist.c @@ -757,7 +757,7 @@ spg_range_quad_inner_consistent(PG_FUNCTION_ARGS) * because it's range */ previousCentroid = datumCopy(in->prefixDatum, false, -1); - out->traversalValues[out->nNodes] = (void *) previousCentroid; + out->traversalValues[out->nNodes] = DatumGetPointer(previousCentroid); } out->nodeNumbers[out->nNodes] = i - 1; out->nNodes++; diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index fe5edc0027da3..9e5449f17d7c0 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -1529,9 +1529,9 @@ record_image_cmp(FunctionCallInfo fcinfo) if ((cmpresult == 0) && (len1 != len2)) cmpresult = (len1 < len2) ? -1 : 1; - if ((Pointer) arg1val != (Pointer) values1[i1]) + if ((Pointer) arg1val != DatumGetPointer(values1[i1])) pfree(arg1val); - if ((Pointer) arg2val != (Pointer) values2[i2]) + if ((Pointer) arg2val != DatumGetPointer(values2[i2])) pfree(arg2val); } else diff --git a/src/backend/utils/adt/waitfuncs.c b/src/backend/utils/adt/waitfuncs.c index ddd0a57c0c597..f01cad72a0feb 100644 --- a/src/backend/utils/adt/waitfuncs.c +++ b/src/backend/utils/adt/waitfuncs.c @@ -73,7 +73,7 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) * acquire heavyweight locks. 
*/ blocking_pids_a = - DatumGetArrayTypeP(DirectFunctionCall1(pg_blocking_pids, blocked_pid)); + DatumGetArrayTypeP(DirectFunctionCall1(pg_blocking_pids, Int32GetDatum(blocked_pid))); Assert(ARR_ELEMTYPE(blocking_pids_a) == INT4OID); Assert(!array_contains_nulls(blocking_pids_a)); diff --git a/src/backend/utils/cache/attoptcache.c b/src/backend/utils/cache/attoptcache.c index 5c8360c08b5f8..45d1e2be007ba 100644 --- a/src/backend/utils/cache/attoptcache.c +++ b/src/backend/utils/cache/attoptcache.c @@ -86,7 +86,7 @@ relatt_cache_syshash(const void *key, Size keysize) const AttoptCacheKey *ckey = key; Assert(keysize == sizeof(*ckey)); - return GetSysCacheHashValue2(ATTNUM, ckey->attrelid, ckey->attnum); + return GetSysCacheHashValue2(ATTNUM, ObjectIdGetDatum(ckey->attrelid), Int32GetDatum(ckey->attnum)); } /* diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index c460a72b75d90..032bb6222c49b 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -3817,7 +3817,7 @@ get_subscription_oid(const char *subname, bool missing_ok) Oid oid; oid = GetSysCacheOid2(SUBSCRIPTIONNAME, Anum_pg_subscription_oid, - MyDatabaseId, CStringGetDatum(subname)); + ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(subname)); if (!OidIsValid(oid) && !missing_ok) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 153d2fde6fd3d..6fe268a8eec1f 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -6991,5 +6991,5 @@ ResOwnerReleaseRelation(Datum res) Assert(rel->rd_refcnt > 0); rel->rd_refcnt -= 1; - RelationCloseCleanup((Relation) res); + RelationCloseCleanup((Relation) DatumGetPointer(res)); } diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index f944453a1d884..7828bdcba8f64 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -459,9 +459,9 @@ GetSysCacheOid(int cacheId, tuple = SearchSysCache(cacheId, key1, key2, key3, key4); if (!HeapTupleIsValid(tuple)) return InvalidOid; - result = heap_getattr(tuple, oidcol, - SysCache[cacheId]->cc_tupdesc, - &isNull); + result = DatumGetObjectId(heap_getattr(tuple, oidcol, + SysCache[cacheId]->cc_tupdesc, + &isNull)); Assert(!isNull); /* columns used as oids should never be NULL */ ReleaseSysCache(tuple); return result; diff --git a/src/backend/utils/sort/sortsupport.c b/src/backend/utils/sort/sortsupport.c index e0f500b9aa29c..f582c6624f11a 100644 --- a/src/backend/utils/sort/sortsupport.c +++ b/src/backend/utils/sort/sortsupport.c @@ -57,7 +57,7 @@ comparison_shim(Datum x, Datum y, SortSupport ssup) if (extra->fcinfo.isnull) elog(ERROR, "function %u returned NULL", extra->flinfo.fn_oid); - return result; + return DatumGetInt32(result); } /* diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c index 5f70e8dddac57..c5d18e46716fd 100644 --- a/src/backend/utils/sort/tuplesortvariants.c +++ b/src/backend/utils/sort/tuplesortvariants.c @@ -865,7 +865,7 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size) memcpy(&bstup->tuple, tuple, size); stup.tuple = bstup; - stup.datum1 = tuple->bt_blkno; + stup.datum1 = UInt32GetDatum(tuple->bt_blkno); stup.isnull1 = false; /* GetMemoryChunkSpace is not supported for bump contexts */ @@ -1836,7 +1836,7 @@ removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups, int count) BrinSortTuple *tuple; 
tuple = stups[i].tuple; - stups[i].datum1 = tuple->tuple.bt_blkno; + stups[i].datum1 = UInt32GetDatum(tuple->tuple.bt_blkno); } } @@ -1893,7 +1893,7 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup, stup->tuple = tuple; /* set up first-column key value, which is block number */ - stup->datum1 = tuple->tuple.bt_blkno; + stup->datum1 = UInt32GetDatum(tuple->tuple.bt_blkno); } /* diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index 29cb4d7e47f80..73ba1748fe0a8 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -1453,7 +1453,7 @@ plperl_sv_to_literal(SV *sv, char *fqtypename) check_spi_usage_allowed(); - typid = DirectFunctionCall1(regtypein, CStringGetDatum(fqtypename)); + typid = DatumGetObjectId(DirectFunctionCall1(regtypein, CStringGetDatum(fqtypename))); if (!OidIsValid(typid)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), @@ -2569,13 +2569,13 @@ plperl_trigger_handler(PG_FUNCTION_ARGS) TriggerData *trigdata = ((TriggerData *) fcinfo->context); if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) - retval = (Datum) trigdata->tg_trigtuple; + retval = PointerGetDatum(trigdata->tg_trigtuple); else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) - retval = (Datum) trigdata->tg_newtuple; + retval = PointerGetDatum(trigdata->tg_newtuple); else if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) - retval = (Datum) trigdata->tg_trigtuple; + retval = PointerGetDatum(trigdata->tg_trigtuple); else if (TRIGGER_FIRED_BY_TRUNCATE(trigdata->tg_event)) - retval = (Datum) trigdata->tg_trigtuple; + retval = PointerGetDatum(trigdata->tg_trigtuple); else retval = (Datum) 0; /* can this happen? */ } diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 3dbba06902405..465ac148ac9f7 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -727,7 +727,7 @@ PG_FUNCTION_INFO_V1(is_catalog_text_unique_index_oid); Datum is_catalog_text_unique_index_oid(PG_FUNCTION_ARGS) { - return IsCatalogTextUniqueIndexOid(PG_GETARG_OID(0)); + return BoolGetDatum(IsCatalogTextUniqueIndexOid(PG_GETARG_OID(0))); } PG_FUNCTION_INFO_V1(test_support_func); From 665c3dbba497b795c4ee46145777bc4eb89c78a1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 8 Aug 2025 18:44:57 -0400 Subject: [PATCH 482/602] Mop-up for Datum conversion cleanups. Fix a couple more places where an explicit Datum conversion is needed (not clear how we missed these in ff89e182d and previous commits). Replace the minority usage "(Datum) NULL" with "(Datum) 0". The former depends on the assumption that Datum is the same width as Pointer, the latter doesn't. Anyway consistency is a good thing. This is, I believe, the last of the notational mop-up needed before we can consider changing Datum to uint64 everywhere. It's also important cleanup for more aggressive ideas such as making Datum a struct. 
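For illustration (values[] here stands for any of the catalog-tuple arrays
touched below), the two spellings differ only in what they assume about
Datum's representation:

    values[i] = (Datum) NULL;   /* old: only valid while Datum is pointer-width */
    values[i] = (Datum) 0;      /* new: valid for any integral Datum */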
Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us Discussion: https://postgr.es/m/8246d7ff-f4b7-4363-913e-827dadfeb145@eisentraut.org --- contrib/ltree/_ltree_gist.c | 2 +- src/backend/catalog/pg_aggregate.c | 2 +- src/backend/catalog/pg_constraint.c | 2 +- src/backend/catalog/pg_conversion.c | 2 +- src/backend/catalog/pg_namespace.c | 2 +- src/backend/catalog/pg_operator.c | 4 ++-- src/backend/catalog/pg_type.c | 2 +- src/backend/executor/spi.c | 2 +- src/backend/nodes/readfuncs.c | 2 +- src/backend/utils/adt/jsonfuncs.c | 6 +++--- src/backend/utils/adt/rangetypes.c | 4 ++-- src/include/access/htup_details.h | 2 +- src/include/access/itup.h | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c index 286ad24fbe847..30d516e60bc21 100644 --- a/contrib/ltree/_ltree_gist.c +++ b/contrib/ltree/_ltree_gist.c @@ -84,7 +84,7 @@ _ltree_compress(PG_FUNCTION_ARGS) entry->rel, entry->page, entry->offset, false); } - else if (!LTG_ISALLTRUE(entry->key)) + else if (!LTG_ISALLTRUE(DatumGetPointer(entry->key))) { int32 i; ltree_gist *key; diff --git a/src/backend/catalog/pg_aggregate.c b/src/backend/catalog/pg_aggregate.c index a05f8a87c1f83..c62e8acd41375 100644 --- a/src/backend/catalog/pg_aggregate.c +++ b/src/backend/catalog/pg_aggregate.c @@ -654,7 +654,7 @@ AggregateCreate(const char *aggName, for (i = 0; i < Natts_pg_aggregate; i++) { nulls[i] = false; - values[i] = (Datum) NULL; + values[i] = (Datum) 0; replaces[i] = true; } values[Anum_pg_aggregate_aggfnoid - 1] = ObjectIdGetDatum(procOid); diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 2d5ac1ea8138b..6002fd0002fc9 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -179,7 +179,7 @@ CreateConstraintEntry(const char *constraintName, for (i = 0; i < Natts_pg_constraint; i++) { nulls[i] = false; - values[i] = (Datum) NULL; + values[i] = (Datum) 0; } conOid = GetNewOidWithIndex(conDesc, ConstraintOidIndexId, diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c index 04cc375caea8c..090f680d1908f 100644 --- a/src/backend/catalog/pg_conversion.c +++ b/src/backend/catalog/pg_conversion.c @@ -87,7 +87,7 @@ ConversionCreate(const char *conname, Oid connamespace, for (i = 0; i < Natts_pg_conversion; i++) { nulls[i] = false; - values[i] = (Datum) NULL; + values[i] = (Datum) 0; } /* form a tuple */ diff --git a/src/backend/catalog/pg_namespace.c b/src/backend/catalog/pg_namespace.c index 6f5634a4de69b..616bcc7852113 100644 --- a/src/backend/catalog/pg_namespace.c +++ b/src/backend/catalog/pg_namespace.c @@ -76,7 +76,7 @@ NamespaceCreate(const char *nspName, Oid ownerId, bool isTemp) for (i = 0; i < Natts_pg_namespace; i++) { nulls[i] = false; - values[i] = (Datum) NULL; + values[i] = (Datum) 0; } nspoid = GetNewOidWithIndex(nspdesc, NamespaceOidIndexId, diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c index bfcfa643464ac..44d2ccb6788e9 100644 --- a/src/backend/catalog/pg_operator.c +++ b/src/backend/catalog/pg_operator.c @@ -225,7 +225,7 @@ OperatorShellMake(const char *operatorName, for (i = 0; i < Natts_pg_operator; ++i) { nulls[i] = false; - values[i] = (Datum) NULL; /* redundant, but safe */ + values[i] = (Datum) 0; /* redundant, but safe */ } /* @@ -453,7 +453,7 @@ OperatorCreate(const char *operatorName, for (i = 0; i < Natts_pg_operator; ++i) { - values[i] = (Datum) NULL; + values[i] = (Datum) 0; replaces[i] = 
true; nulls[i] = false; } diff --git a/src/backend/catalog/pg_type.c b/src/backend/catalog/pg_type.c index b36f81afb9d3f..1ec523ee3e59e 100644 --- a/src/backend/catalog/pg_type.c +++ b/src/backend/catalog/pg_type.c @@ -80,7 +80,7 @@ TypeShellMake(const char *typeName, Oid typeNamespace, Oid ownerId) for (i = 0; i < Natts_pg_type; ++i) { nulls[i] = false; - values[i] = (Datum) NULL; /* redundant, but safe */ + values[i] = (Datum) 0; /* redundant, but safe */ } /* diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index ecb2e4ccaa1ca..50fcd0237768e 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1258,7 +1258,7 @@ SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull) { SPI_result = SPI_ERROR_NOATTRIBUTE; *isnull = true; - return (Datum) NULL; + return (Datum) 0; } return heap_getattr(tuple, fnumber, tupdesc, isnull); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 48b5d13b9b62c..2f933e95cb95e 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -630,7 +630,7 @@ readDatum(bool typbyval) } } else if (length <= 0) - res = (Datum) NULL; + res = (Datum) 0; else { s = (char *) palloc(length); diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 370456408bfba..c5e1a027956bc 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -2027,7 +2027,7 @@ each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, bool as_text) { /* a json null is an sql null in text mode */ nulls[1] = true; - values[1] = (Datum) NULL; + values[1] = (Datum) 0; } else values[1] = PointerGetDatum(JsonbValueAsText(&v)); @@ -2266,7 +2266,7 @@ elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, { /* a json null is an sql null in text mode */ nulls[0] = true; - values[0] = (Datum) NULL; + values[0] = (Datum) 0; } else values[0] = PointerGetDatum(JsonbValueAsText(&v)); @@ -2389,7 +2389,7 @@ elements_array_element_end(void *state, bool isnull) if (isnull && _state->normalize_results) { nulls[0] = true; - values[0] = (Datum) NULL; + values[0] = (Datum) 0; } else if (_state->next_scalar) { diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index 15398c72ea004..18e467bccd3a0 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -1343,9 +1343,9 @@ range_fast_cmp(Datum a, Datum b, SortSupport ssup) cmp = range_cmp_bounds(typcache, &upper1, &upper2); } - if ((Datum) range_a != a) + if ((Pointer) range_a != DatumGetPointer(a)) pfree(range_a); - if ((Datum) range_b != b) + if ((Pointer) range_b != DatumGetPointer(b)) pfree(range_b); return cmp; diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index aa957cf3b0165..ae813a790419e 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -884,7 +884,7 @@ fastgetattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) if (att_isnull(attnum - 1, tup->t_data->t_bits)) { *isnull = true; - return (Datum) NULL; + return (Datum) 0; } else return nocachegetattr(tup, attnum, tupleDesc); diff --git a/src/include/access/itup.h b/src/include/access/itup.h index 7066c2a2868b3..338e90749bd1f 100644 --- a/src/include/access/itup.h +++ b/src/include/access/itup.h @@ -154,7 +154,7 @@ index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) if (att_isnull(attnum - 1, (bits8 *) tup + sizeof(IndexTupleData))) { *isnull = true; - 
return (Datum) NULL; + return (Datum) 0; } else return nocache_index_getattr(tup, attnum, tupleDesc); From b421223172a28db2e724d5e35304097fe68a1e38 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sat, 9 Aug 2025 12:33:06 +1200 Subject: [PATCH 483/602] Fix rare bug in read_stream.c's split IO handling. The internal queue of buffers could become corrupted in a rare edge case that failed to invalidate an entry, causing a stale buffer to be "forwarded" to StartReadBuffers(). This is a simple fix for the immediate problem. A small API change might be able to remove this and related fragility entirely, but that will have to wait a bit. Defect in commit ed0b87ca. Bug: 19006 Backpatch-through: 18 Reported-by: Alexander Lakhin Reviewed-by: Tom Lane Reviewed-by: Michael Paquier Reviewed-by: Xuneng Zhou Discussion: https://postgr.es/m/19006-80fcaaf69000377e%40postgresql.org --- src/backend/storage/aio/read_stream.c | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index 0e7f5557f5cb9..031fde9f4cbef 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -247,12 +247,33 @@ read_stream_start_pending_read(ReadStream *stream) Assert(stream->pinned_buffers + stream->pending_read_nblocks <= stream->max_pinned_buffers); +#ifdef USE_ASSERT_CHECKING /* We had better not be overwriting an existing pinned buffer. */ if (stream->pinned_buffers > 0) Assert(stream->next_buffer_index != stream->oldest_buffer_index); else Assert(stream->next_buffer_index == stream->oldest_buffer_index); + /* + * Pinned buffers forwarded by a preceding StartReadBuffers() call that + * had to split the operation should match the leading blocks of this + * following StartReadBuffers() call. + */ + Assert(stream->forwarded_buffers <= stream->pending_read_nblocks); + for (int i = 0; i < stream->forwarded_buffers; ++i) + Assert(BufferGetBlockNumber(stream->buffers[stream->next_buffer_index + i]) == + stream->pending_read_blocknum + i); + + /* + * Check that we've cleared the queue/overflow entries corresponding to + * the rest of the blocks covered by this read, unless it's the first go + * around and we haven't even initialized them yet. + */ + for (int i = stream->forwarded_buffers; i < stream->pending_read_nblocks; ++i) + Assert(stream->next_buffer_index + i >= stream->initialized_buffers || + stream->buffers[stream->next_buffer_index + i] == InvalidBuffer); +#endif + /* Do we need to issue read-ahead advice? */ flags = stream->read_buffers_flags; if (stream->advice_enabled) @@ -979,6 +1000,19 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data) stream->pending_read_nblocks == 0 && stream->per_buffer_data_size == 0) { + /* + * The fast path spins on one buffer entry repeatedly instead of + * rotating through the whole queue and clearing the entries behind + * it. If the buffer it starts with happened to be forwarded between + * StartReadBuffers() calls and also wrapped around the circular queue + * partway through, then a copy also exists in the overflow zone, and + * it won't clear it out as the regular path would. Do that now, so + * it doesn't need code for that. 
+ */ + if (stream->oldest_buffer_index < stream->io_combine_limit - 1) + stream->buffers[stream->queue_size + stream->oldest_buffer_index] = + InvalidBuffer; + stream->fast_path = true; } #endif From 22424953cded3f83f0382383773eaf36eb1abda9 Mon Sep 17 00:00:00 2001 From: Dean Rasheed Date: Mon, 11 Aug 2025 09:03:11 +0100 Subject: [PATCH 484/602] Fix security checks in selectivity estimation functions. Commit e2d4ef8de86 (the fix for CVE-2017-7484) added security checks to the selectivity estimation functions to prevent them from running user-supplied operators on data obtained from pg_statistic if the user lacks privileges to select from the underlying table. In cases involving inheritance/partitioning, those checks were originally performed against the child RTE (which for plain inheritance might actually refer to the parent table). Commit 553d2ec2710 then extended that to also check the parent RTE, allowing access if the user had permissions on either the parent or the child. It turns out, however, that doing any checks using the child RTE is incorrect, since securityQuals is set to NULL when creating an RTE for an inheritance child (whether it refers to the parent table or the child table), and therefore such checks do not correctly account for any RLS policies or security barrier views. Therefore, do the security checks using only the parent RTE. This is consistent with how RLS policies are applied, and the executor's ACL checks, both of which use only the parent table's permissions/policies. Similar checks are performed in the extended stats code, so update that in the same way, centralizing all the checks in a new function. In addition, note that these checks by themselves are insufficient to ensure that the user has access to the table's data because, in a query that goes via a view, they only check that the view owner has permissions on the underlying table, not that the current user has permissions on the view itself. In the selectivity estimation functions, there is no easy way to navigate from underlying tables to views, so add permissions checks for all views mentioned in the query to the planner startup code. If the user lacks permissions on a view, a permissions error will now be reported at planner-startup, and the selectivity estimation functions will not be run. Checking view permissions at planner-startup in this way is a little ugly, since the same checks will be repeated at executor-startup. Longer-term, it might be better to move all the permissions checks from the executor to the planner so that permissions errors can be reported sooner, instead of creating a plan that won't ever be run. However, such a change seems too far-reaching to be back-patched. Back-patch to all supported versions. In v13, there is the added complication that UPDATEs and DELETEs on inherited target tables are planned using inheritance_planner(), which plans each inheritance child table separately, so that the selectivity estimation functions do not know that they are dealing with a child table accessed via its parent. Handle that by checking access permissions on the top parent table at planner-startup, in the same way as we do for views. Any securityQuals on the top parent table are moved down to the child tables by inheritance_planner(), so they continue to be checked by the selectivity estimation functions. 
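Condensed to its core, the centralized test added by this patch amounts to
the following sketch (not the literal patched code; clause_attnums stands
for the set of attributes referenced by the clause being examined):

    /*
     * A non-leakproof operator may only be run against pg_statistic data
     * if the user could read every row of the referenced columns anyway;
     * otherwise the clause is ignored for estimation purposes.
     */
    if (!get_func_leakproof(get_opcode(expr->opno)) &&
        !all_rows_selectable(root, relid, clause_attnums))
        return false;

Both conditions reuse existing infrastructure: get_func_leakproof() and
get_opcode() are lsyscache lookups, while all_rows_selectable() is the new
helper that performs the permission checks against the inheritance root
parent.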
Author: Dean Rasheed Reviewed-by: Tom Lane Reviewed-by: Noah Misch Backpatch-through: 13 Security: CVE-2025-8713 --- src/backend/executor/execMain.c | 3 +- src/backend/optimizer/plan/planner.c | 33 ++ src/backend/statistics/extended_stats.c | 108 ++--- src/backend/utils/adt/selfuncs.c | 486 ++++++++++++---------- src/include/executor/executor.h | 1 + src/include/utils/selfuncs.h | 4 +- src/test/regress/expected/privileges.out | 13 +- src/test/regress/expected/rowsecurity.out | 73 +++- src/test/regress/expected/stats_ext.out | 71 +++- src/test/regress/sql/privileges.sql | 12 +- src/test/regress/sql/rowsecurity.sql | 51 ++- src/test/regress/sql/stats_ext.sql | 45 +- 12 files changed, 596 insertions(+), 304 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0391798dd2c33..b8b9d2a85f76c 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -84,7 +84,6 @@ static void ExecutePlan(QueryDesc *queryDesc, uint64 numberTuples, ScanDirection direction, DestReceiver *dest); -static bool ExecCheckOneRelPerms(RTEPermissionInfo *perminfo); static bool ExecCheckPermissionsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols, AclMode requiredPerms); @@ -643,7 +642,7 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos, * ExecCheckOneRelPerms * Check access permissions for a single relation. */ -static bool +bool ExecCheckOneRelPerms(RTEPermissionInfo *perminfo) { AclMode requiredPerms; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d59d6e4c6a021..5ba0d22befd3a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -58,6 +58,7 @@ #include "parser/parsetree.h" #include "partitioning/partdesc.h" #include "rewrite/rewriteManip.h" +#include "utils/acl.h" #include "utils/backend_status.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -837,6 +838,38 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, bms_make_singleton(parse->resultRelation); } + /* + * This would be a convenient time to check access permissions for all + * relations mentioned in the query, since it would be better to fail now, + * before doing any detailed planning. However, for historical reasons, + * we leave this to be done at executor startup. + * + * Note, however, that we do need to check access permissions for any view + * relations mentioned in the query, in order to prevent information being + * leaked by selectivity estimation functions, which only check view owner + * permissions on underlying tables (see all_rows_selectable() and its + * callers). This is a little ugly, because it means that access + * permissions for views will be checked twice, which is another reason + * why it would be better to do all the ACL checks here. + */ + foreach(l, parse->rtable) + { + RangeTblEntry *rte = lfirst_node(RangeTblEntry, l); + + if (rte->perminfoindex != 0 && + rte->relkind == RELKIND_VIEW) + { + RTEPermissionInfo *perminfo; + bool result; + + perminfo = getRTEPermissionInfo(parse->rteperminfos, rte); + result = ExecCheckOneRelPerms(perminfo); + if (!result) + aclcheck_error(ACLCHECK_NO_PRIV, OBJECT_VIEW, + get_rel_name(perminfo->relid)); + } + } + /* * Preprocess RowMark information. We need to do this after subquery * pullup, so that all base relations are present. 
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 3e031cf831a26..af0b99243c614 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1317,6 +1317,9 @@ choose_best_statistics(List *stats, char requiredkind, bool inh, * so we can't cope with system columns. * *exprs: input/output parameter collecting primitive subclauses within * the clause tree + * *leakproof: input/output parameter recording the leakproofness of the + * clause tree. This should be true initially, and will be set to false + * if any operator function used in an OpExpr is not leakproof. * * Returns false if there is something we definitively can't handle. * On true return, we can proceed to match the *exprs against statistics. @@ -1324,7 +1327,7 @@ choose_best_statistics(List *stats, char requiredkind, bool inh, static bool statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, Index relid, Bitmapset **attnums, - List **exprs) + List **exprs, bool *leakproof) { /* Look inside any binary-compatible relabeling (as in examine_variable) */ if (IsA(clause, RelabelType)) @@ -1359,7 +1362,6 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* (Var/Expr op Const) or (Const op Var/Expr) */ if (is_opclause(clause)) { - RangeTblEntry *rte = root->simple_rte_array[relid]; OpExpr *expr = (OpExpr *) clause; Node *clause_expr; @@ -1394,24 +1396,15 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; } - /* - * If there are any securityQuals on the RTE from security barrier - * views or RLS policies, then the user may not have access to all the - * table's data, and we must check that the operator is leakproof. - * - * If the operator is leaky, then we must ignore this clause for the - * purposes of estimating with MCV lists, otherwise the operator might - * reveal values from the MCV list that the user doesn't have - * permission to see. - */ - if (rte->securityQuals != NIL && - !get_func_leakproof(get_opcode(expr->opno))) - return false; + /* Check if the operator is leakproof */ + if (*leakproof) + *leakproof = get_func_leakproof(get_opcode(expr->opno)); /* Check (Var op Const) or (Const op Var) clauses by recursing. */ if (IsA(clause_expr, Var)) return statext_is_compatible_clause_internal(root, clause_expr, - relid, attnums, exprs); + relid, attnums, + exprs, leakproof); /* Otherwise we have (Expr op Const) or (Const op Expr). */ *exprs = lappend(*exprs, clause_expr); @@ -1421,7 +1414,6 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* Var/Expr IN Array */ if (IsA(clause, ScalarArrayOpExpr)) { - RangeTblEntry *rte = root->simple_rte_array[relid]; ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; Node *clause_expr; bool expronleft; @@ -1461,24 +1453,15 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; } - /* - * If there are any securityQuals on the RTE from security barrier - * views or RLS policies, then the user may not have access to all the - * table's data, and we must check that the operator is leakproof. - * - * If the operator is leaky, then we must ignore this clause for the - * purposes of estimating with MCV lists, otherwise the operator might - * reveal values from the MCV list that the user doesn't have - * permission to see. 
- */ - if (rte->securityQuals != NIL && - !get_func_leakproof(get_opcode(expr->opno))) - return false; + /* Check if the operator is leakproof */ + if (*leakproof) + *leakproof = get_func_leakproof(get_opcode(expr->opno)); /* Check Var IN Array clauses by recursing. */ if (IsA(clause_expr, Var)) return statext_is_compatible_clause_internal(root, clause_expr, - relid, attnums, exprs); + relid, attnums, + exprs, leakproof); /* Otherwise we have Expr IN Array. */ *exprs = lappend(*exprs, clause_expr); @@ -1515,7 +1498,8 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, */ if (!statext_is_compatible_clause_internal(root, (Node *) lfirst(lc), - relid, attnums, exprs)) + relid, attnums, exprs, + leakproof)) return false; } @@ -1529,8 +1513,10 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, /* Check Var IS NULL clauses by recursing. */ if (IsA(nt->arg, Var)) - return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), - relid, attnums, exprs); + return statext_is_compatible_clause_internal(root, + (Node *) (nt->arg), + relid, attnums, + exprs, leakproof); /* Otherwise we have Expr IS NULL. */ *exprs = lappend(*exprs, nt->arg); @@ -1569,11 +1555,9 @@ static bool statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, Bitmapset **attnums, List **exprs) { - RangeTblEntry *rte = root->simple_rte_array[relid]; - RelOptInfo *rel = root->simple_rel_array[relid]; RestrictInfo *rinfo; int clause_relid; - Oid userid; + bool leakproof; /* * Special-case handling for bare BoolExpr AND clauses, because the @@ -1613,18 +1597,31 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, clause_relid != relid) return false; - /* Check the clause and determine what attributes it references. */ + /* + * Check the clause, determine what attributes it references, and whether + * it includes any non-leakproof operators. + */ + leakproof = true; if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause, - relid, attnums, exprs)) + relid, attnums, exprs, + &leakproof)) return false; /* - * Check that the user has permission to read all required attributes. + * If the clause includes any non-leakproof operators, check that the user + * has permission to read all required attributes, otherwise the operators + * might reveal values from the MCV list that the user doesn't have + * permission to see. We require all rows to be selectable --- there must + * be no securityQuals from security barrier views or RLS policies. See + * similar code in examine_variable(), examine_simple_variable(), and + * statistic_proc_security_check(). + * + * Note that for an inheritance child, the permission checks are performed + * on the inheritance root parent, and whole-table select privilege on the + * parent doesn't guarantee that the user could read all columns of the + * child. Therefore we must check all referenced columns. */ - userid = OidIsValid(rel->userid) ? 
rel->userid : GetUserId(); - - /* Table-level SELECT privilege is sufficient for all columns */ - if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK) + if (!leakproof) { Bitmapset *clause_attnums = NULL; int attnum = -1; @@ -1649,26 +1646,9 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, if (*exprs != NIL) pull_varattnos((Node *) *exprs, relid, &clause_attnums); - attnum = -1; - while ((attnum = bms_next_member(clause_attnums, attnum)) >= 0) - { - /* Undo the offset */ - AttrNumber attno = attnum + FirstLowInvalidHeapAttributeNumber; - - if (attno == InvalidAttrNumber) - { - /* Whole-row reference, so must have access to all columns */ - if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT, - ACLMASK_ALL) != ACLCHECK_OK) - return false; - } - else - { - if (pg_attribute_aclcheck(rte->relid, attno, userid, - ACL_SELECT) != ACLCHECK_OK) - return false; - } - } + /* Must have permission to read all rows from these columns */ + if (!all_rows_selectable(root, relid, clause_attnums)) + return false; } /* If we reach here, the clause is OK */ diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 17fbfa9b41063..1c480cfaaf781 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5288,8 +5288,8 @@ ReleaseDummy(HeapTuple tuple) * unique for this query. (Caution: this should be trusted for * statistical purposes only, since we do not check indimmediate nor * verify that the exact same definition of equality applies.) - * acl_ok: true if current user has permission to read the column(s) - * underlying the pg_statistic entry. This is consulted by + * acl_ok: true if current user has permission to read all table rows from + * the column(s) underlying the pg_statistic entry. This is consulted by * statistic_proc_security_check(). * * Caller is responsible for doing ReleaseVariableStats() before exiting. @@ -5408,7 +5408,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, */ ListCell *ilist; ListCell *slist; - Oid userid; /* * The nullingrels bits within the expression could prevent us from @@ -5418,17 +5417,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, if (bms_overlap(varnos, root->outer_join_rels)) node = remove_nulling_relids(node, root->outer_join_rels, NULL); - /* - * Determine the user ID to use for privilege checks: either - * onerel->userid if it's set (e.g., in case we're accessing the table - * via a view), or the current user otherwise. - * - * If we drill down to child relations, we keep using the same userid: - * it's going to be the same anyway, due to how we set up the relation - * tree (q.v. build_simple_rel). - */ - userid = OidIsValid(onerel->userid) ? onerel->userid : GetUserId(); - foreach(ilist, onerel->indexlist) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); @@ -5496,69 +5484,32 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, if (HeapTupleIsValid(vardata->statsTuple)) { - /* Get index's table for permission check */ - RangeTblEntry *rte; - - rte = planner_rt_fetch(index->rel->relid, root); - Assert(rte->rtekind == RTE_RELATION); - /* + * Test if user has permission to access all + * rows from the index's table. + * * For simplicity, we insist on the whole * table being selectable, rather than trying * to identify which column(s) the index - * depends on. Also require all rows to be - * selectable --- there must be no - * securityQuals from security barrier views - * or RLS policies. + * depends on. 
+ * + * Note that for an inheritance child, + * permissions are checked on the inheritance + * root parent, and whole-table select + * privilege on the parent doesn't quite + * guarantee that the user could read all + * columns of the child. But in practice it's + * unlikely that any interesting security + * violation could result from allowing access + * to the expression index's stats, so we + * allow it anyway. See similar code in + * examine_simple_variable() for additional + * comments. */ vardata->acl_ok = - rte->securityQuals == NIL && - (pg_class_aclcheck(rte->relid, userid, - ACL_SELECT) == ACLCHECK_OK); - - /* - * If the user doesn't have permissions to - * access an inheritance child relation, check - * the permissions of the table actually - * mentioned in the query, since most likely - * the user does have that permission. Note - * that whole-table select privilege on the - * parent doesn't quite guarantee that the - * user could read all columns of the child. - * But in practice it's unlikely that any - * interesting security violation could result - * from allowing access to the expression - * index's stats, so we allow it anyway. See - * similar code in examine_simple_variable() - * for additional comments. - */ - if (!vardata->acl_ok && - root->append_rel_array != NULL) - { - AppendRelInfo *appinfo; - Index varno = index->rel->relid; - - appinfo = root->append_rel_array[varno]; - while (appinfo && - planner_rt_fetch(appinfo->parent_relid, - root)->rtekind == RTE_RELATION) - { - varno = appinfo->parent_relid; - appinfo = root->append_rel_array[varno]; - } - if (varno != index->rel->relid) - { - /* Repeat access check on this rel */ - rte = planner_rt_fetch(varno, root); - Assert(rte->rtekind == RTE_RELATION); - - vardata->acl_ok = - rte->securityQuals == NIL && - (pg_class_aclcheck(rte->relid, - userid, - ACL_SELECT) == ACLCHECK_OK); - } - } + all_rows_selectable(root, + index->rel->relid, + NULL); } else { @@ -5628,58 +5579,26 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->freefunc = ReleaseDummy; /* + * Test if user has permission to access all rows from the + * table. + * * For simplicity, we insist on the whole table being * selectable, rather than trying to identify which - * column(s) the statistics object depends on. Also - * require all rows to be selectable --- there must be no - * securityQuals from security barrier views or RLS - * policies. + * column(s) the statistics object depends on. + * + * Note that for an inheritance child, permissions are + * checked on the inheritance root parent, and whole-table + * select privilege on the parent doesn't quite guarantee + * that the user could read all columns of the child. But + * in practice it's unlikely that any interesting security + * violation could result from allowing access to the + * expression stats, so we allow it anyway. See similar + * code in examine_simple_variable() for additional + * comments. */ - vardata->acl_ok = - rte->securityQuals == NIL && - (pg_class_aclcheck(rte->relid, userid, - ACL_SELECT) == ACLCHECK_OK); - - /* - * If the user doesn't have permissions to access an - * inheritance child relation, check the permissions of - * the table actually mentioned in the query, since most - * likely the user does have that permission. Note that - * whole-table select privilege on the parent doesn't - * quite guarantee that the user could read all columns of - * the child. 
But in practice it's unlikely that any - * interesting security violation could result from - * allowing access to the expression stats, so we allow it - * anyway. See similar code in examine_simple_variable() - * for additional comments. - */ - if (!vardata->acl_ok && - root->append_rel_array != NULL) - { - AppendRelInfo *appinfo; - Index varno = onerel->relid; - - appinfo = root->append_rel_array[varno]; - while (appinfo && - planner_rt_fetch(appinfo->parent_relid, - root)->rtekind == RTE_RELATION) - { - varno = appinfo->parent_relid; - appinfo = root->append_rel_array[varno]; - } - if (varno != onerel->relid) - { - /* Repeat access check on this rel */ - rte = planner_rt_fetch(varno, root); - Assert(rte->rtekind == RTE_RELATION); - - vardata->acl_ok = - rte->securityQuals == NIL && - (pg_class_aclcheck(rte->relid, - userid, - ACL_SELECT) == ACLCHECK_OK); - } - } + vardata->acl_ok = all_rows_selectable(root, + onerel->relid, + NULL); break; } @@ -5734,109 +5653,20 @@ examine_simple_variable(PlannerInfo *root, Var *var, if (HeapTupleIsValid(vardata->statsTuple)) { - RelOptInfo *onerel = find_base_rel_noerr(root, var->varno); - Oid userid; - /* - * Check if user has permission to read this column. We require - * all rows to be accessible, so there must be no securityQuals - * from security barrier views or RLS policies. + * Test if user has permission to read all rows from this column. * - * Normally the Var will have an associated RelOptInfo from which - * we can find out which userid to do the check as; but it might - * not if it's a RETURNING Var for an INSERT target relation. In - * that case use the RTEPermissionInfo associated with the RTE. + * This requires that the user has the appropriate SELECT + * privileges and that there are no securityQuals from security + * barrier views or RLS policies. If that's not the case, then we + * only permit leakproof functions to be passed pg_statistic data + * in vardata, otherwise the functions might reveal data that the + * user doesn't have permission to see --- see + * statistic_proc_security_check(). */ - if (onerel) - userid = onerel->userid; - else - { - RTEPermissionInfo *perminfo; - - perminfo = getRTEPermissionInfo(root->parse->rteperminfos, rte); - userid = perminfo->checkAsUser; - } - if (!OidIsValid(userid)) - userid = GetUserId(); - vardata->acl_ok = - rte->securityQuals == NIL && - ((pg_class_aclcheck(rte->relid, userid, - ACL_SELECT) == ACLCHECK_OK) || - (pg_attribute_aclcheck(rte->relid, var->varattno, userid, - ACL_SELECT) == ACLCHECK_OK)); - - /* - * If the user doesn't have permissions to access an inheritance - * child relation or specifically this attribute, check the - * permissions of the table/column actually mentioned in the - * query, since most likely the user does have that permission - * (else the query will fail at runtime), and if the user can read - * the column there then he can get the values of the child table - * too. To do that, we must find out which of the root parent's - * attributes the child relation's attribute corresponds to. - */ - if (!vardata->acl_ok && var->varattno > 0 && - root->append_rel_array != NULL) - { - AppendRelInfo *appinfo; - Index varno = var->varno; - int varattno = var->varattno; - bool found = false; - - appinfo = root->append_rel_array[varno]; - - /* - * Partitions are mapped to their immediate parent, not the - * root parent, so must be ready to walk up multiple - * AppendRelInfos. 
But stop if we hit a parent that is not - * RTE_RELATION --- that's a flattened UNION ALL subquery, not - * an inheritance parent. - */ - while (appinfo && - planner_rt_fetch(appinfo->parent_relid, - root)->rtekind == RTE_RELATION) - { - int parent_varattno; - - found = false; - if (varattno <= 0 || varattno > appinfo->num_child_cols) - break; /* safety check */ - parent_varattno = appinfo->parent_colnos[varattno - 1]; - if (parent_varattno == 0) - break; /* Var is local to child */ - - varno = appinfo->parent_relid; - varattno = parent_varattno; - found = true; - - /* If the parent is itself a child, continue up. */ - appinfo = root->append_rel_array[varno]; - } - - /* - * In rare cases, the Var may be local to the child table, in - * which case, we've got to live with having no access to this - * column's stats. - */ - if (!found) - return; - - /* Repeat the access check on this parent rel & column */ - rte = planner_rt_fetch(varno, root); - Assert(rte->rtekind == RTE_RELATION); - - /* - * Fine to use the same userid as it's the same in all - * relations of a given inheritance tree. - */ - vardata->acl_ok = - rte->securityQuals == NIL && - ((pg_class_aclcheck(rte->relid, userid, - ACL_SELECT) == ACLCHECK_OK) || - (pg_attribute_aclcheck(rte->relid, varattno, userid, - ACL_SELECT) == ACLCHECK_OK)); - } + all_rows_selectable(root, var->varno, + bms_make_singleton(var->varattno - FirstLowInvalidHeapAttributeNumber)); } else { @@ -6033,6 +5863,214 @@ examine_simple_variable(PlannerInfo *root, Var *var, } } +/* + * all_rows_selectable + * Test whether the user has permission to select all rows from a given + * relation. + * + * Inputs: + * root: the planner info + * varno: the index of the relation (assumed to be an RTE_RELATION) + * varattnos: the attributes for which permission is required, or NULL if + * whole-table access is required + * + * Returns true if the user has the required select permissions, and there are + * no securityQuals from security barrier views or RLS policies. + * + * Note that if the relation is an inheritance child relation, securityQuals + * and access permissions are checked against the inheritance root parent (the + * relation actually mentioned in the query) --- see the comments in + * expand_single_inheritance_child() for an explanation of why it has to be + * done this way. + * + * If varattnos is non-NULL, its attribute numbers should be offset by + * FirstLowInvalidHeapAttributeNumber so that system attributes can be + * checked. If varattnos is NULL, only table-level SELECT privileges are + * checked, not any column-level privileges. + * + * Note: if the relation is accessed via a view, this function actually tests + * whether the view owner has permission to select from the relation. To + * ensure that the current user has permission, it is also necessary to check + * that the current user has permission to select from the view, which we do + * at planner-startup --- see subquery_planner(). + * + * This is exported so that other estimation functions can use it. + */ +bool +all_rows_selectable(PlannerInfo *root, Index varno, Bitmapset *varattnos) +{ + RelOptInfo *rel = find_base_rel_noerr(root, varno); + RangeTblEntry *rte = planner_rt_fetch(varno, root); + Oid userid; + int varattno; + + Assert(rte->rtekind == RTE_RELATION); + + /* + * Determine the user ID to use for privilege checks (either the current + * user or the view owner, if we're accessing the table via a view). 
+ * + * Normally the relation will have an associated RelOptInfo from which we + * can find the userid, but it might not if it's a RETURNING Var for an + * INSERT target relation. In that case use the RTEPermissionInfo + * associated with the RTE. + * + * If we navigate up to a parent relation, we keep using the same userid, + * since it's the same in all relations of a given inheritance tree. + */ + if (rel) + userid = rel->userid; + else + { + RTEPermissionInfo *perminfo; + + perminfo = getRTEPermissionInfo(root->parse->rteperminfos, rte); + userid = perminfo->checkAsUser; + } + if (!OidIsValid(userid)) + userid = GetUserId(); + + /* + * Permissions and securityQuals must be checked on the table actually + * mentioned in the query, so if this is an inheritance child, navigate up + * to the inheritance root parent. If the user can read the whole table + * or the required columns there, then they can read from the child table + * too. For per-column checks, we must find out which of the root + * parent's attributes the child relation's attributes correspond to. + */ + if (root->append_rel_array != NULL) + { + AppendRelInfo *appinfo; + + appinfo = root->append_rel_array[varno]; + + /* + * Partitions are mapped to their immediate parent, not the root + * parent, so must be ready to walk up multiple AppendRelInfos. But + * stop if we hit a parent that is not RTE_RELATION --- that's a + * flattened UNION ALL subquery, not an inheritance parent. + */ + while (appinfo && + planner_rt_fetch(appinfo->parent_relid, + root)->rtekind == RTE_RELATION) + { + Bitmapset *parent_varattnos = NULL; + + /* + * For each child attribute, find the corresponding parent + * attribute. In rare cases, the attribute may be local to the + * child table, in which case, we've got to live with having no + * access to this column. + */ + varattno = -1; + while ((varattno = bms_next_member(varattnos, varattno)) >= 0) + { + AttrNumber attno; + AttrNumber parent_attno; + + attno = varattno + FirstLowInvalidHeapAttributeNumber; + + if (attno == InvalidAttrNumber) + { + /* + * Whole-row reference, so must map each column of the + * child to the parent table. + */ + for (attno = 1; attno <= appinfo->num_child_cols; attno++) + { + parent_attno = appinfo->parent_colnos[attno - 1]; + if (parent_attno == 0) + return false; /* attr is local to child */ + parent_varattnos = + bms_add_member(parent_varattnos, + parent_attno - FirstLowInvalidHeapAttributeNumber); + } + } + else + { + if (attno < 0) + { + /* System attnos are the same in all tables */ + parent_attno = attno; + } + else + { + if (attno > appinfo->num_child_cols) + return false; /* safety check */ + parent_attno = appinfo->parent_colnos[attno - 1]; + if (parent_attno == 0) + return false; /* attr is local to child */ + } + parent_varattnos = + bms_add_member(parent_varattnos, + parent_attno - FirstLowInvalidHeapAttributeNumber); + } + } + + /* If the parent is itself a child, continue up */ + varno = appinfo->parent_relid; + varattnos = parent_varattnos; + appinfo = root->append_rel_array[varno]; + } + + /* Perform the access check on this parent rel */ + rte = planner_rt_fetch(varno, root); + Assert(rte->rtekind == RTE_RELATION); + } + + /* + * For all rows to be accessible, there must be no securityQuals from + * security barrier views or RLS policies. + */ + if (rte->securityQuals != NIL) + return false; + + /* + * Test for table-level SELECT privilege. 
+ * + * If varattnos is non-NULL, this is sufficient to give access to all + * requested attributes, even for a child table, since we have verified + * that all required child columns have matching parent columns. + * + * If varattnos is NULL (whole-table access requested), this doesn't + * necessarily guarantee that the user can read all columns of a child + * table, but we allow it anyway (see comments in examine_variable()) and + * don't bother checking any column privileges. + */ + if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) == ACLCHECK_OK) + return true; + + if (varattnos == NULL) + return false; /* whole-table access requested */ + + /* + * Don't have table-level SELECT privilege, so check per-column + * privileges. + */ + varattno = -1; + while ((varattno = bms_next_member(varattnos, varattno)) >= 0) + { + AttrNumber attno = varattno + FirstLowInvalidHeapAttributeNumber; + + if (attno == InvalidAttrNumber) + { + /* Whole-row reference, so must have access to all columns */ + if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT, + ACLMASK_ALL) != ACLCHECK_OK) + return false; + } + else + { + if (pg_attribute_aclcheck(rte->relid, attno, userid, + ACL_SELECT) != ACLCHECK_OK) + return false; + } + } + + /* If we reach here, have all required column privileges */ + return true; +} + /* * examine_indexcol_variable * Try to look up statistical data about an index column/expression. @@ -6121,15 +6159,17 @@ examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, /* * Check whether it is permitted to call func_oid passing some of the - * pg_statistic data in vardata. We allow this either if the user has SELECT - * privileges on the table or column underlying the pg_statistic data or if - * the function is marked leakproof. + * pg_statistic data in vardata. We allow this if either of the following + * conditions is met: (1) the user has SELECT privileges on the table or + * column underlying the pg_statistic data and there are no securityQuals from + * security barrier views or RLS policies, or (2) the function is marked + * leakproof. 
*/ bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid) { if (vardata->acl_ok) - return true; + return true; /* have SELECT privs and no securityQuals */ if (!OidIsValid(func_oid)) return false; diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index a71502efeed75..10dcea037c3d0 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -242,6 +242,7 @@ extern void standard_ExecutorEnd(QueryDesc *queryDesc); extern void ExecutorRewind(QueryDesc *queryDesc); extern bool ExecCheckPermissions(List *rangeTable, List *rteperminfos, bool ereport_on_violation); +extern bool ExecCheckOneRelPerms(RTEPermissionInfo *perminfo); extern void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, List *mergeActions); extern void InitResultRelInfo(ResultRelInfo *resultRelInfo, diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 013049b3098ac..fb4fa53363d7c 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -96,7 +96,8 @@ typedef struct VariableStatData int32 atttypmod; /* actual typmod (after stripping relabel) */ bool isunique; /* matches unique index, DISTINCT or GROUP-BY * clause */ - bool acl_ok; /* result of ACL check on table or column */ + bool acl_ok; /* true if user has SELECT privilege on all + * rows from the table or column */ } VariableStatData; #define ReleaseVariableStats(vardata) \ @@ -153,6 +154,7 @@ extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook; extern void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata); +extern bool all_rows_selectable(PlannerInfo *root, Index varno, Bitmapset *varattnos); extern bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid); extern bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 602a6b255bc08..845e477da044d 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -513,8 +513,6 @@ CREATE VIEW atest12v AS SELECT * FROM atest12 WHERE b <<< 5; CREATE VIEW atest12sbv WITH (security_barrier=true) AS SELECT * FROM atest12 WHERE b <<< 5; -GRANT SELECT ON atest12v TO PUBLIC; -GRANT SELECT ON atest12sbv TO PUBLIC; -- This plan should use nestloop, knowing that few rows will be selected. EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b; QUERY PLAN @@ -560,9 +558,18 @@ CREATE FUNCTION leak2(integer,integer) RETURNS boolean LANGUAGE plpgsql immutable; CREATE OPERATOR >>> (procedure = leak2, leftarg = integer, rightarg = integer, restrict = scalargtsel); --- This should not show any "leak" notices before failing. +-- These should not show any "leak" notices before failing. EXPLAIN (COSTS OFF) SELECT * FROM atest12 WHERE a >>> 0; ERROR: permission denied for table atest12 +EXPLAIN (COSTS OFF) SELECT * FROM atest12v WHERE a >>> 0; +ERROR: permission denied for view atest12v +EXPLAIN (COSTS OFF) SELECT * FROM atest12sbv WHERE a >>> 0; +ERROR: permission denied for view atest12sbv +-- Now regress_priv_user1 grants access to regress_priv_user2 via the views. +SET SESSION AUTHORIZATION regress_priv_user1; +GRANT SELECT ON atest12v TO PUBLIC; +GRANT SELECT ON atest12sbv TO PUBLIC; +SET SESSION AUTHORIZATION regress_priv_user2; -- These plans should continue to use a nestloop, since they execute with the -- privileges of the view owner. 
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b; diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 1c4e37d22493d..8c87950931315 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -4506,7 +4506,7 @@ RESET SESSION AUTHORIZATION; DROP VIEW rls_view; DROP TABLE rls_tbl; DROP TABLE ref_tbl; --- Leaky operator test +-- Leaky operator tests CREATE TABLE rls_tbl (a int); INSERT INTO rls_tbl SELECT x/10 FROM generate_series(1, 100) x; ANALYZE rls_tbl; @@ -4530,9 +4530,80 @@ EXPLAIN (COSTS OFF) SELECT * FROM rls_tbl WHERE a <<< 1000 or a <<< 900; One-Time Filter: false (2 rows) +RESET SESSION AUTHORIZATION; +CREATE TABLE rls_child_tbl () INHERITS (rls_tbl); +INSERT INTO rls_child_tbl SELECT x/10 FROM generate_series(1, 100) x; +ANALYZE rls_child_tbl; +CREATE TABLE rls_ptbl (a int) PARTITION BY RANGE (a); +CREATE TABLE rls_part PARTITION OF rls_ptbl FOR VALUES FROM (-100) TO (100); +INSERT INTO rls_ptbl SELECT x/10 FROM generate_series(1, 100) x; +ANALYZE rls_ptbl, rls_part; +ALTER TABLE rls_ptbl ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_part ENABLE ROW LEVEL SECURITY; +GRANT SELECT ON rls_ptbl TO regress_rls_alice; +GRANT SELECT ON rls_part TO regress_rls_alice; +CREATE POLICY p1 ON rls_tbl USING (a < 0); +CREATE POLICY p2 ON rls_ptbl USING (a < 0); +CREATE POLICY p3 ON rls_part USING (a < 0); +SET SESSION AUTHORIZATION regress_rls_alice; +SELECT * FROM rls_tbl WHERE a <<< 1000; + a +--- +(0 rows) + +SELECT * FROM rls_child_tbl WHERE a <<< 1000; +ERROR: permission denied for table rls_child_tbl +SELECT * FROM rls_ptbl WHERE a <<< 1000; + a +--- +(0 rows) + +SELECT * FROM rls_part WHERE a <<< 1000; + a +--- +(0 rows) + +SELECT * FROM (SELECT * FROM rls_tbl UNION ALL + SELECT * FROM rls_tbl) t WHERE a <<< 1000; + a +--- +(0 rows) + +SELECT * FROM (SELECT * FROM rls_child_tbl UNION ALL + SELECT * FROM rls_child_tbl) t WHERE a <<< 1000; +ERROR: permission denied for table rls_child_tbl +RESET SESSION AUTHORIZATION; +REVOKE SELECT ON rls_tbl FROM regress_rls_alice; +CREATE VIEW rls_tbl_view AS SELECT * FROM rls_tbl; +ALTER TABLE rls_child_tbl ENABLE ROW LEVEL SECURITY; +GRANT SELECT ON rls_child_tbl TO regress_rls_alice; +CREATE POLICY p4 ON rls_child_tbl USING (a < 0); +SET SESSION AUTHORIZATION regress_rls_alice; +SELECT * FROM rls_tbl WHERE a <<< 1000; +ERROR: permission denied for table rls_tbl +SELECT * FROM rls_tbl_view WHERE a <<< 1000; +ERROR: permission denied for view rls_tbl_view +SELECT * FROM rls_child_tbl WHERE a <<< 1000; + a +--- +(0 rows) + +SELECT * FROM (SELECT * FROM rls_tbl UNION ALL + SELECT * FROM rls_tbl) t WHERE a <<< 1000; +ERROR: permission denied for table rls_tbl +SELECT * FROM (SELECT * FROM rls_child_tbl UNION ALL + SELECT * FROM rls_child_tbl) t WHERE a <<< 1000; + a +--- +(0 rows) + DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); RESET SESSION AUTHORIZATION; +DROP TABLE rls_part; +DROP TABLE rls_ptbl; +DROP TABLE rls_child_tbl; +DROP VIEW rls_tbl_view; DROP TABLE rls_tbl; -- Bug #16006: whole-row Vars in a policy don't play nice with sub-selects SET SESSION AUTHORIZATION regress_rls_alice; diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6359e5fb689cb..c66e09f8b1651 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3275,9 +3275,17 @@ CREATE FUNCTION op_leak(int, int) RETURNS bool LANGUAGE plpgsql; CREATE OPERATOR <<< 
(procedure = op_leak, leftarg = int, rightarg = int, restrict = scalarltsel); +CREATE FUNCTION op_leak(record, record) RETURNS bool + AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END' + LANGUAGE plpgsql; +CREATE OPERATOR <<< (procedure = op_leak, leftarg = record, rightarg = record, + restrict = scalarltsel); SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied ERROR: permission denied for table priv_test_tbl -SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; +SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_tbl +SELECT * FROM tststats.priv_test_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Permission denied ERROR: permission denied for table priv_test_tbl DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied ERROR: permission denied for table priv_test_tbl @@ -3298,10 +3306,17 @@ SELECT * FROM tststats.priv_test_view WHERE a <<< 0 OR b <<< 0; -- Should not le ---+--- (0 rows) +SELECT * FROM tststats.priv_test_view t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Should not leak + a | b +---+--- +(0 rows) + DELETE FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak -- Grant table access, but hide all data with RLS RESET SESSION AUTHORIZATION; ALTER TABLE tststats.priv_test_tbl ENABLE ROW LEVEL SECURITY; +CREATE POLICY priv_test_tbl_pol ON tststats.priv_test_tbl USING (2 * a < 0); GRANT SELECT, DELETE ON tststats.priv_test_tbl TO regress_stats_user1; -- Should now have direct table access, but see nothing and leak nothing SET SESSION AUTHORIZATION regress_stats_user1; @@ -3310,12 +3325,57 @@ SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not le ---+--- (0 rows) -SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; +SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +SELECT * FROM tststats.priv_test_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Should not leak a | b ---+--- (0 rows) DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak +-- Create plain inheritance parent table with no access permissions +RESET SESSION AUTHORIZATION; +CREATE TABLE tststats.priv_test_parent_tbl (a int, b int); +ALTER TABLE tststats.priv_test_tbl INHERIT tststats.priv_test_parent_tbl; +-- Should not have access to parent, and should leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_parent_tbl +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 OR b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_parent_tbl +SELECT * FROM tststats.priv_test_parent_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Permission denied +ERROR: permission denied for table priv_test_parent_tbl +DELETE FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_parent_tbl +-- Grant table access to parent, but hide all data with RLS +RESET SESSION AUTHORIZATION; +ALTER TABLE tststats.priv_test_parent_tbl ENABLE ROW LEVEL SECURITY; +CREATE POLICY priv_test_parent_tbl_pol ON tststats.priv_test_parent_tbl USING (2 * a < 0); +GRANT SELECT, DELETE ON tststats.priv_test_parent_tbl TO 
regress_stats_user1; +-- Should now have direct table access to parent, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 OR b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +SELECT * FROM tststats.priv_test_parent_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Should not leak + a | b +---+--- +(0 rows) + +DELETE FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak -- privilege checks for pg_stats_ext and pg_stats_ext_exprs RESET SESSION AUTHORIZATION; CREATE TABLE stats_ext_tbl (id INT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, col TEXT); @@ -3361,11 +3421,14 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); +DROP OPERATOR <<< (record, record); +DROP FUNCTION op_leak(record, record); RESET SESSION AUTHORIZATION; DROP TABLE stats_ext_tbl; DROP SCHEMA tststats CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to table tststats.priv_test_tbl +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table tststats.priv_test_parent_tbl +drop cascades to table tststats.priv_test_tbl drop cascades to view tststats.priv_test_view DROP USER regress_stats_user1; CREATE TABLE grouping_unique (x integer); diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 3eacc1340aad2..661cf186cfd59 100644 --- a/src/test/regress/sql/privileges.sql +++ b/src/test/regress/sql/privileges.sql @@ -346,8 +346,6 @@ CREATE VIEW atest12v AS SELECT * FROM atest12 WHERE b <<< 5; CREATE VIEW atest12sbv WITH (security_barrier=true) AS SELECT * FROM atest12 WHERE b <<< 5; -GRANT SELECT ON atest12v TO PUBLIC; -GRANT SELECT ON atest12sbv TO PUBLIC; -- This plan should use nestloop, knowing that few rows will be selected. EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b; @@ -369,8 +367,16 @@ CREATE FUNCTION leak2(integer,integer) RETURNS boolean CREATE OPERATOR >>> (procedure = leak2, leftarg = integer, rightarg = integer, restrict = scalargtsel); --- This should not show any "leak" notices before failing. +-- These should not show any "leak" notices before failing. EXPLAIN (COSTS OFF) SELECT * FROM atest12 WHERE a >>> 0; +EXPLAIN (COSTS OFF) SELECT * FROM atest12v WHERE a >>> 0; +EXPLAIN (COSTS OFF) SELECT * FROM atest12sbv WHERE a >>> 0; + +-- Now regress_priv_user1 grants access to regress_priv_user2 via the views. +SET SESSION AUTHORIZATION regress_priv_user1; +GRANT SELECT ON atest12v TO PUBLIC; +GRANT SELECT ON atest12sbv TO PUBLIC; +SET SESSION AUTHORIZATION regress_priv_user2; -- These plans should continue to use a nestloop, since they execute with the -- privileges of the view owner. 
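The rowsecurity.sql and stats_ext.sql additions that follow all rely on the same probe idiom, summarized here for reference (a sketch of the existing test pattern, not new test code): the planner's selectivity machinery may evaluate a restriction operator against entries from a table's MCV list, so a deliberately non-leakproof operator that announces its arguments turns any statistics leak into a visible NOTICE.

    -- Non-leakproof probe operator: every call reports its inputs.
    CREATE FUNCTION op_leak(int, int) RETURNS bool
        AS $$ BEGIN RAISE NOTICE 'op_leak => %, %', $1, $2; RETURN $1 < $2; END $$
        LANGUAGE plpgsql;
    CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int,
                         restrict = scalarltsel);

    -- For a user whose rows are hidden by an RLS policy, planning or running
    -- "SELECT * FROM rls_tbl WHERE a <<< 1000" must emit no op_leak notices:
    -- with this fix, the statext/selfuncs code first verifies via
    -- all_rows_selectable() that the user may read all rows involved before
    -- passing statistics to a non-leakproof operator.
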
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql index 9da967a9ef2f5..21ac0ca51eed1 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -2189,7 +2189,7 @@ DROP VIEW rls_view; DROP TABLE rls_tbl; DROP TABLE ref_tbl; --- Leaky operator test +-- Leaky operator tests CREATE TABLE rls_tbl (a int); INSERT INTO rls_tbl SELECT x/10 FROM generate_series(1, 100) x; ANALYZE rls_tbl; @@ -2205,9 +2205,58 @@ CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int, restrict = scalarltsel); SELECT * FROM rls_tbl WHERE a <<< 1000; EXPLAIN (COSTS OFF) SELECT * FROM rls_tbl WHERE a <<< 1000 or a <<< 900; +RESET SESSION AUTHORIZATION; + +CREATE TABLE rls_child_tbl () INHERITS (rls_tbl); +INSERT INTO rls_child_tbl SELECT x/10 FROM generate_series(1, 100) x; +ANALYZE rls_child_tbl; + +CREATE TABLE rls_ptbl (a int) PARTITION BY RANGE (a); +CREATE TABLE rls_part PARTITION OF rls_ptbl FOR VALUES FROM (-100) TO (100); +INSERT INTO rls_ptbl SELECT x/10 FROM generate_series(1, 100) x; +ANALYZE rls_ptbl, rls_part; + +ALTER TABLE rls_ptbl ENABLE ROW LEVEL SECURITY; +ALTER TABLE rls_part ENABLE ROW LEVEL SECURITY; +GRANT SELECT ON rls_ptbl TO regress_rls_alice; +GRANT SELECT ON rls_part TO regress_rls_alice; +CREATE POLICY p1 ON rls_tbl USING (a < 0); +CREATE POLICY p2 ON rls_ptbl USING (a < 0); +CREATE POLICY p3 ON rls_part USING (a < 0); + +SET SESSION AUTHORIZATION regress_rls_alice; +SELECT * FROM rls_tbl WHERE a <<< 1000; +SELECT * FROM rls_child_tbl WHERE a <<< 1000; +SELECT * FROM rls_ptbl WHERE a <<< 1000; +SELECT * FROM rls_part WHERE a <<< 1000; +SELECT * FROM (SELECT * FROM rls_tbl UNION ALL + SELECT * FROM rls_tbl) t WHERE a <<< 1000; +SELECT * FROM (SELECT * FROM rls_child_tbl UNION ALL + SELECT * FROM rls_child_tbl) t WHERE a <<< 1000; +RESET SESSION AUTHORIZATION; + +REVOKE SELECT ON rls_tbl FROM regress_rls_alice; +CREATE VIEW rls_tbl_view AS SELECT * FROM rls_tbl; + +ALTER TABLE rls_child_tbl ENABLE ROW LEVEL SECURITY; +GRANT SELECT ON rls_child_tbl TO regress_rls_alice; +CREATE POLICY p4 ON rls_child_tbl USING (a < 0); + +SET SESSION AUTHORIZATION regress_rls_alice; +SELECT * FROM rls_tbl WHERE a <<< 1000; +SELECT * FROM rls_tbl_view WHERE a <<< 1000; +SELECT * FROM rls_child_tbl WHERE a <<< 1000; +SELECT * FROM (SELECT * FROM rls_tbl UNION ALL + SELECT * FROM rls_tbl) t WHERE a <<< 1000; +SELECT * FROM (SELECT * FROM rls_child_tbl UNION ALL + SELECT * FROM rls_child_tbl) t WHERE a <<< 1000; DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); RESET SESSION AUTHORIZATION; +DROP TABLE rls_part; +DROP TABLE rls_ptbl; +DROP TABLE rls_child_tbl; +DROP VIEW rls_tbl_view; DROP TABLE rls_tbl; -- Bug #16006: whole-row Vars in a policy don't play nice with sub-selects diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index da4f2fe9c938f..9ce4c670ecbcd 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1647,8 +1647,15 @@ CREATE FUNCTION op_leak(int, int) RETURNS bool LANGUAGE plpgsql; CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int, restrict = scalarltsel); +CREATE FUNCTION op_leak(record, record) RETURNS bool + AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END' + LANGUAGE plpgsql; +CREATE OPERATOR <<< (procedure = op_leak, leftarg = record, rightarg = record, + restrict = scalarltsel); SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied -SELECT * 
FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; +SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; -- Permission denied +SELECT * FROM tststats.priv_test_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Permission denied DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied -- Grant access via a security barrier view, but hide all data @@ -1661,19 +1668,51 @@ GRANT SELECT, DELETE ON tststats.priv_test_view TO regress_stats_user1; SET SESSION AUTHORIZATION regress_stats_user1; SELECT * FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak SELECT * FROM tststats.priv_test_view WHERE a <<< 0 OR b <<< 0; -- Should not leak +SELECT * FROM tststats.priv_test_view t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Should not leak DELETE FROM tststats.priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak -- Grant table access, but hide all data with RLS RESET SESSION AUTHORIZATION; ALTER TABLE tststats.priv_test_tbl ENABLE ROW LEVEL SECURITY; +CREATE POLICY priv_test_tbl_pol ON tststats.priv_test_tbl USING (2 * a < 0); GRANT SELECT, DELETE ON tststats.priv_test_tbl TO regress_stats_user1; -- Should now have direct table access, but see nothing and leak nothing SET SESSION AUTHORIZATION regress_stats_user1; SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak -SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; +SELECT * FROM tststats.priv_test_tbl WHERE a <<< 0 OR b <<< 0; -- Should not leak +SELECT * FROM tststats.priv_test_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Should not leak DELETE FROM tststats.priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak +-- Create plain inheritance parent table with no access permissions +RESET SESSION AUTHORIZATION; +CREATE TABLE tststats.priv_test_parent_tbl (a int, b int); +ALTER TABLE tststats.priv_test_tbl INHERIT tststats.priv_test_parent_tbl; + +-- Should not have access to parent, and should leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 OR b <<< 0; -- Permission denied +SELECT * FROM tststats.priv_test_parent_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Permission denied +DELETE FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied + +-- Grant table access to parent, but hide all data with RLS +RESET SESSION AUTHORIZATION; +ALTER TABLE tststats.priv_test_parent_tbl ENABLE ROW LEVEL SECURITY; +CREATE POLICY priv_test_parent_tbl_pol ON tststats.priv_test_parent_tbl USING (2 * a < 0); +GRANT SELECT, DELETE ON tststats.priv_test_parent_tbl TO regress_stats_user1; + +-- Should now have direct table access to parent, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak +SELECT * FROM tststats.priv_test_parent_tbl WHERE a <<< 0 OR b <<< 0; -- Should not leak +SELECT * FROM tststats.priv_test_parent_tbl t + WHERE a <<< 0 AND (b <<< 0 OR t.* <<< (1, 1) IS NOT NULL); -- Should not leak +DELETE FROM tststats.priv_test_parent_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak + -- privilege checks for pg_stats_ext and pg_stats_ext_exprs RESET SESSION AUTHORIZATION; CREATE TABLE stats_ext_tbl (id INT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, col 
TEXT); @@ -1703,6 +1742,8 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); +DROP OPERATOR <<< (record, record); +DROP FUNCTION op_leak(record, record); RESET SESSION AUTHORIZATION; DROP TABLE stats_ext_tbl; DROP SCHEMA tststats CASCADE; From 70693c645f6e490b9ed450e8611e94ab7af3aad2 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Mon, 11 Aug 2025 06:18:59 -0700 Subject: [PATCH 485/602] Convert newlines to spaces in names written in v11+ pg_dump comments. Maliciously-crafted object names could achieve SQL injection during restore. CVE-2012-0868 fixed this class of problem at the time, but later work reintroduced three cases. Commit bc8cd50fefd369b217f80078585c486505aafb62 (back-patched to v11+ in 2023-05 releases) introduced the pg_dump case. Commit 6cbdbd9e8d8f2986fde44f2431ed8d0c8fce7f5d (v12+) introduced the two pg_dumpall cases. Move sanitize_line(), unchanged, to dumputils.c so pg_dumpall has access to it in all supported versions. Back-patch to v13 (all supported versions). Reviewed-by: Robert Haas Reviewed-by: Nathan Bossart Backpatch-through: 13 Security: CVE-2025-8715 --- src/bin/pg_dump/dumputils.c | 37 ++++++++++++++++++++ src/bin/pg_dump/dumputils.h | 1 + src/bin/pg_dump/pg_backup_archiver.c | 37 -------------------- src/bin/pg_dump/pg_dump.c | 5 ++- src/bin/pg_dump/pg_dumpall.c | 13 +++++-- src/bin/pg_dump/t/002_pg_dump.pl | 21 +++++++++++ src/bin/pg_dump/t/003_pg_dump_with_server.pl | 16 +++++++++ 7 files changed, 90 insertions(+), 40 deletions(-) diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c index 73ce34346b278..4c02a9863c471 100644 --- a/src/bin/pg_dump/dumputils.c +++ b/src/bin/pg_dump/dumputils.c @@ -31,6 +31,43 @@ static void AddAcl(PQExpBuffer aclbuf, const char *keyword, const char *subname); +/* + * Sanitize a string to be included in an SQL comment or TOC listing, by + * replacing any newlines with spaces. This ensures each logical output line + * is in fact one physical output line, to prevent corruption of the dump + * (which could, in the worst case, present an SQL injection vulnerability + * if someone were to incautiously load a dump containing objects with + * maliciously crafted names). + * + * The result is a freshly malloc'd string. If the input string is NULL, + * return a malloc'ed empty string, unless want_hyphen, in which case return a + * malloc'ed hyphen. + * + * Note that we currently don't bother to quote names, meaning that the name + * fields aren't automatically parseable. "pg_restore -L" doesn't care because + * it only examines the dumpId field, but someday we might want to try harder. + */ +char * +sanitize_line(const char *str, bool want_hyphen) +{ + char *result; + char *s; + + if (!str) + return pg_strdup(want_hyphen ? "-" : ""); + + result = pg_strdup(str); + + for (s = result; *s != '\0'; s++) + { + if (*s == '\n' || *s == '\r') + *s = ' '; + } + + return result; +} + + /* * Build GRANT/REVOKE command(s) for an object. 
* diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h index 91c6e612e282e..71dd3f12a8dd1 100644 --- a/src/bin/pg_dump/dumputils.h +++ b/src/bin/pg_dump/dumputils.h @@ -36,6 +36,7 @@ #endif +extern char *sanitize_line(const char *str, bool want_hyphen); extern bool buildACLCommands(const char *name, const char *subname, const char *nspname, const char *type, const char *acls, const char *baseacls, const char *owner, const char *prefix, int remoteVersion, diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index dce88f040ace3..248e4151552e5 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -59,7 +59,6 @@ static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, DataDirSyncMethod sync_method); static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te); static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx); -static char *sanitize_line(const char *str, bool want_hyphen); static void _doSetFixedOutputState(ArchiveHandle *AH); static void _doSetSessionAuth(ArchiveHandle *AH, const char *user); static void _reconnectToDB(ArchiveHandle *AH, const char *dbname); @@ -4050,42 +4049,6 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) } } -/* - * Sanitize a string to be included in an SQL comment or TOC listing, by - * replacing any newlines with spaces. This ensures each logical output line - * is in fact one physical output line, to prevent corruption of the dump - * (which could, in the worst case, present an SQL injection vulnerability - * if someone were to incautiously load a dump containing objects with - * maliciously crafted names). - * - * The result is a freshly malloc'd string. If the input string is NULL, - * return a malloc'ed empty string, unless want_hyphen, in which case return a - * malloc'ed hyphen. - * - * Note that we currently don't bother to quote names, meaning that the name - * fields aren't automatically parseable. "pg_restore -L" doesn't care because - * it only examines the dumpId field, but someday we might want to try harder. - */ -static char * -sanitize_line(const char *str, bool want_hyphen) -{ - char *result; - char *s; - - if (!str) - return pg_strdup(want_hyphen ? 
"-" : ""); - - result = pg_strdup(str); - - for (s = result; *s != '\0'; s++) - { - if (*s == '\n' || *s == '\r') - *s = ' '; - } - - return result; -} - /* * Write the file header for a custom-format archive */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index f3a353a61a58e..66380a6d3012e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2840,11 +2840,14 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo) forcePartitionRootLoad(tbinfo))) { TableInfo *parentTbinfo; + char *sanitized; parentTbinfo = getRootTableInfo(tbinfo); copyFrom = fmtQualifiedDumpable(parentTbinfo); + sanitized = sanitize_line(copyFrom, true); printfPQExpBuffer(copyBuf, "-- load via partition root %s", - copyFrom); + sanitized); + free(sanitized); tdDefn = pg_strdup(copyBuf->data); } else diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 27aa1b656989c..3e7f930c9402a 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -1492,7 +1492,13 @@ dumpUserConfig(PGconn *conn, const char *username) res = executeQuery(conn, buf->data); if (PQntuples(res) > 0) - fprintf(OPF, "\n--\n-- User Config \"%s\"\n--\n\n", username); + { + char *sanitized; + + sanitized = sanitize_line(username, true); + fprintf(OPF, "\n--\n-- User Config \"%s\"\n--\n\n", sanitized); + free(sanitized); + } for (int i = 0; i < PQntuples(res); i++) { @@ -1594,6 +1600,7 @@ dumpDatabases(PGconn *conn) for (i = 0; i < PQntuples(res); i++) { char *dbname = PQgetvalue(res, i, 0); + char *sanitized; const char *create_opts; int ret; @@ -1610,7 +1617,9 @@ dumpDatabases(PGconn *conn) pg_log_info("dumping database \"%s\"", dbname); - fprintf(OPF, "--\n-- Database \"%s\" dump\n--\n\n", dbname); + sanitized = sanitize_line(dbname, true); + fprintf(OPF, "--\n-- Database \"%s\" dump\n--\n\n", sanitized); + free(sanitized); /* * We assume that "template1" and "postgres" already exist in the diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index a86b38466de14..e3257231a9efc 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -2224,6 +2224,27 @@ }, }, + 'newline of role or table name in comment' => { + create_sql => qq{CREATE ROLE regress_newline; + ALTER ROLE regress_newline SET enable_seqscan = off; + ALTER ROLE regress_newline + RENAME TO "regress_newline\nattack"; + + -- meet getPartitioningInfo() "unsafe" condition + CREATE TYPE pp_colors AS + ENUM ('green', 'blue', 'black'); + CREATE TABLE pp_enumpart (a pp_colors) + PARTITION BY HASH (a); + CREATE TABLE pp_enumpart1 PARTITION OF pp_enumpart + FOR VALUES WITH (MODULUS 2, REMAINDER 0); + CREATE TABLE pp_enumpart2 PARTITION OF pp_enumpart + FOR VALUES WITH (MODULUS 2, REMAINDER 1); + ALTER TABLE pp_enumpart + RENAME TO "pp_enumpart\nattack";}, + regexp => qr/\n--[^\n]*\nattack/s, + like => {}, + }, + 'CREATE TABLESPACE regress_dump_tablespace' => { create_order => 2, create_sql => q( diff --git a/src/bin/pg_dump/t/003_pg_dump_with_server.pl b/src/bin/pg_dump/t/003_pg_dump_with_server.pl index 8dc014ed6ed34..c83d3899dfbf7 100644 --- a/src/bin/pg_dump/t/003_pg_dump_with_server.pl +++ b/src/bin/pg_dump/t/003_pg_dump_with_server.pl @@ -16,6 +16,22 @@ $node->init; $node->start; +######################################### +# pg_dumpall: newline in database name + +$node->safe_psql('postgres', qq{CREATE DATABASE "regress_\nattack"}); + +my (@cmd, $stdout, $stderr); +@cmd = ("pg_dumpall", '--port' => $port, '--exclude-database=postgres'); +print("# 
Running: " . join(" ", @cmd) . "\n"); +my $result = IPC::Run::run \@cmd, '>' => \$stdout, '2>' => \$stderr; +ok(!$result, "newline in dbname: exit code not 0"); +like( + $stderr, + qr/shell command argument contains a newline/, + "newline in dbname: stderr matches"); +unlike($stdout, qr/^attack/m, "newline in dbname: no comment escape"); + ######################################### # Verify that dumping foreign data includes only foreign tables of # matching servers From 71ea0d6795438f95f4ee6e35867058c44b270d51 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 11 Aug 2025 09:00:00 -0500 Subject: [PATCH 486/602] Restrict psql meta-commands in plain-text dumps. A malicious server could inject psql meta-commands into plain-text dump output (i.e., scripts created with pg_dump --format=plain, pg_dumpall, or pg_restore --file) that are run at restore time on the machine running psql. To fix, introduce a new "restricted" mode in psql that blocks all meta-commands (except for \unrestrict to exit the mode), and teach pg_dump, pg_dumpall, and pg_restore to use this mode in plain-text dumps. While at it, encourage users to only restore dumps generated from trusted servers or to inspect it beforehand, since restoring causes the destination to execute arbitrary code of the source superusers' choice. However, the client running the dump and restore needn't trust the source or destination superusers. Reported-by: Martin Rakhmanov Reported-by: Matthieu Denais Reported-by: RyotaK Suggested-by: Tom Lane Reviewed-by: Noah Misch Reviewed-by: Michael Paquier Reviewed-by: Peter Eisentraut Security: CVE-2025-8714 Backpatch-through: 13 --- doc/src/sgml/ref/pg_dump.sgml | 35 +++++++ doc/src/sgml/ref/pg_dumpall.sgml | 30 ++++++ doc/src/sgml/ref/pg_restore.sgml | 34 +++++++ doc/src/sgml/ref/pgupgrade.sgml | 8 ++ doc/src/sgml/ref/psql-ref.sgml | 36 +++++++ .../pg_combinebackup/t/002_compare_backups.pl | 2 + src/bin/pg_dump/dumputils.c | 38 ++++++++ src/bin/pg_dump/dumputils.h | 3 + src/bin/pg_dump/pg_backup.h | 4 + src/bin/pg_dump/pg_backup_archiver.c | 32 ++++++- src/bin/pg_dump/pg_dump.c | 21 +++++ src/bin/pg_dump/pg_dumpall.c | 36 +++++++ src/bin/pg_dump/pg_restore.c | 23 +++++ src/bin/pg_dump/t/002_pg_dump.pl | 26 +++-- src/bin/pg_upgrade/t/002_pg_upgrade.pl | 3 + src/bin/psql/command.c | 94 ++++++++++++++++++- src/bin/psql/help.c | 4 + src/bin/psql/t/001_basic.pl | 7 ++ src/bin/psql/tab-complete.in.c | 4 +- src/test/recovery/t/027_stream_regress.pl | 4 + src/test/regress/expected/psql.out | 2 + src/test/regress/sql/psql.sql | 2 + 22 files changed, 435 insertions(+), 13 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 0bc7609bdf815..fd4ecf01a0a00 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -96,6 +96,18 @@ PostgreSQL documentation light of the limitations listed below. + + + Restoring a dump causes the destination to execute arbitrary code of the + source superusers' choice. Partial dumps and partial restores do not limit + that. If the source superusers are not trusted, the dumped SQL statements + must be inspected before restoring. Non-plain-text dumps can be inspected + by using pg_restore's + option. Note that the client running the dump and restore need not trust + the source or destination superusers. + + +
@@ -1252,6 +1264,29 @@ PostgreSQL documentation + + + + + Use the provided string as the psql + \restrict key in the dump output. This can only be + specified for plain-text dumps, i.e., when is + set to plain or the option + is omitted. If no restrict key is specified, + pg_dump will generate a random one as + needed. Keys may contain only alphanumeric characters. + + + This option is primarily intended for testing purposes and other + scenarios that require repeatable output (e.g., comparing dump files). + It is not recommended for general use, as a malicious server with + advance knowledge of the key may be able to inject arbitrary code that + will be executed on the machine that runs + psql with the dump output. + + + + diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 364442f00f28e..9f639f61db021 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -66,6 +66,16 @@ PostgreSQL documentation linkend="libpq-pgpass"/> for more information. + + + Restoring a dump causes the destination to execute arbitrary code of the + source superusers' choice. Partial dumps and partial restores do not limit + that. If the source superusers are not trusted, the dumped SQL statements + must be inspected before restoring. Note that the client running the dump + and restore need not trust the source or destination superusers. + + + @@ -591,6 +601,26 @@ exclude database PATTERN + + + + + Use the provided string as the psql + \restrict key in the dump output. If no restrict + key is specified, pg_dumpall will generate a + random one as needed. Keys may contain only alphanumeric characters. + + + This option is primarily intended for testing purposes and other + scenarios that require repeatable output (e.g., comparing dump files). + It is not recommended for general use, as a malicious server with + advance knowledge of the key may be able to inject arbitrary code that + will be executed on the machine that runs + psql with the dump output. + + + + diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 261ead1503955..a468a38361a13 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -68,6 +68,18 @@ PostgreSQL documentation pg_restore will not be able to load the data using COPY statements. + + + + Restoring a dump causes the destination to execute arbitrary code of the + source superusers' choice. Partial dumps and partial restores do not limit + that. If the source superusers are not trusted, the dumped SQL statements + must be inspected before restoring. Non-plain-text dumps can be inspected + by using pg_restore's + option. Note that the client running the dump and restore need not trust + the source or destination superusers. + + @@ -796,6 +808,28 @@ PostgreSQL documentation + + + + + Use the provided string as the psql + \restrict key in the dump output. This can only be + specified for SQL script output, i.e., when the + option is used. If no restrict key is specified, + pg_restore will generate a random one as + needed. Keys may contain only alphanumeric characters. + + + This option is primarily intended for testing purposes and other + scenarios that require repeatable output (e.g., comparing dump files). + It is not recommended for general use, as a malicious server with + advance knowledge of the key may be able to inject arbitrary code that + will be executed on the machine that runs + psql with the dump output. 
+ + + + diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 5ddf3a8ae9257..356baa912991d 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -70,6 +70,14 @@ PostgreSQL documentation pg_upgrade supports upgrades from 9.2.X and later to the current major release of PostgreSQL, including snapshot and beta releases. + + + + Upgrading a cluster causes the destination to execute arbitrary code of the + source superusers' choice. Ensure that the source superusers are trusted + before upgrading. + + diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 4f7b11175c671..1a339600bc48f 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -3551,6 +3551,24 @@ SELECT $1 \parse stmt1 + + \restrict restrict_key + + + Enter "restricted" mode with the provided key. In this mode, the only + allowed meta-command is \unrestrict, to exit + restricted mode. The key may contain only alphanumeric characters. + + + This command is primarily intended for use in plain-text dumps + generated by pg_dump, + pg_dumpall, and + pg_restore, but it may be useful elsewhere. + + + + + \s [ filename ] @@ -3802,6 +3820,24 @@ SELECT 1 \bind \sendpipeline + + \unrestrict restrict_key + + + Exit "restricted" mode (i.e., where all other meta-commands are + blocked), provided the specified key matches the one given to + \restrict when restricted mode was entered. + + + This command is primarily intended for use in plain-text dumps + generated by pg_dump, + pg_dumpall, and + pg_restore, but it may be useful elsewhere. + + + + + \unset name diff --git a/src/bin/pg_combinebackup/t/002_compare_backups.pl b/src/bin/pg_combinebackup/t/002_compare_backups.pl index 2c7ca89b92f7f..9c7fe56cb7c72 100644 --- a/src/bin/pg_combinebackup/t/002_compare_backups.pl +++ b/src/bin/pg_combinebackup/t/002_compare_backups.pl @@ -174,6 +174,7 @@ $pitr1->command_ok( [ 'pg_dumpall', + '--restrict-key=test', '--no-sync', '--no-unlogged-table-data', '--file' => $dump1, @@ -183,6 +184,7 @@ $pitr2->command_ok( [ 'pg_dumpall', + '--restrict-key=test', '--no-sync', '--no-unlogged-table-data', '--file' => $dump2, diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c index 4c02a9863c471..8945bdd42c5b4 100644 --- a/src/bin/pg_dump/dumputils.c +++ b/src/bin/pg_dump/dumputils.c @@ -21,6 +21,7 @@ #include "dumputils.h" #include "fe_utils/string_utils.h" +static const char restrict_chars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; static bool parseAclItem(const char *item, const char *type, const char *name, const char *subname, int remoteVersion, @@ -957,3 +958,40 @@ create_or_open_dir(const char *dirname) pg_fatal("directory \"%s\" is not empty", dirname); } } + +/* + * Generates a valid restrict key (i.e., an alphanumeric string) for use with + * psql's \restrict and \unrestrict meta-commands. For safety, the value is + * chosen at random. + */ +char * +generate_restrict_key(void) +{ + uint8 buf[64]; + char *ret = palloc(sizeof(buf)); + + if (!pg_strong_random(buf, sizeof(buf))) + return NULL; + + for (int i = 0; i < sizeof(buf) - 1; i++) + { + uint8 idx = buf[i] % strlen(restrict_chars); + + ret[i] = restrict_chars[idx]; + } + ret[sizeof(buf) - 1] = '\0'; + + return ret; +} + +/* + * Checks that a given restrict key (intended for use with psql's \restrict and + * \unrestrict meta-commands) contains only alphanumeric characters. 
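+ *
+ * (Aside: keeping keys strictly alphanumeric means a key can never
+ * contain whitespace, quotes, or backslashes, so the key itself cannot
+ * become an injection vector when written unquoted after \restrict.
+ * The modulo mapping in generate_restrict_key() above is slightly
+ * biased, since 256 is not a multiple of the 62 allowed characters,
+ * but a 63-character key drawn from pg_strong_random() should remain
+ * infeasible to guess.)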
+ */ +bool +valid_restrict_key(const char *restrict_key) +{ + return restrict_key != NULL && + restrict_key[0] != '\0' && + strspn(restrict_key, restrict_chars) == strlen(restrict_key); +} diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h index 71dd3f12a8dd1..10f6e27c0a0fa 100644 --- a/src/bin/pg_dump/dumputils.h +++ b/src/bin/pg_dump/dumputils.h @@ -65,4 +65,7 @@ extern void makeAlterConfigCommand(PGconn *conn, const char *configitem, PQExpBuffer buf); extern void create_or_open_dir(const char *dirname); +extern char *generate_restrict_key(void); +extern bool valid_restrict_key(const char *restrict_key); + #endif /* DUMPUTILS_H */ diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 4ebef1e864451..d9041dad72068 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -163,6 +163,8 @@ typedef struct _restoreOptions bool dumpSchema; bool dumpData; bool dumpStatistics; + + char *restrict_key; } RestoreOptions; typedef struct _dumpOptions @@ -213,6 +215,8 @@ typedef struct _dumpOptions bool dumpSchema; bool dumpData; bool dumpStatistics; + + char *restrict_key; } DumpOptions; /* diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 248e4151552e5..3c3acbaccdb51 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -197,6 +197,7 @@ dumpOptionsFromRestoreOptions(RestoreOptions *ropt) dopt->include_everything = ropt->include_everything; dopt->enable_row_security = ropt->enable_row_security; dopt->sequence_data = ropt->sequence_data; + dopt->restrict_key = ropt->restrict_key ? pg_strdup(ropt->restrict_key) : NULL; return dopt; } @@ -461,6 +462,17 @@ RestoreArchive(Archive *AHX) ahprintf(AH, "--\n-- PostgreSQL database dump\n--\n\n"); + /* + * If generating plain-text output, enter restricted mode to block any + * unexpected psql meta-commands. A malicious source might try to inject + * a variety of things via bogus responses to queries. While we cannot + * prevent such sources from affecting the destination at restore time, we + * can block psql meta-commands so that the client machine that runs psql + * with the dump output remains unaffected. + */ + if (ropt->restrict_key) + ahprintf(AH, "\\restrict %s\n\n", ropt->restrict_key); + if (AH->archiveRemoteVersion) ahprintf(AH, "-- Dumped from database version %s\n", AH->archiveRemoteVersion); @@ -801,6 +813,14 @@ RestoreArchive(Archive *AHX) ahprintf(AH, "--\n-- PostgreSQL database dump complete\n--\n\n"); + /* + * If generating plain-text output, exit restricted mode at the very end + * of the script. This is not pro forma; in particular, pg_dumpall + * requires this when transitioning from one database to another. + */ + if (ropt->restrict_key) + ahprintf(AH, "\\unrestrict %s\n\n", ropt->restrict_key); + /* * Clean up & we're done. */ @@ -3452,11 +3472,21 @@ _reconnectToDB(ArchiveHandle *AH, const char *dbname) else { PQExpBufferData connectbuf; + RestoreOptions *ropt = AH->public.ropt; + + /* + * We must temporarily exit restricted mode for \connect, etc. + * Anything added between this line and the following \restrict must + * be careful to avoid any possible meta-command injection vectors. 
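+ *
+ * (Aside: the exposed window is kept to the single \connect line built
+ * by appendPsqlMetaConnect(), which quotes the database name, so even
+ * a hostile database name should not be able to smuggle another
+ * meta-command into this gap.)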
+ */ + ahprintf(AH, "\\unrestrict %s\n", ropt->restrict_key); initPQExpBuffer(&connectbuf); appendPsqlMetaConnect(&connectbuf, dbname); - ahprintf(AH, "%s\n", connectbuf.data); + ahprintf(AH, "%s", connectbuf.data); termPQExpBuffer(&connectbuf); + + ahprintf(AH, "\\restrict %s\n\n", ropt->restrict_key); } /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 66380a6d3012e..fc7a663916331 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -537,6 +537,7 @@ main(int argc, char **argv) {"filter", required_argument, NULL, 16}, {"exclude-extension", required_argument, NULL, 17}, {"sequence-data", no_argument, &dopt.sequence_data, 1}, + {"restrict-key", required_argument, NULL, 25}, {NULL, 0, NULL, 0} }; @@ -797,6 +798,10 @@ main(int argc, char **argv) with_statistics = true; break; + case 25: + dopt.restrict_key = pg_strdup(optarg); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -889,8 +894,22 @@ main(int argc, char **argv) /* archiveFormat specific setup */ if (archiveFormat == archNull) + { plainText = 1; + /* + * If you don't provide a restrict key, one will be appointed for you. + */ + if (!dopt.restrict_key) + dopt.restrict_key = generate_restrict_key(); + if (!dopt.restrict_key) + pg_fatal("could not generate restrict key"); + if (!valid_restrict_key(dopt.restrict_key)) + pg_fatal("invalid restrict key"); + } + else if (dopt.restrict_key) + pg_fatal("option --restrict-key can only be used with --format=plain"); + /* * Custom and directory formats are compressed by default with gzip when * available, not the others. If gzip is not available, no compression is @@ -1229,6 +1248,7 @@ main(int argc, char **argv) ropt->enable_row_security = dopt.enable_row_security; ropt->sequence_data = dopt.sequence_data; ropt->binary_upgrade = dopt.binary_upgrade; + ropt->restrict_key = dopt.restrict_key ? 
pg_strdup(dopt.restrict_key) : NULL; ropt->compression_spec = compression_spec; @@ -1340,6 +1360,7 @@ help(const char *progname) printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --on-conflict-do-nothing add ON CONFLICT DO NOTHING to INSERT commands\n")); printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); + printf(_(" --restrict-key=RESTRICT_KEY use provided string as psql \\restrict key\n")); printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n")); printf(_(" --section=SECTION dump named section (pre-data, data, or post-data)\n")); printf(_(" --sequence-data include sequence data in dump\n")); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 3e7f930c9402a..bb451c1bae144 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -122,6 +122,8 @@ static char *filename = NULL; static SimpleStringList database_exclude_patterns = {NULL, NULL}; static SimpleStringList database_exclude_names = {NULL, NULL}; +static char *restrict_key; + int main(int argc, char *argv[]) { @@ -184,6 +186,7 @@ main(int argc, char *argv[]) {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 8}, {"sequence-data", no_argument, &sequence_data, 1}, + {"restrict-key", required_argument, NULL, 9}, {NULL, 0, NULL, 0} }; @@ -371,6 +374,12 @@ main(int argc, char *argv[]) read_dumpall_filters(optarg, &database_exclude_patterns); break; + case 9: + restrict_key = pg_strdup(optarg); + appendPQExpBufferStr(pgdumpopts, " --restrict-key "); + appendShellString(pgdumpopts, optarg); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -480,6 +489,16 @@ main(int argc, char *argv[]) if (sequence_data) appendPQExpBufferStr(pgdumpopts, " --sequence-data"); + /* + * If you don't provide a restrict key, one will be appointed for you. + */ + if (!restrict_key) + restrict_key = generate_restrict_key(); + if (!restrict_key) + pg_fatal("could not generate restrict key"); + if (!valid_restrict_key(restrict_key)) + pg_fatal("invalid restrict key"); + /* * If there was a database specified on the command line, use that, * otherwise try to connect to database "postgres", and failing that @@ -570,6 +589,16 @@ main(int argc, char *argv[]) if (verbose) dumpTimestamp("Started on"); + /* + * Enter restricted mode to block any unexpected psql meta-commands. A + * malicious source might try to inject a variety of things via bogus + * responses to queries. While we cannot prevent such sources from + * affecting the destination at restore time, we can block psql + * meta-commands so that the client machine that runs psql with the dump + * output remains unaffected. + */ + fprintf(OPF, "\\restrict %s\n\n", restrict_key); + /* * We used to emit \connect postgres here, but that served no purpose * other than to break things for installations without a postgres @@ -630,6 +659,12 @@ main(int argc, char *argv[]) dumpTablespaces(conn); } + /* + * Exit restricted mode just before dumping the databases. pg_dump will + * handle entering restricted mode again as appropriate. 
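+ *
+ * (Aside: a user-supplied --restrict-key is forwarded to each pg_dump
+ * invocation through pgdumpopts above, so every per-database section
+ * of the combined script is wrapped with the same key; otherwise each
+ * pg_dump run generates its own random key.)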
+ */ + fprintf(OPF, "\\unrestrict %s\n\n", restrict_key); + if (!globals_only && !roles_only && !tablespaces_only) dumpDatabases(conn); @@ -702,6 +737,7 @@ help(void) printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --on-conflict-do-nothing add ON CONFLICT DO NOTHING to INSERT commands\n")); printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); + printf(_(" --restrict-key=RESTRICT_KEY use provided string as psql \\restrict key\n")); printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n")); printf(_(" --sequence-data include sequence data in dump\n")); printf(_(" --statistics dump the statistics\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 6c129278bc52b..c9776306c5c42 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -45,6 +45,7 @@ #include #endif +#include "dumputils.h" #include "fe_utils/option_utils.h" #include "filter.h" #include "getopt_long.h" @@ -140,6 +141,7 @@ main(int argc, char **argv) {"statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 4}, + {"restrict-key", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; @@ -315,6 +317,10 @@ main(int argc, char **argv) opts->exit_on_error = true; break; + case 6: + opts->restrict_key = pg_strdup(optarg); + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -350,8 +356,24 @@ main(int argc, char **argv) pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit_nicely(1); } + + if (opts->restrict_key) + pg_fatal("options -d/--dbname and --restrict-key cannot be used together"); + opts->useDB = 1; } + else + { + /* + * If you don't provide a restrict key, one will be appointed for you. + */ + if (!opts->restrict_key) + opts->restrict_key = generate_restrict_key(); + if (!opts->restrict_key) + pg_fatal("could not generate restrict key"); + if (!valid_restrict_key(opts->restrict_key)) + pg_fatal("invalid restrict key"); + } /* reject conflicting "-only" options */ if (data_only && schema_only) @@ -546,6 +568,7 @@ usage(const char *progname) printf(_(" --no-subscriptions do not restore subscriptions\n")); printf(_(" --no-table-access-method do not restore table access methods\n")); printf(_(" --no-tablespaces do not restore tablespace assignments\n")); + printf(_(" --restrict-key=RESTRICT_KEY use provided string as psql \\restrict key\n")); printf(_(" --section=SECTION restore named section (pre-data, data, or post-data)\n")); printf(_(" --statistics restore the statistics\n")); printf(_(" --statistics-only restore only the statistics, not schema or data\n")); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index e3257231a9efc..e7a2d64f74130 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -881,6 +881,16 @@ # This is where the actual tests are defined. my %tests = ( + 'restrict' => { + all_runs => 1, + regexp => qr/^\\restrict [a-zA-Z0-9]+$/m, + }, + + 'unrestrict' => { + all_runs => 1, + regexp => qr/^\\unrestrict [a-zA-Z0-9]+$/m, + }, + 'ALTER DEFAULT PRIVILEGES FOR ROLE regress_dump_test_role GRANT' => { create_order => 14, create_sql => 'ALTER DEFAULT PRIVILEGES @@ -4239,7 +4249,6 @@ }, 'ALTER TABLE measurement PRIMARY KEY' => { - all_runs => 1, catch_all => 'CREATE ... 
commands', create_order => 93, create_sql => @@ -4291,7 +4300,6 @@ }, 'ALTER INDEX ... ATTACH PARTITION (primary key)' => { - all_runs => 1, catch_all => 'CREATE ... commands', regexp => qr/^ \QALTER INDEX dump_test.measurement_pkey ATTACH PARTITION dump_test_second_schema.measurement_y2006m2_pkey\E @@ -5424,9 +5432,10 @@ # Check for proper test definitions # - # There should be a "like" list, even if it is empty. (This - # makes the test more self-documenting.) - if (!defined($tests{$test}->{like})) + # Either "all_runs" should be set or there should be a "like" list, + # even if it is empty. (This makes the test more self-documenting.) + if (!defined($tests{$test}->{all_runs}) + && !defined($tests{$test}->{like})) { die "missing \"like\" in test \"$test\""; } @@ -5462,9 +5471,10 @@ next; } - # Run the test listed as a like, unless it is specifically noted - # as an unlike (generally due to an explicit exclusion or similar). - if ($tests{$test}->{like}->{$test_key} + # Run the test if all_runs is set or if listed as a like, unless it is + # specifically noted as an unlike (generally due to an explicit + # exclusion or similar). + if (($tests{$test}->{like}->{$test_key} || $tests{$test}->{all_runs}) && !defined($tests{$test}->{unlike}->{$test_key})) { if (!ok($output_file =~ $tests{$test}->{regexp}, diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 0b15e38297e2e..4b5e895809b40 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -86,6 +86,7 @@ sub get_dump_for_comparison $node->run_log( [ 'pg_dump', '--no-sync', + '--restrict-key=test', '-d' => $node->connstr($db), '-f' => $dumpfile ]); @@ -427,6 +428,7 @@ sub get_dump_for_comparison # that we need to use pg_dumpall from the new node here. my @dump_command = ( 'pg_dumpall', '--no-sync', + '--restrict-key=test', '--dbname' => $oldnode->connstr('postgres'), '--file' => $dump1_file); # --extra-float-digits is needed when upgrading from a version older than 11. @@ -624,6 +626,7 @@ sub get_dump_for_comparison # Second dump from the upgraded instance. @dump_command = ( 'pg_dumpall', '--no-sync', + '--restrict-key=test', '--dbname' => $newnode->connstr('postgres'), '--file' => $dump2_file); # --extra-float-digits is needed when upgrading from a version older than 11. 
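Taken together, these changes make a plain-text dump look roughly like the following sketch (the key shown is invented; a real one is 63 random alphanumeric characters, per generate_restrict_key() above, unless --restrict-key overrides it):

\restrict Xf3TrustNoInput9
--
-- PostgreSQL database dump
--
-- ... ordinary SQL statements; an injected meta-command now fails ...
--
-- PostgreSQL database dump complete
--
\unrestrict Xf3TrustNoInput9

Around each database switch the script briefly emits \unrestrict, the quoted \connect, and a fresh \restrict, as implemented in _reconnectToDB() above.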
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 0e00d73487c33..cc602087db246 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -130,6 +130,8 @@ static backslashResult exec_command_pset(PsqlScanState scan_state, bool active_b static backslashResult exec_command_quit(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_reset(PsqlScanState scan_state, bool active_branch, PQExpBuffer query_buf); +static backslashResult exec_command_restrict(PsqlScanState scan_state, bool active_branch, + const char *cmd); static backslashResult exec_command_s(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_sendpipeline(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_set(PsqlScanState scan_state, bool active_branch); @@ -142,6 +144,8 @@ static backslashResult exec_command_syncpipeline(PsqlScanState scan_state, bool static backslashResult exec_command_t(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_T(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_timing(PsqlScanState scan_state, bool active_branch); +static backslashResult exec_command_unrestrict(PsqlScanState scan_state, bool active_branch, + const char *cmd); static backslashResult exec_command_unset(PsqlScanState scan_state, bool active_branch, const char *cmd); static backslashResult exec_command_write(PsqlScanState scan_state, bool active_branch, @@ -192,6 +196,8 @@ static char *pset_value_string(const char *param, printQueryOpt *popt); static void checkWin32Codepage(void); #endif +static bool restricted; +static char *restrict_key; /*---------- @@ -237,8 +243,19 @@ HandleSlashCmds(PsqlScanState scan_state, /* Parse off the command name */ cmd = psql_scan_slash_command(scan_state); - /* And try to execute it */ - status = exec_command(cmd, scan_state, cstack, query_buf, previous_buf); + /* + * And try to execute it. + * + * If we are in "restricted" mode, the only allowable backslash command is + * \unrestrict (to exit restricted mode). 
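+ *
+ * (Aside: only backslash commands ever reach HandleSlashCmds(); plain
+ * SQL statements take a different path and keep executing normally
+ * while restricted.)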
+ */ + if (restricted && strcmp(cmd, "unrestrict") != 0) + { + pg_log_error("backslash commands are restricted; only \\unrestrict is allowed"); + status = PSQL_CMD_ERROR; + } + else + status = exec_command(cmd, scan_state, cstack, query_buf, previous_buf); if (status == PSQL_CMD_UNKNOWN) { @@ -416,6 +433,8 @@ exec_command(const char *cmd, status = exec_command_quit(scan_state, active_branch); else if (strcmp(cmd, "r") == 0 || strcmp(cmd, "reset") == 0) status = exec_command_reset(scan_state, active_branch, query_buf); + else if (strcmp(cmd, "restrict") == 0) + status = exec_command_restrict(scan_state, active_branch, cmd); else if (strcmp(cmd, "s") == 0) status = exec_command_s(scan_state, active_branch); else if (strcmp(cmd, "sendpipeline") == 0) @@ -438,6 +457,8 @@ exec_command(const char *cmd, status = exec_command_T(scan_state, active_branch); else if (strcmp(cmd, "timing") == 0) status = exec_command_timing(scan_state, active_branch); + else if (strcmp(cmd, "unrestrict") == 0) + status = exec_command_unrestrict(scan_state, active_branch, cmd); else if (strcmp(cmd, "unset") == 0) status = exec_command_unset(scan_state, active_branch, cmd); else if (strcmp(cmd, "w") == 0 || strcmp(cmd, "write") == 0) @@ -2754,6 +2775,35 @@ exec_command_reset(PsqlScanState scan_state, bool active_branch, return PSQL_CMD_SKIP_LINE; } +/* + * \restrict -- enter "restricted mode" with the provided key + */ +static backslashResult +exec_command_restrict(PsqlScanState scan_state, bool active_branch, + const char *cmd) +{ + if (active_branch) + { + char *opt; + + Assert(!restricted); + + opt = psql_scan_slash_option(scan_state, OT_NORMAL, NULL, true); + if (opt == NULL || opt[0] == '\0') + { + pg_log_error("\\%s: missing required argument", cmd); + return PSQL_CMD_ERROR; + } + + restrict_key = pstrdup(opt); + restricted = true; + } + else + ignore_slash_options(scan_state); + + return PSQL_CMD_SKIP_LINE; +} + /* * \s -- save history in a file or show it on the screen */ @@ -3135,6 +3185,46 @@ exec_command_timing(PsqlScanState scan_state, bool active_branch) return success ? 
PSQL_CMD_SKIP_LINE : PSQL_CMD_ERROR; } +/* + * \unrestrict -- exit "restricted mode" if provided key matches + */ +static backslashResult +exec_command_unrestrict(PsqlScanState scan_state, bool active_branch, + const char *cmd) +{ + if (active_branch) + { + char *opt; + + opt = psql_scan_slash_option(scan_state, OT_NORMAL, NULL, true); + if (opt == NULL || opt[0] == '\0') + { + pg_log_error("\\%s: missing required argument", cmd); + return PSQL_CMD_ERROR; + } + + if (!restricted) + { + pg_log_error("\\%s: not currently in restricted mode", cmd); + return PSQL_CMD_ERROR; + } + else if (strcmp(opt, restrict_key) == 0) + { + pfree(restrict_key); + restricted = false; + } + else + { + pg_log_error("\\%s: wrong key", cmd); + return PSQL_CMD_ERROR; + } + } + else + ignore_slash_options(scan_state); + + return PSQL_CMD_SKIP_LINE; +} + /* * \unset -- unset variable */ diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index 8c62729a0d124..ed00c36695e85 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -171,6 +171,10 @@ slashUsage(unsigned short int pager) HELP0(" \\gset [PREFIX] execute query and store result in psql variables\n"); HELP0(" \\gx [(OPTIONS)] [FILE] as \\g, but forces expanded output mode\n"); HELP0(" \\q quit psql\n"); + HELP0(" \\restrict RESTRICT_KEY\n" + " enter restricted mode with provided key\n"); + HELP0(" \\unrestrict RESTRICT_KEY\n" + " exit restricted mode if key matches\n"); HELP0(" \\watch [[i=]SEC] [c=N] [m=MIN]\n" " execute query every SEC seconds, up to N times,\n" " stop if less than MIN rows are returned\n"); diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl index f42c3961e09f7..cf07a9dbd5ed6 100644 --- a/src/bin/psql/t/001_basic.pl +++ b/src/bin/psql/t/001_basic.pl @@ -530,4 +530,11 @@ sub psql_fails_like qr/COPY in a pipeline is not supported, aborting connection/, '\copy to in pipeline: fails'); +psql_fails_like( + $node, + qq{\\restrict test +\\! should_fail}, + qr/backslash commands are restricted; only \\unrestrict is allowed/, + 'meta-command in restrict mode fails'); + done_testing(); diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 1f2ca946fc500..8b10f2313f39d 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -1918,11 +1918,11 @@ psql_completion(const char *text, int start, int end) "\\out", "\\parse", "\\password", "\\print", "\\prompt", "\\pset", "\\qecho", "\\quit", - "\\reset", + "\\reset", "\\restrict", "\\s", "\\sendpipeline", "\\set", "\\setenv", "\\sf", "\\startpipeline", "\\sv", "\\syncpipeline", "\\t", "\\T", "\\timing", - "\\unset", + "\\unrestrict", "\\unset", "\\x", "\\warn", "\\watch", "\\write", "\\z", diff --git a/src/test/recovery/t/027_stream_regress.pl b/src/test/recovery/t/027_stream_regress.pl index 5d2c06ba06e73..12d8852fb4b3d 100644 --- a/src/test/recovery/t/027_stream_regress.pl +++ b/src/test/recovery/t/027_stream_regress.pl @@ -117,6 +117,7 @@ 'pg_dumpall', '--file' => $outputdir . '/primary.dump', '--no-sync', '--no-statistics', + '--restrict-key=test', '--port' => $node_primary->port, '--no-unlogged-table-data', # if unlogged, standby has schema only ], @@ -126,6 +127,7 @@ 'pg_dumpall', '--file' => $outputdir . '/standby.dump', '--no-sync', '--no-statistics', + '--restrict-key=test', '--port' => $node_standby_1->port, ], 'dump standby server'); @@ -145,6 +147,7 @@ '--schema' => 'pg_catalog', '--file' => $outputdir . 
'/catalogs_primary.dump', '--no-sync', + '--restrict-key=test', '--port', $node_primary->port, '--no-unlogged-table-data', 'regression', @@ -156,6 +159,7 @@ '--schema' => 'pg_catalog', '--file' => $outputdir . '/catalogs_standby.dump', '--no-sync', + '--restrict-key=test', '--port' => $node_standby_1->port, 'regression', ], diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index 236eba2540e9d..a79325e8a2f79 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -4705,6 +4705,7 @@ invalid command \lo \pset arg1 arg2 \q \reset + \restrict test \s arg1 \sendpipeline \set arg1 arg2 arg3 arg4 arg5 arg6 arg7 @@ -4716,6 +4717,7 @@ invalid command \lo \t arg1 \T arg1 \timing arg1 + \unrestrict not_valid \unset arg1 \w arg1 \watch arg1 arg2 diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql index e2e3124543978..f064e4f545607 100644 --- a/src/test/regress/sql/psql.sql +++ b/src/test/regress/sql/psql.sql @@ -1073,6 +1073,7 @@ select \if false \\ (bogus \else \\ 42 \endif \\ forty_two; \pset arg1 arg2 \q \reset + \restrict test \s arg1 \sendpipeline \set arg1 arg2 arg3 arg4 arg5 arg6 arg7 @@ -1084,6 +1085,7 @@ select \if false \\ (bogus \else \\ 42 \endif \\ forty_two; \t arg1 \T arg1 \timing arg1 + \unrestrict not_valid \unset arg1 \w arg1 \watch arg1 arg2 From 01d6832c109bcc37acb30e934b7c472334b7c291 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 11 Aug 2025 15:14:39 -0400 Subject: [PATCH 487/602] meson: add and use stamp files for generated headers Without using stamp files, meson lists the generated headers as the dependency for every .c file, bloating build.ninja by more than 2x. Processing all the dependencies also increases the time to generate build.ninja. The immediate benefit is that this makes re-configuring and clean builds a bit faster. The main motivation however is that I have other patches that introduce additional build targets that further would increase the size of build.ninja, making re-configuring more noticeably slower. 
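Schematically, and with invented file names, the change turns every per-object line in build.ninja of the form

build foo.c.o: c_COMPILER foo.c || header_a.h header_b.h ... header_z.h

into

build foo.c.o: c_COMPILER foo.c || generated-headers-stamp.h

where the stamp target itself depends on all the generated headers, so the long header list is written out once rather than once per compiled file ("||" introduces ninja's order-only dependencies, which is how meson expresses header dependencies here).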
Reviewed-by: Nazir Bilal Yavuz Discussion: https://postgr.es/m/cgkdgvzdpinkacf4v33mky7tbmk467oda5dd4dlmucjjockxzi@xkqfvjoq4uiy --- meson.build | 16 +++++++++------- src/backend/meson.build | 2 +- src/fe_utils/meson.build | 2 +- src/include/meson.build | 21 +++++++++++++++++++++ 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/meson.build b/meson.build index 431fe9ccc9baf..30e0edda3e793 100644 --- a/meson.build +++ b/meson.build @@ -3133,6 +3133,8 @@ gen_export_kwargs = { 'install': false, } +# command to create stamp files on all OSs +stamp_cmd = [python, '-c', 'import sys; open(sys.argv[1], "w")', '@OUTPUT0@'] ### @@ -3250,14 +3252,14 @@ subdir('src/port') frontend_common_code = declare_dependency( compile_args: ['-DFRONTEND'], include_directories: [postgres_inc], - sources: generated_headers, + sources: generated_headers_stamp, dependencies: [os_deps, zlib, zstd, lz4], ) backend_common_code = declare_dependency( compile_args: ['-DBUILDING_DLL'], include_directories: [postgres_inc], - sources: generated_headers, + sources: generated_headers_stamp, dependencies: [os_deps, zlib, zstd], ) @@ -3272,7 +3274,7 @@ shlib_code = declare_dependency( frontend_stlib_code = declare_dependency( include_directories: [postgres_inc], link_with: [common_static, pgport_static], - sources: generated_headers, + sources: generated_headers_stamp, dependencies: [os_deps, libintl], ) @@ -3280,7 +3282,7 @@ frontend_stlib_code = declare_dependency( frontend_shlib_code = declare_dependency( include_directories: [postgres_inc], link_with: [common_shlib, pgport_shlib], - sources: generated_headers, + sources: generated_headers_stamp, dependencies: [shlib_code, os_deps, libintl], ) @@ -3290,7 +3292,7 @@ frontend_shlib_code = declare_dependency( frontend_no_fe_utils_code = declare_dependency( include_directories: [postgres_inc], link_with: [common_static, pgport_static], - sources: generated_headers, + sources: generated_headers_stamp, dependencies: [os_deps, libintl], ) @@ -3317,7 +3319,7 @@ subdir('src/interfaces/libpq-oauth') frontend_code = declare_dependency( include_directories: [postgres_inc], link_with: [fe_utils, common_static, pgport_static], - sources: generated_headers, + sources: generated_headers_stamp, dependencies: [os_deps, libintl], ) @@ -3347,7 +3349,7 @@ backend_code = declare_dependency( include_directories: [postgres_inc], link_args: ldflags_be, link_with: [], - sources: generated_headers + generated_backend_headers, + sources: generated_backend_headers_stamp, dependencies: os_deps + backend_both_deps + backend_deps, ) diff --git a/src/backend/meson.build b/src/backend/meson.build index 2b0db21480470..b831a541652bc 100644 --- a/src/backend/meson.build +++ b/src/backend/meson.build @@ -169,7 +169,7 @@ backend_mod_code = declare_dependency( compile_args: pg_mod_c_args, include_directories: postgres_inc, link_args: pg_mod_link_args, - sources: generated_headers + generated_backend_headers, + sources: generated_backend_headers_stamp, dependencies: backend_mod_deps, ) diff --git a/src/fe_utils/meson.build b/src/fe_utils/meson.build index a18cbc939e412..5a9ddb73463e1 100644 --- a/src/fe_utils/meson.build +++ b/src/fe_utils/meson.build @@ -29,7 +29,7 @@ generated_sources += psqlscan fe_utils_sources += psqlscan fe_utils = static_library('libpgfeutils', - fe_utils_sources + generated_headers, + fe_utils_sources, c_pch: pch_postgres_fe_h, include_directories: [postgres_inc, libpq_inc], c_args: host_system == 'windows' ? 
['-DFD_SETSIZE=1024'] : [], diff --git a/src/include/meson.build index 2e4b7aa529e26..7cb3075da2a93 100644 --- a/src/include/meson.build +++ b/src/include/meson.build @@ -177,3 +177,24 @@ install_subdir('catalog', # autoconf generates the file there, ensure we get a conflict generated_sources_ac += {'src/include': ['stamp-h']} + + +# Instead of having targets depending directly on a list of all generated +# headers, have them depend on a stamp file for all of them. Dependencies on +# headers are implemented as order-only dependencies in meson (and then using +# compiler generated dependencies during incremental rebuilds). The benefit +# of using a stamp file is that it makes build.ninja considerably smaller and +# meson setup faster, as otherwise the list of headers is repeated for every C +# file, bloating build.ninja by ~2x. +generated_headers_stamp = custom_target('generated-headers-stamp.h', + output: 'generated-headers-stamp.h', + input: generated_headers, + command: stamp_cmd, +) + +generated_backend_headers_stamp = custom_target('generated-backend-headers-stamp.h', + output: 'generated-backend-headers-stamp.h', + input: generated_backend_headers, + depends: generated_headers_stamp, + command: stamp_cmd, +) From b227b0bb4e032e19b3679bedac820eba3ac0d1cf Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 11 Aug 2025 15:41:34 -0400 Subject: [PATCH 488/602] Reduce ExecSeqScan* code size using pg_assume() fb9f955025f optimized code generation by using specialized variants of ExecSeqScan* for [not] having a qual, projection etc. This allowed the compiler to optimize out the code for qual / projection. However, as observed by David Rowley at the time, the compiler couldn't prove the opposite, i.e. that the qual etc *are* present. By using pg_assume(), introduced in d65eb5b1b84, we can tell the compiler that the relevant variables are non-null. This reduces the code size to a surprising degree and seems to lead to a small but reproducible performance gain. Reviewed-by: Amit Langote Discussion: https://postgr.es/m/CA+HiwqFk-MbwhfX_kucxzL8zLmjEt9MMcHi2YF=DyhPrSjsBEA@mail.gmail.com --- src/backend/executor/nodeSeqscan.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index ed35c58c2c346..94047d29430d6 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -131,8 +131,12 @@ ExecSeqScanWithQual(PlanState *pstate) { SeqScanState *node = castNode(SeqScanState, pstate); + /* + * Use pg_assume() for != NULL tests to make the compiler realize no + * runtime check for the field is needed in ExecScanExtended().
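+ *
+ * (Aside: in optimized builds pg_assume(expr) boils down to roughly
+ * "if (!(expr)) pg_unreachable()", i.e. __builtin_unreachable() on
+ * GCC/Clang, which is what licenses the compiler to delete the
+ * NULL-handling branches inside ExecScanExtended(); in assert-enabled
+ * builds it still checks the condition like Assert() does.)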
+ */ Assert(pstate->state->es_epq_active == NULL); - Assert(pstate->qual != NULL); + pg_assume(pstate->qual != NULL); Assert(pstate->ps_ProjInfo == NULL); return ExecScanExtended(&node->ss, @@ -153,7 +157,7 @@ ExecSeqScanWithProject(PlanState *pstate) Assert(pstate->state->es_epq_active == NULL); Assert(pstate->qual == NULL); - Assert(pstate->ps_ProjInfo != NULL); + pg_assume(pstate->ps_ProjInfo != NULL); return ExecScanExtended(&node->ss, (ExecScanAccessMtd) SeqNext, @@ -173,8 +177,8 @@ ExecSeqScanWithQualProject(PlanState *pstate) SeqScanState *node = castNode(SeqScanState, pstate); Assert(pstate->state->es_epq_active == NULL); - Assert(pstate->qual != NULL); - Assert(pstate->ps_ProjInfo != NULL); + pg_assume(pstate->qual != NULL); + pg_assume(pstate->ps_ProjInfo != NULL); return ExecScanExtended(&node->ss, (ExecScanAccessMtd) SeqNext, From 5f19d13dfed35d8d4ed80d555f2d32b106771b61 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 12 Aug 2025 20:52:32 +0200 Subject: [PATCH 489/602] libpq: Set LDAP protocol version 3 Some LDAP servers reject the default version 2 protocol. So set version 3 before starting the connection. This matches how the backend LDAP code has worked all along. Co-authored-by: Andrew Jackson Reviewed-by: Pavel Seleznev Discussion: https://www.postgresql.org/message-id/flat/CAKK5BkHixcivSCA9pfd_eUp7wkLRhvQ6OtGLAYrWC%3Dk7E76LDQ%40mail.gmail.com --- src/interfaces/libpq/fe-connect.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index afa85d9fca961..a3d12931fff30 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -5494,6 +5494,7 @@ ldapServiceLookup(const char *purl, PQconninfoOption *options, *entry; struct berval **values; LDAP_TIMEVAL time = {PGLDAP_TIMEOUT, 0}; + int ldapversion = LDAP_VERSION3; if ((url = strdup(purl)) == NULL) { @@ -5625,6 +5626,15 @@ ldapServiceLookup(const char *purl, PQconninfoOption *options, return 3; } + if ((rc = ldap_set_option(ld, LDAP_OPT_PROTOCOL_VERSION, &ldapversion)) != LDAP_SUCCESS) + { + libpq_append_error(errorMessage, "could not set LDAP protocol version: %s", + ldap_err2string(rc)); + free(url); + ldap_unbind(ld); + return 3; + } + /* * Perform an explicit anonymous bind. * From 783cbb6d5e8bdf87d321286f210983c177ead967 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 13 Aug 2025 13:11:19 +0900 Subject: [PATCH 490/602] postgres_fdw: Fix tests with ANALYZE and remote sampling The tests fixed in this commit were changing the sampling setting of a foreign server, but then were analyzing a local table instead of a foreign table, meaning that the test was not running for its original purpose. This commit changes the ANALYZE commands to analyze the foreign table, and changes the foreign table definition to point to a valid remote table. Attempting to analyze the foreign table "analyze_ftable" would have failed before this commit, because "analyze_rtable1" is not defined on the remote side. Issue introduced by 8ad51b5f446b. 
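Had the foreign table actually been analyzed before this fix, the broken definition would have surfaced at once, roughly as follows (the error comes from the remote server and its exact text is approximate here):

ANALYZE analyze_ftable;
ERROR:  relation "public.analyze_rtable1" does not exist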
Author: Corey Huinker Discussion: https://postgr.es/m/CADkLM=cpUiJ3QF7aUthTvaVMmgQcm7QqZBRMDLhBRTR+gJX-Og@mail.gmail.com Backpatch-through: 16 --- contrib/postgres_fdw/expected/postgres_fdw.out | 12 ++++++------ contrib/postgres_fdw/sql/postgres_fdw.sql | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index a434eb1395e03..d3323b04676ca 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -12649,7 +12649,7 @@ ALTER SERVER loopback2 OPTIONS (DROP parallel_abort); -- =================================================================== CREATE TABLE analyze_table (id int, a text, b bigint); CREATE FOREIGN TABLE analyze_ftable (id int, a text, b bigint) - SERVER loopback OPTIONS (table_name 'analyze_rtable1'); + SERVER loopback OPTIONS (table_name 'analyze_table'); INSERT INTO analyze_table (SELECT x FROM generate_series(1,1000) x); ANALYZE analyze_table; SET default_statistics_target = 10; @@ -12657,15 +12657,15 @@ ANALYZE analyze_table; ALTER SERVER loopback OPTIONS (analyze_sampling 'invalid'); ERROR: invalid value for string option "analyze_sampling": invalid ALTER SERVER loopback OPTIONS (analyze_sampling 'auto'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'system'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'bernoulli'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'random'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'off'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; -- cleanup DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index d9bed565c81fd..2c609e060b7c8 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -4365,7 +4365,7 @@ ALTER SERVER loopback2 OPTIONS (DROP parallel_abort); CREATE TABLE analyze_table (id int, a text, b bigint); CREATE FOREIGN TABLE analyze_ftable (id int, a text, b bigint) - SERVER loopback OPTIONS (table_name 'analyze_rtable1'); + SERVER loopback OPTIONS (table_name 'analyze_table'); INSERT INTO analyze_table (SELECT x FROM generate_series(1,1000) x); ANALYZE analyze_table; @@ -4376,19 +4376,19 @@ ANALYZE analyze_table; ALTER SERVER loopback OPTIONS (analyze_sampling 'invalid'); ALTER SERVER loopback OPTIONS (analyze_sampling 'auto'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'system'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'bernoulli'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'random'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; ALTER SERVER loopback OPTIONS (SET analyze_sampling 'off'); -ANALYZE analyze_table; +ANALYZE analyze_ftable; -- cleanup DROP FOREIGN TABLE analyze_ftable; From 8081e54bc52a0ea77cbe38a2a439598f361a5089 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 13 Aug 2025 15:07:51 +0200 Subject: [PATCH 491/602] Clean up order in stylesheet-fo.xsl Make a separate section for release notes customization.
Commits f986882ffd6 and 8a6e85b46e0 put those into the middle of unrelated things. --- doc/src/sgml/stylesheet-fo.xsl | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/stylesheet-fo.xsl b/doc/src/sgml/stylesheet-fo.xsl index e7916a6a88347..69ef085ff899b 100644 --- a/doc/src/sgml/stylesheet-fo.xsl +++ b/doc/src/sgml/stylesheet-fo.xsl @@ -14,21 +14,6 @@ 3 - - - - - - - - - - - 1.5em @@ -415,5 +400,21 @@ an "Unresolved ID reference found" warning during PDF builds. + + + + + + + + + + + + From 37e06ba6e82b44fc6ebfc9e52d71f0c21d0044c4 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 13 Aug 2025 15:47:46 +0200 Subject: [PATCH 492/602] Improve PDF documentation margins Set body indent to 0 to make use of the horizontal space better (and some reviewers thought it was also more readable). Add some left and right margin to the warning boxes, otherwise they drift too far off the page in combination with the above change. Author: Noboru Saito Reviewed-by: Hayato Kuroda (Fujitsu) Reviewed-by: Florents Tselai Reviewed-by: Tatsuo Ishii Discussion: https://www.postgresql.org/message-id/flat/CAAM3qnLyMUD79XF+SqAVwWCwURCF3hyuFY9Ki9Csbqs-zMwwnw@mail.gmail.com --- doc/src/sgml/stylesheet-fo.xsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/src/sgml/stylesheet-fo.xsl b/doc/src/sgml/stylesheet-fo.xsl index 69ef085ff899b..aec6de7064a7b 100644 --- a/doc/src/sgml/stylesheet-fo.xsl +++ b/doc/src/sgml/stylesheet-fo.xsl @@ -17,6 +17,8 @@ 1.5em +0 +0 @@ -27,6 +29,8 @@ solid 1pt black + 0.25in + 0.25in 12pt 12pt 6pt From 05f506a5158e7779484fc4dec731942eb809e609 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 13 Aug 2025 17:40:13 +0200 Subject: [PATCH 493/602] Adjust some table column widths in PDF Make some column widths more pleasing. Note: Some of this relies on the reduced body indents introduced by commit 37e06ba6e82. Author: Noboru Saito Discussion: https://www.postgresql.org/message-id/flat/CAAM3qnLyMUD79XF+SqAVwWCwURCF3hyuFY9Ki9Csbqs-zMwwnw@mail.gmail.com --- doc/src/sgml/datatype.sgml | 6 ++---- doc/src/sgml/func/func-formatting.sgml | 4 ++++ doc/src/sgml/func/func-info.sgml | 10 ++++++++++ doc/src/sgml/func/func-matching.sgml | 18 +++++++++++++++++- doc/src/sgml/storage.sgml | 4 ++++ 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 7458f216e5094..b81d89e260809 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -2054,8 +2054,6 @@ MINUTE TO SECOND Time Input - - Example @@ -5245,8 +5243,8 @@ WHERE ...
Pseudo-Types - - + + Name diff --git a/doc/src/sgml/func/func-formatting.sgml b/doc/src/sgml/func/func-formatting.sgml index 806302b2f7b34..df05e5c167691 100644 --- a/doc/src/sgml/func/func-formatting.sgml +++ b/doc/src/sgml/func/func-formatting.sgml @@ -180,6 +180,8 @@
Template Patterns for Date/Time Formatting + + Pattern @@ -773,6 +775,8 @@
Template Patterns for Numeric Formatting + + Pattern diff --git a/doc/src/sgml/func/func-info.sgml b/doc/src/sgml/func/func-info.sgml index b507bfaf64b19..c393832d94c64 100644 --- a/doc/src/sgml/func/func-info.sgml +++ b/doc/src/sgml/func/func-info.sgml @@ -2192,6 +2192,8 @@ SELECT currval(pg_get_serial_sequence('sometable', 'id'));
Index Column Properties + + NameDescription @@ -2251,6 +2253,8 @@ SELECT currval(pg_get_serial_sequence('sometable', 'id'));
Index Properties + + NameDescription @@ -2284,6 +2288,8 @@ SELECT currval(pg_get_serial_sequence('sometable', 'id'));
Index Access Method Properties + + NameDescription @@ -2323,6 +2329,8 @@ SELECT currval(pg_get_serial_sequence('sometable', 'id'));
GUC Flags + + FlagDescription @@ -2992,6 +3000,8 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres}
Snapshot Components + + Name diff --git a/doc/src/sgml/func/func-matching.sgml b/doc/src/sgml/func/func-matching.sgml index 7d44e2a27bcca..ebe0b22c8f60f 100644 --- a/doc/src/sgml/func/func-matching.sgml +++ b/doc/src/sgml/func/func-matching.sgml @@ -1084,6 +1084,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Atoms + + Atom @@ -1166,6 +1168,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Quantifiers + + Quantifier @@ -1270,6 +1274,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Constraints + + Constraint @@ -1508,6 +1514,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Character-Entry Escapes + + Escape @@ -1649,7 +1657,9 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Class-Shorthand Escapes - + + + Escape Description @@ -1708,6 +1718,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Constraint Escapes + + Escape @@ -1764,6 +1776,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) Regular Expression Back References + + Escape @@ -1845,6 +1859,8 @@ regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2) ARE Embedded-Option Letters + + Option diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index 61250799ec076..02ddfda834a2e 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -39,6 +39,8 @@ these required items, the cluster configuration files
Contents of <varname>PGDATA</varname> + + @@ -743,6 +745,8 @@ There are five parts to each page. Overall Page Layout Page Layout + + From 21fddb3d76909e6a4d65485c0eda2cc34b2591a3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 13 Aug 2025 11:59:47 -0400 Subject: [PATCH 494/602] Don't treat EINVAL from semget() as a hard failure. It turns out that on some platforms (at least current macOS, NetBSD, OpenBSD) semget(2) will return EINVAL if there is a pre-existing semaphore set with the same key and too few semaphores. Our code expects EEXIST in that case and treats EINVAL as a hard failure, resulting in failure during initdb or postmaster start. POSIX does document EINVAL for too-few-semaphores-in-set, and is silent on its priority relative to EEXIST, so this behavior arguably conforms to spec. Nonetheless it's quite problematic because EINVAL is also documented to mean that nsems is greater than the system's limit on the number of semaphores per set (SEMMSL). If that is where the problem lies, retrying would just become an infinite loop. To resolve this contradiction, retry after EINVAL, but also install a loop limit that will make us give up regardless of the specific errno after trying 1000 different keys. (1000 is a pretty arbitrary number, but it seems like it should be sufficient.) I like this better than the previous infinite-looping behavior, since it will also keep us out of trouble if (say) we get EACCES due to a system-level permissions problem rather than anything to do with a specific semaphore set. This problem has only been observed in the field in PG 17, which uses a higher nsems value than other branches (cf. 38da05346, 810a8b1c8). That makes it possible to get the failure if a new v17 postmaster has a key collision with an existing postmaster of another branch. In principle though, we might see such a collision against a semaphore set created by some other application, in which case all branches are vulnerable on these platforms. Hence, backpatch. Reported-by: Gavin Panella Author: Tom Lane Discussion: https://postgr.es/m/CALL7chmzY3eXHA7zHnODUVGZLSvK3wYCSP0RmcDFHJY8f28Q3g@mail.gmail.com Backpatch-through: 13 --- src/backend/port/sysv_sema.c | 46 ++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c index 423b2b4f9d6d1..6ac83ea1a821a 100644 --- a/src/backend/port/sysv_sema.c +++ b/src/backend/port/sysv_sema.c @@ -69,7 +69,7 @@ static int nextSemaNumber; /* next free sem num in last sema set */ static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, - int numSems); + int numSems, bool retry_ok); static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value); static void IpcSemaphoreKill(IpcSemaphoreId semId); @@ -88,9 +88,13 @@ static void ReleaseSemaphores(int status, Datum arg); * If we fail with a failure code other than collision-with-existing-set, * print out an error and abort. Other types of errors suggest nonrecoverable * problems. + * + * Unfortunately, it's sometimes hard to tell whether errors are + * nonrecoverable. Our caller keeps track of whether continuing to retry + * is sane or not; if not, we abort on failure regardless of the errno. 
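+ *
+ * (Aside: the core ambiguity is that semget(key, nsems, IPC_CREAT |
+ * IPC_EXCL) may set errno to EINVAL either because "key" names an
+ * existing set with fewer than nsems semaphores, which retrying with a
+ * new key fixes, or because nsems exceeds the platform's SEMMSL, which
+ * no amount of retrying can fix; hence the caller's cap of 1000 keys
+ * rather than an unbounded loop.)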
*/ static IpcSemaphoreId -InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems) +InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems, bool retry_ok) { int semId; @@ -101,16 +105,27 @@ InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems) int saved_errno = errno; /* - * Fail quietly if error indicates a collision with existing set. One - * would expect EEXIST, given that we said IPC_EXCL, but perhaps we - * could get a permission violation instead? Also, EIDRM might occur - * if an old set is slated for destruction but not gone yet. + * Fail quietly if error suggests a collision with an existing set and + * our caller has not lost patience. + * + * One would expect EEXIST, given that we said IPC_EXCL, but perhaps + * we could get a permission violation instead. On some platforms + * EINVAL will be reported if the existing set has too few semaphores. + * Also, EIDRM might occur if an old set is slated for destruction but + * not gone yet. + * + * EINVAL is the key reason why we need the caller-level loop limit, + * as it can also mean that the platform's SEMMSL is less than + * numSems, and that condition can't be fixed by trying another key. */ - if (saved_errno == EEXIST || saved_errno == EACCES + if (retry_ok && + (saved_errno == EEXIST + || saved_errno == EACCES + || saved_errno == EINVAL #ifdef EIDRM - || saved_errno == EIDRM + || saved_errno == EIDRM #endif - ) + )) return -1; /* @@ -207,17 +222,22 @@ IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum) static IpcSemaphoreId IpcSemaphoreCreate(int numSems) { + int num_tries = 0; IpcSemaphoreId semId; union semun semun; PGSemaphoreData mysema; /* Loop till we find a free IPC key */ - for (nextSemaKey++;; nextSemaKey++) + for (nextSemaKey++;; nextSemaKey++, num_tries++) { pid_t creatorPID; - /* Try to create new semaphore set */ - semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + /* + * Try to create new semaphore set. Give up after trying 1000 + * distinct IPC keys. + */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1, + num_tries < 1000); if (semId >= 0) break; /* successful create */ @@ -254,7 +274,7 @@ IpcSemaphoreCreate(int numSems) /* * Now try again to create the sema set. */ - semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1, true); if (semId >= 0) break; /* successful create */ From 377b7ab1452418a90d6c46dcef2d4a2d4a4517af Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 13 Aug 2025 15:17:29 -0400 Subject: [PATCH 495/602] Add very basic test for kill_prior_tuples Previously our tests did not exercise kill_prior_tuples for hash and gist. For gist some related paths were reached, but gist's implementation seems to not work if all the dead tuples are on one page (or something like that). The coverage for other index types was rather incidental. Thus add an explicit test ensuring kill_prior_tuples works at all. 
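The heart of each permutation in the expected output below is a three-step probe, shown here in sketch form (condensed from the spec that follows):

-- remember the table's current heap access count
UPDATE counter SET heap_accesses =
    (SELECT heap_blks_read + heap_blks_hit
       FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
-- run an index scan
SELECT * FROM kill_prior_tuple WHERE key = 1;
-- report how many heap blocks that scan touched
SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses
  FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple';

After the DELETE, the first probe still touches one heap block; that visit is what discovers the rows are dead and lets the scan mark the index entries killed. A working kill_prior_tuples implementation then reports zero heap accesses on the second probe, as btree and the 500-row gist permutation do below, while the 10-row gist permutation keeps reporting one, matching the caveat above.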
Reviewed-by: Heikki Linnakangas Discussion: https://postgr.es/m/lxzj26ga6ippdeunz6kuncectr5gfuugmm2ry22qu6hcx6oid6@lzx3sjsqhmt6 --- .../isolation/expected/index-killtuples.out | 355 ++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + .../isolation/specs/index-killtuples.spec | 127 +++++++ 3 files changed, 483 insertions(+) create mode 100644 src/test/isolation/expected/index-killtuples.out create mode 100644 src/test/isolation/specs/index-killtuples.spec diff --git a/src/test/isolation/expected/index-killtuples.out b/src/test/isolation/expected/index-killtuples.out new file mode 100644 index 0000000000000..be7ddd756ef0e --- /dev/null +++ b/src/test/isolation/expected/index-killtuples.out @@ -0,0 +1,355 @@ +Parsed test spec with 1 sessions + +starting permutation: create_table fill_500 create_btree flush disable_seq disable_bitmap measure access flush result measure access flush result delete flush measure access flush result measure access flush result drop_table +step create_table: CREATE TEMPORARY TABLE kill_prior_tuple(key int not null, cat text not null); +step fill_500: INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 500) g(i); +step create_btree: CREATE INDEX kill_prior_tuple_btree ON kill_prior_tuple USING btree (key); +step flush: SELECT FROM pg_stat_force_next_flush(); +step disable_seq: SET enable_seqscan = false; +step disable_bitmap: SET enable_bitmapscan = false; +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +-------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +-------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step delete: DELETE FROM kill_prior_tuple; +step flush: SELECT FROM pg_stat_force_next_flush(); +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +-------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_btree on 
kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +-------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_btree on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 0 +(1 row) + +step drop_table: DROP TABLE IF EXISTS kill_prior_tuple; + +starting permutation: create_table fill_500 create_ext_btree_gist create_gist flush disable_seq disable_bitmap measure access flush result measure access flush result delete flush measure access flush result measure access flush result drop_table drop_ext_btree_gist +step create_table: CREATE TEMPORARY TABLE kill_prior_tuple(key int not null, cat text not null); +step fill_500: INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 500) g(i); +step create_ext_btree_gist: CREATE EXTENSION btree_gist; +step create_gist: CREATE INDEX kill_prior_tuple_gist ON kill_prior_tuple USING gist (key); +step flush: SELECT FROM pg_stat_force_next_flush(); +step disable_seq: SET enable_seqscan = false; +step disable_bitmap: SET enable_bitmapscan = false; +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; 
+new_heap_accesses +----------------- + 1 +(1 row) + +step delete: DELETE FROM kill_prior_tuple; +step flush: SELECT FROM pg_stat_force_next_flush(); +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 0 +(1 row) + +step drop_table: DROP TABLE IF EXISTS kill_prior_tuple; +step drop_ext_btree_gist: DROP EXTENSION btree_gist; + +starting permutation: create_table fill_10 create_ext_btree_gist create_gist flush disable_seq disable_bitmap measure access flush result measure access flush result delete flush measure access flush result measure access flush result drop_table drop_ext_btree_gist +step create_table: CREATE TEMPORARY TABLE kill_prior_tuple(key int not null, cat text not null); +step fill_10: INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 10) g(i); +step create_ext_btree_gist: CREATE EXTENSION btree_gist; +step create_gist: CREATE INDEX kill_prior_tuple_gist ON kill_prior_tuple USING gist (key); +step flush: SELECT FROM pg_stat_force_next_flush(); +step disable_seq: SET enable_seqscan = false; +step disable_bitmap: SET enable_bitmapscan = false; +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 
'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step delete: DELETE FROM kill_prior_tuple; +step flush: SELECT FROM pg_stat_force_next_flush(); +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_gist on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step drop_table: DROP TABLE IF EXISTS kill_prior_tuple; +step drop_ext_btree_gist: DROP EXTENSION btree_gist; + +starting permutation: create_table fill_500 create_hash flush disable_seq disable_bitmap measure access flush result measure access flush result delete flush measure access flush result measure access flush result drop_table +step create_table: CREATE TEMPORARY TABLE kill_prior_tuple(key int not null, cat text not null); +step fill_500: INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 500) g(i); +step create_hash: CREATE INDEX kill_prior_tuple_hash ON kill_prior_tuple USING hash (key); +step flush: SELECT FROM pg_stat_force_next_flush(); +step disable_seq: SET enable_seqscan = false; +step disable_bitmap: SET enable_bitmapscan = false; +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using 
kill_prior_tuple_hash on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=1.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step delete: DELETE FROM kill_prior_tuple; +step flush: SELECT FROM pg_stat_force_next_flush(); +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 1 +(1 row) + +step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); +step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; +QUERY PLAN +------------------------------------------------------------------------------------- +Index Scan using kill_prior_tuple_hash on kill_prior_tuple (actual rows=0.00 loops=1) + Index Cond: (key = 1) + Index Searches: 1 +(3 rows) + +step flush: SELECT FROM pg_stat_force_next_flush(); +step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; +new_heap_accesses +----------------- + 0 +(1 row) + +step drop_table: DROP TABLE IF EXISTS kill_prior_tuple; + +starting permutation: create_table fill_500 create_ext_btree_gin create_gin flush disable_seq delete flush measure access flush result measure access flush result drop_table drop_ext_btree_gin +step create_table: CREATE TEMPORARY TABLE kill_prior_tuple(key int not null, cat text not null); +step fill_500: INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 500) g(i); +step create_ext_btree_gin: CREATE EXTENSION btree_gin; +step create_gin: CREATE INDEX kill_prior_tuple_gin ON kill_prior_tuple USING gin (key); +step flush: SELECT FROM pg_stat_force_next_flush(); +step disable_seq: 
SET enable_seqscan = false;
+step delete: DELETE FROM kill_prior_tuple;
+step flush: SELECT FROM pg_stat_force_next_flush();
+step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+QUERY PLAN
+--------------------------------------------------------------------------
+Bitmap Heap Scan on kill_prior_tuple (actual rows=0.00 loops=1)
+  Recheck Cond: (key = 1)
+  Heap Blocks: exact=1
+  ->  Bitmap Index Scan on kill_prior_tuple_gin (actual rows=1.00 loops=1)
+        Index Cond: (key = 1)
+        Index Searches: 1
+(6 rows)
+
+step flush: SELECT FROM pg_stat_force_next_flush();
+step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple';
+new_heap_accesses
+-----------------
+                1
+(1 row)
+
+step measure: UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
+step access: EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1;
+QUERY PLAN
+--------------------------------------------------------------------------
+Bitmap Heap Scan on kill_prior_tuple (actual rows=0.00 loops=1)
+  Recheck Cond: (key = 1)
+  Heap Blocks: exact=1
+  ->  Bitmap Index Scan on kill_prior_tuple_gin (actual rows=1.00 loops=1)
+        Index Cond: (key = 1)
+        Index Searches: 1
+(6 rows)
+
+step flush: SELECT FROM pg_stat_force_next_flush();
+step result: SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple';
+new_heap_accesses
+-----------------
+                1
+(1 row)
+
+step drop_table: DROP TABLE IF EXISTS kill_prior_tuple;
+step drop_ext_btree_gin: DROP EXTENSION btree_gin;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index e3c669a29c7aa..4411d3c86ddad 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -16,6 +16,7 @@ test: ri-trigger
 test: partial-index
 test: two-ids
 test: multiple-row-versions
+test: index-killtuples
 test: index-only-scan
 test: index-only-bitmapscan
 test: predicate-lock-hot-tuple
diff --git a/src/test/isolation/specs/index-killtuples.spec b/src/test/isolation/specs/index-killtuples.spec
new file mode 100644
index 0000000000000..77fe8c689a793
--- /dev/null
+++ b/src/test/isolation/specs/index-killtuples.spec
@@ -0,0 +1,127 @@
+# Basic testing of killtuples / kill_prior_tuple / all_dead handling
+# for various index AMs
+#
+# This tests just enough to ensure that the kill* routines are actually
+# executed and do something approximately reasonable. It's *not* sufficient
+# testing for adding killitems support to a new AM!
+#
+# This doesn't really need to be an isolation test, it could be written as a
+# regular regression test. However, writing it as an isolation test ends up a
+# *lot* less verbose.
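+#
+# The measurement pattern used throughout this file is, as a sketch (the
+# real steps are defined below; this is just the idea, not extra test code):
+# snapshot the table's heap-access counters, run an index scan, then report
+# how many new heap accesses the scan caused:
+#
+#   UPDATE counter SET heap_accesses =
+#       (SELECT heap_blks_read + heap_blks_hit
+#          FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple');
+#   EXPLAIN (ANALYZE) SELECT * FROM kill_prior_tuple WHERE key = 1;
+#   SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses
+#          AS new_heap_accesses
+#     FROM counter, pg_statio_all_tables
+#    WHERE relname = 'kill_prior_tuple';
+#
+# Once a scan has reported a tuple dead back to the AM, a well-behaved AM
+# should stop visiting the heap for it, so new_heap_accesses is expected
+# to drop to 0 on the repeated access.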
+ +setup +{ + CREATE TABLE counter(heap_accesses int); + INSERT INTO counter(heap_accesses) VALUES (0); +} + +teardown +{ + DROP TABLE counter; +} + +session s1 +# to ensure GUCs are reset +setup { RESET ALL; } + +step disable_seq { SET enable_seqscan = false; } + +step disable_bitmap { SET enable_bitmapscan = false; } + +# use a temporary table to make sure no other session can interfere with +# visibility determinations +step create_table { CREATE TEMPORARY TABLE kill_prior_tuple(key int not null, cat text not null); } + +step fill_10 { INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 10) g(i); } + +step fill_500 { INSERT INTO kill_prior_tuple(key, cat) SELECT g.i, 'a' FROM generate_series(1, 500) g(i); } + +# column-less select to make output easier to read +step flush { SELECT FROM pg_stat_force_next_flush(); } + +step measure { UPDATE counter SET heap_accesses = (SELECT heap_blks_read + heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'kill_prior_tuple'); } + +step result { SELECT heap_blks_read + heap_blks_hit - counter.heap_accesses AS new_heap_accesses FROM counter, pg_statio_all_tables WHERE relname = 'kill_prior_tuple'; } + +step access { EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF, BUFFERS OFF) SELECT * FROM kill_prior_tuple WHERE key = 1; } + +step delete { DELETE FROM kill_prior_tuple; } + +step drop_table { DROP TABLE IF EXISTS kill_prior_tuple; } + +### steps for testing btree indexes ### +step create_btree { CREATE INDEX kill_prior_tuple_btree ON kill_prior_tuple USING btree (key); } + +### steps for testing gist indexes ### +# Creating the extensions takes time, so we don't want to do so when testing +# other AMs +step create_ext_btree_gist { CREATE EXTENSION btree_gist; } +step drop_ext_btree_gist { DROP EXTENSION btree_gist; } +step create_gist { CREATE INDEX kill_prior_tuple_gist ON kill_prior_tuple USING gist (key); } + +### steps for testing gin indexes ### +# See create_ext_btree_gist +step create_ext_btree_gin { CREATE EXTENSION btree_gin; } +step drop_ext_btree_gin { DROP EXTENSION btree_gin; } +step create_gin { CREATE INDEX kill_prior_tuple_gin ON kill_prior_tuple USING gin (key); } + +### steps for testing hash indexes ### +step create_hash { CREATE INDEX kill_prior_tuple_hash ON kill_prior_tuple USING hash (key); } + + +# test killtuples with btree index +permutation + create_table fill_500 create_btree flush + disable_seq disable_bitmap + # show each access to non-deleted tuple increments heap_blks_* + measure access flush result + measure access flush result + delete flush + # first access after accessing deleted tuple still needs to access heap + measure access flush result + # but after kill_prior_tuple did its thing, we shouldn't access heap anymore + measure access flush result + drop_table + +# Same as first permutation, except testing gist +permutation + create_table fill_500 create_ext_btree_gist create_gist flush + disable_seq disable_bitmap + measure access flush result + measure access flush result + delete flush + measure access flush result + measure access flush result + drop_table drop_ext_btree_gist + +# Test gist, but with fewer rows - shows that killitems doesn't work anymore! 
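+# (Compare the expected output: after the delete, the final result step
+# keeps reporting 1 new heap access instead of dropping to 0 as in the
+# 500-row gist permutation above.)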
+permutation
+  create_table fill_10 create_ext_btree_gist create_gist flush
+  disable_seq disable_bitmap
+  measure access flush result
+  measure access flush result
+  delete flush
+  measure access flush result
+  measure access flush result
+  drop_table drop_ext_btree_gist
+
+# Same as first permutation, except testing hash
+permutation
+  create_table fill_500 create_hash flush
+  disable_seq disable_bitmap
+  measure access flush result
+  measure access flush result
+  delete flush
+  measure access flush result
+  measure access flush result
+  drop_table
+
+# Similar to first permutation, except that gin does not have killtuples support
+permutation
+  create_table fill_500 create_ext_btree_gin create_gin flush
+  disable_seq
+  delete flush
+  measure access flush result
+  # will still fetch from heap
+  measure access flush result
+  drop_table drop_ext_btree_gin

From 66f8765c5331a7a27eafb1d498a12ac04bfbdab2 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Wed, 13 Aug 2025 15:52:10 -0400
Subject: [PATCH 496/602] ci: windows: Stop using DEBUG:FASTLINK

Currently the pdb files for libpq and some other libraries are named
the same for the static and shared libraries. That has been the case
for a long time, but recently started failing, after an image update
started using a newer ninja version.

The issue is not itself caused by ninja, but just made visible, as the
newer version optimizes the build order and builds the shared libpq
earlier than the static library. Previously both static and shared
libraries were built at the same time, which prevented msvc from
detecting the issue. When using /DEBUG:FASTLINK pdb files cannot be
updated, triggering the error.

We were using /DEBUG:FASTLINK due to running out of memory in the
past, but that was when using container-based CI images, rather than
full VMs.

This isn't really the correct fix (that'd be to deconflict the pdb
file names), but we'd like to get CI to become green again, and a
proper fix (in meson) will presumably take longer.

Suggested-by: Andres Freund
Author: Nazir Bilal Yavuz
Reviewed-by: Jacob Champion
Reviewed-by: Peter Eisentraut
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/CAN55FZ1RuBhJmPWs3Oi%3D9UoezDfrtO-VaU67db5%2B0_uy19uF%2BA%40mail.gmail.com
Backpatch-through: 16
---
 .cirrus.tasks.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index 1a366975d824f..30891801339b5 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -730,10 +730,9 @@ task:
     echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
     type c:\Windows\System32\Drivers\etc\hosts

-  # Use /DEBUG:FASTLINK to avoid high memory usage during linking
   configure_script: |
     vcvarsall x64
-    meson setup --backend ninja --buildtype debug -Dc_link_args=/DEBUG:FASTLINK -Dcassert=true -Dinjection_points=true -Db_pch=true -Dextra_lib_dirs=c:\openssl\1.1\lib -Dextra_include_dirs=c:\openssl\1.1\include -DTAR=%TAR% build
+    meson setup --backend ninja --buildtype debug -Dcassert=true -Dinjection_points=true -Db_pch=true -Dextra_lib_dirs=c:\openssl\1.1\lib -Dextra_include_dirs=c:\openssl\1.1\include -DTAR=%TAR% build

   build_script: |
     vcvarsall x64

From 2a600a93c7be5b0bf8cacb1af78009db12bc4857 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 13 Aug 2025 16:35:06 -0400
Subject: [PATCH 497/602] Make type Datum be 8 bytes wide everywhere.

This patch makes sizeof(Datum) be 8 on all platforms including 32-bit
ones.
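For illustration, the resulting conversion behavior looks like the
following minimal C sketch (simplified stand-ins for the postgres.h
definitions, not code from this patch):

    /* Hedged sketch: simplified versions of the postgres.h conversions. */
    #include <stdint.h>
    #include <assert.h>

    typedef uint64_t Datum;             /* now 8 bytes on every platform */

    static inline Datum Int64GetDatum(int64_t x) { return (Datum) x; }
    static inline int64_t DatumGetInt64(Datum d) { return (int64_t) d; }
    static inline Datum PointerGetDatum(const void *p) { return (Datum) (uintptr_t) p; }
    static inline char *DatumGetPointer(Datum d) { return (char *) (uintptr_t) d; }

    int main(void)
    {
        int64_t v = -42;
        int x = 7;

        /* int64 fits in a Datum by value, even on 32-bit builds */
        assert(DatumGetInt64(Int64GetDatum(v)) == v);
        /* pointers still round-trip through the (now wider) Datum */
        assert(DatumGetPointer(PointerGetDatum(&x)) == (char *) &x);
        return 0;
    }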
The objective is to allow USE_FLOAT8_BYVAL to be true everywhere, and in consequence to remove a lot of code that is specific to pass-by-reference handling of float8, int8, etc. The code for abbreviated sort keys can be simplified similarly. In this way we can reduce the maintenance effort involved in supporting 32-bit platforms, without going so far as to actually desupport them. Since Datum is strictly an in-memory concept, this has no impact on on-disk storage, though an initdb or pg_upgrade will be needed to fix affected catalog entries. We have required platforms to support [u]int64 for ages, so this breaks no supported platform. We can expect that this change will make 32-bit builds a bit slower and more memory-hungry, although being able to use pass-by-value handling of 8-byte types may buy back some of that. But we stopped optimizing for 32-bit cases a long time ago, and this seems like just another step on that path. This initial patch simply forces the correct type definition and USE_FLOAT8_BYVAL setting, and cleans up a couple of minor compiler complaints that ensued. This is sufficient for testing purposes. In the wake of a bunch of Datum-conversion cleanups by Peter Eisentraut, this now compiles cleanly with gcc on a 32-bit platform. (I'd only tested the previous version with clang, which it turns out is less picky than gcc about width-changing coercions.) There is a good deal of now-dead code that I'll remove in separate follow-up patches. A catversion bump is required because this affects initial catalog contents (on 32-bit machines) in two ways: pg_type.typbyval changes for some built-in types, and Const nodes in stored views/rules will now have 8 bytes not 4 for pass-by-value types. Author: Tom Lane Reviewed-by: Peter Eisentraut Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us --- src/backend/storage/ipc/ipc.c | 2 +- src/backend/utils/resowner/resowner.c | 7 ++----- src/include/catalog/catversion.h | 2 +- src/include/nodes/nodes.h | 8 ++++++-- src/include/pg_config_manual.h | 13 ++++--------- src/include/postgres.h | 21 +++++++++++++-------- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index 567739b5be93a..2704e80b3a7d9 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -399,7 +399,7 @@ cancel_before_shmem_exit(pg_on_exit_callback function, Datum arg) before_shmem_exit_list[before_shmem_exit_index - 1].arg == arg) --before_shmem_exit_index; else - elog(ERROR, "before_shmem_exit callback (%p,0x%" PRIxPTR ") is not the latest entry", + elog(ERROR, "before_shmem_exit callback (%p,0x%" PRIx64 ") is not the latest entry", function, arg); } diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c index d39f3e1b655cd..fca84ded6ddc3 100644 --- a/src/backend/utils/resowner/resowner.c +++ b/src/backend/utils/resowner/resowner.c @@ -231,11 +231,8 @@ hash_resource_elem(Datum value, const ResourceOwnerDesc *kind) * 'kind' into the hash. Just add it with hash_combine(), it perturbs the * result enough for our purposes. 
*/ -#if SIZEOF_DATUM == 8 - return hash_combine64(murmurhash64((uint64) value), (uint64) kind); -#else - return hash_combine(murmurhash32((uint32) value), (uint32) kind); -#endif + return hash_combine64(murmurhash64((uint64) value), + (uint64) (uintptr_t) kind); } /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index c4fe8b991af46..0ca415b426148 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202508051 +#define CATALOG_VERSION_NO 202508131 #endif diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index fbe333d88fac9..b2dc380b57b10 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -188,6 +188,8 @@ castNodeImpl(NodeTag type, void *ptr) * ---------------------------------------------------------------- */ +#ifndef FRONTEND + /* * nodes/{outfuncs.c,print.c} */ @@ -198,7 +200,7 @@ extern void outNode(struct StringInfoData *str, const void *obj); extern void outToken(struct StringInfoData *str, const char *s); extern void outBitmapset(struct StringInfoData *str, const struct Bitmapset *bms); -extern void outDatum(struct StringInfoData *str, uintptr_t value, +extern void outDatum(struct StringInfoData *str, Datum value, int typlen, bool typbyval); extern char *nodeToString(const void *obj); extern char *nodeToStringWithLocations(const void *obj); @@ -212,7 +214,7 @@ extern void *stringToNode(const char *str); extern void *stringToNodeWithLocations(const char *str); #endif extern struct Bitmapset *readBitmapset(void); -extern uintptr_t readDatum(bool typbyval); +extern Datum readDatum(bool typbyval); extern bool *readBoolCols(int numCols); extern int *readIntCols(int numCols); extern Oid *readOidCols(int numCols); @@ -235,6 +237,8 @@ extern void *copyObjectImpl(const void *from); */ extern bool equal(const void *a, const void *b); +#endif /* !FRONTEND */ + /* * Typedef for parse location. This is just an int, but this way diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 125d3eb5fff5e..7e1aa4223326e 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -74,17 +74,12 @@ #define PARTITION_MAX_KEYS 32 /* - * Decide whether built-in 8-byte types, including float8, int8, and - * timestamp, are passed by value. This is on by default if sizeof(Datum) >= - * 8 (that is, on 64-bit platforms). If sizeof(Datum) < 8 (32-bit platforms), - * this must be off. We keep this here as an option so that it is easy to - * test the pass-by-reference code paths on 64-bit platforms. - * - * Changing this requires an initdb. + * This symbol is now vestigial: built-in 8-byte types, including float8, + * int8, and timestamp, are always passed by value since we require Datum + * to be wide enough to permit that. We continue to define the symbol here + * so as not to unnecessarily break extension code. */ -#if SIZEOF_VOID_P >= 8 #define USE_FLOAT8_BYVAL 1 -#endif /* diff --git a/src/include/postgres.h b/src/include/postgres.h index 8a41a6686877f..e81829bfa6fef 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -58,15 +58,22 @@ /* * A Datum contains either a value of a pass-by-value type or a pointer to a - * value of a pass-by-reference type. Therefore, we require: - * - * sizeof(Datum) == sizeof(void *) == 4 or 8 + * value of a pass-by-reference type. Therefore, we must have + * sizeof(Datum) >= sizeof(void *). 
No current or foreseeable Postgres + * platform has pointers wider than 8 bytes, and standardizing on Datum being + * exactly 8 bytes has advantages in reducing cross-platform differences. * * The functions below and the analogous functions for other types should be used to * convert between a Datum and the appropriate C type. */ -typedef uintptr_t Datum; +typedef uint64_t Datum; + +/* + * This symbol is now vestigial, but we continue to define it so as not to + * unnecessarily break extension code. + */ +#define SIZEOF_DATUM 8 /* * A NullableDatum is used in places where both a Datum and its nullness needs @@ -83,8 +90,6 @@ typedef struct NullableDatum /* due to alignment padding this could be used for flags for free */ } NullableDatum; -#define SIZEOF_DATUM SIZEOF_VOID_P - /* * DatumGetBool * Returns boolean value of a datum. @@ -316,7 +321,7 @@ CommandIdGetDatum(CommandId X) static inline Pointer DatumGetPointer(Datum X) { - return (Pointer) X; + return (Pointer) (uintptr_t) X; } /* @@ -326,7 +331,7 @@ DatumGetPointer(Datum X) static inline Datum PointerGetDatum(const void *X) { - return (Datum) X; + return (Datum) (uintptr_t) X; } /* From 6aebedc38497ecebda22caf61bcadf78a8331b52 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 13 Aug 2025 17:11:54 -0400 Subject: [PATCH 498/602] Grab the low-hanging fruit from forcing sizeof(Datum) to 8. Remove conditionally-compiled code for smaller Datum widths, and simplify comments that describe cases no longer of interest. I also fixed up a few more places that were not using DatumGetIntXX where they should, and made some cosmetic adjustments such as using sizeof(int64) not sizeof(Datum) in places where that fit better with the surrounding code. One thing I remembered while preparing this part is that SP-GiST stores pass-by-value prefix keys as Datums, so that the on-disk representation depends on sizeof(Datum). That's even more unfortunate than the existing commentary makes it out to be, because now there is a hazard that the change of sizeof(Datum) will break SP-GiST indexes on 32-bit machines. It appears that there are no existing SP-GiST opclasses that are actually affected; and if there are some that I didn't find, the number of installations that are using them on 32-bit machines is doubtless tiny. So I'm proceeding on the assumption that we can get away with this, but it's something to worry about. (gininsert.c looks like it has a similar problem, but it's okay because the "tuples" it's constructing are just transient data within the tuplesort step. That's pretty poorly documented though, so I added some comments.) 
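To make the SP-GiST hazard concrete, here is a hedged sketch
(illustrative only, not code from this patch): a by-value prefix key
stored at Datum width means the on-disk size tracks sizeof(Datum), so
widening Datum from 4 to 8 bytes changes what a 32-bit build writes and
expects to read.

    /* Hedged sketch: why on-disk data should not depend on sizeof(Datum). */
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    typedef uint64_t Datum;      /* was effectively 4 bytes on 32-bit builds */

    /* Store a pass-by-value prefix key the way SP-GiST does: at Datum width. */
    static size_t store_prefix(char *buf, Datum key)
    {
        memcpy(buf, &key, sizeof(Datum));
        return sizeof(Datum);    /* 8 now; an old 32-bit build wrote 4 */
    }

    int main(void)
    {
        char buf[sizeof(Datum)];

        printf("prefix occupies %zu bytes on disk\n",
               store_prefix(buf, (Datum) 42));
        return 0;
    }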
Author: Tom Lane Reviewed-by: Peter Eisentraut Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us --- doc/src/sgml/xfunc.sgml | 3 +- src/backend/access/gin/gininsert.c | 5 +- src/backend/access/gist/gistproc.c | 10 +-- src/backend/access/nbtree/nbtcompare.c | 20 ----- src/backend/catalog/pg_type.c | 4 +- src/backend/utils/adt/mac.c | 27 +++--- src/backend/utils/adt/network.c | 54 +++--------- src/backend/utils/adt/numeric.c | 117 +++---------------------- src/backend/utils/adt/timestamp.c | 21 ----- src/backend/utils/adt/uuid.c | 6 +- src/backend/utils/adt/varlena.c | 27 +++--- src/backend/utils/sort/tuplesort.c | 8 -- src/include/access/gin_tuple.h | 4 +- src/include/access/spgist_private.h | 14 +-- src/include/access/tupmacs.h | 15 +--- src/include/port/pg_bswap.h | 17 +--- src/include/utils/sortsupport.h | 4 - 17 files changed, 72 insertions(+), 284 deletions(-) diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index 30219f432d970..f116d0648e559 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -2051,8 +2051,7 @@ PG_MODULE_MAGIC_EXT( - By-value types can only be 1, 2, or 4 bytes in length - (also 8 bytes, if sizeof(Datum) is 8 on your machine). + By-value types can only be 1, 2, 4, or 8 bytes in length. You should be careful to define your types such that they will be the same size (in bytes) on all architectures. For example, the long type is dangerous because it is 4 bytes on some diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 47b1898a06463..e9d4b27427e59 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -2189,7 +2189,10 @@ typedef struct * we simply copy the whole Datum, so that we don't have to care about stuff * like endianess etc. We could make it a little bit smaller, but it's not * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the - * start of the TID list anyway. So we wouldn't save anything. + * start of the TID list anyway. So we wouldn't save anything. (This would + * not be a good idea for the permanent in-index data, since we'd prefer + * that that not depend on sizeof(Datum). But this is just a transient + * representation to use while sorting the data.) * * The TID list is serialized as compressed - it's highly compressible, and * we already have ginCompressPostingList for this purpose. The list may be diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 392163cb22900..f2ec6cbe2e524 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -1707,8 +1707,8 @@ gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup) * Abbreviated version of Z-order comparison * * The abbreviated format is a Z-order value computed from the two 32-bit - * floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the - * abbreviated Datum, otherwise use its most significant bits. + * floats. Now that sizeof(Datum) is always 8, the 64-bit Z-order value + * always fits fully in the abbreviated Datum. 
*/ static Datum gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup) @@ -1718,11 +1718,7 @@ gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup) z = point_zorder_internal(p->x, p->y); -#if SIZEOF_DATUM == 8 - return (Datum) z; -#else - return (Datum) (z >> 32); -#endif + return UInt64GetDatum(z); } /* diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c index e1b52acd20dc2..188c27b4925f7 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -278,32 +278,12 @@ btint8cmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(A_LESS_THAN_B); } -#if SIZEOF_DATUM < 8 -static int -btint8fastcmp(Datum x, Datum y, SortSupport ssup) -{ - int64 a = DatumGetInt64(x); - int64 b = DatumGetInt64(y); - - if (a > b) - return A_GREATER_THAN_B; - else if (a == b) - return 0; - else - return A_LESS_THAN_B; -} -#endif - Datum btint8sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); -#if SIZEOF_DATUM >= 8 ssup->comparator = ssup_datum_signed_cmp; -#else - ssup->comparator = btint8fastcmp; -#endif PG_RETURN_VOID(); } diff --git a/src/backend/catalog/pg_type.c b/src/backend/catalog/pg_type.c index 1ec523ee3e59e..3cd9b69edc575 100644 --- a/src/backend/catalog/pg_type.c +++ b/src/backend/catalog/pg_type.c @@ -285,8 +285,7 @@ TypeCreate(Oid newTypeOid, errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d", alignment, internalSize))); } -#if SIZEOF_DATUM == 8 - else if (internalSize == (int16) sizeof(Datum)) + else if (internalSize == (int16) sizeof(int64)) { if (alignment != TYPALIGN_DOUBLE) ereport(ERROR, @@ -294,7 +293,6 @@ TypeCreate(Oid newTypeOid, errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d", alignment, internalSize))); } -#endif else ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c index 3644e9735f5d0..bb38ef2f5e440 100644 --- a/src/backend/utils/adt/mac.c +++ b/src/backend/utils/adt/mac.c @@ -481,33 +481,26 @@ macaddr_abbrev_convert(Datum original, SortSupport ssup) Datum res; /* - * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of - * the MAC address in. There will be two bytes of zero padding on the end - * of the least significant bits. + * Zero out the 8-byte Datum and copy in the 6 bytes of the MAC address. + * There will be two bytes of zero padding on the end of the least + * significant bits. */ -#if SIZEOF_DATUM == 8 - memset(&res, 0, SIZEOF_DATUM); + StaticAssertStmt(sizeof(res) >= sizeof(macaddr), + "Datum is too small for macaddr"); + memset(&res, 0, sizeof(res)); memcpy(&res, authoritative, sizeof(macaddr)); -#else /* SIZEOF_DATUM != 8 */ - memcpy(&res, authoritative, SIZEOF_DATUM); -#endif uss->input_count += 1; /* - * Cardinality estimation. The estimate uses uint32, so on a 64-bit - * architecture, XOR the two 32-bit halves together to produce slightly - * more entropy. The two zeroed bytes won't have any practical impact on - * this operation. + * Cardinality estimation. The estimate uses uint32, so XOR the two 32-bit + * halves together to produce slightly more entropy. The two zeroed bytes + * won't have any practical impact on this operation. 
*/ if (uss->estimating) { uint32 tmp; -#if SIZEOF_DATUM == 8 - tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); -#else /* SIZEOF_DATUM != 8 */ - tmp = (uint32) res; -#endif + tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); } diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index 9fd211b2d4576..3cb0ab6829ae8 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -567,24 +567,11 @@ network_abbrev_abort(int memtupcount, SortSupport ssup) * * When generating abbreviated keys for SortSupport, we pack as much as we can * into a datum while ensuring that when comparing those keys as integers, - * these rules will be respected. Exact contents depend on IP family and datum - * size. + * these rules will be respected. Exact contents depend on IP family: * * IPv4 * ---- * - * 4 byte datums: - * - * Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in - * every case below) followed by all but 1 of the netmasked bits. - * - * +----------+---------------------+ - * | 1 bit IP | 31 bits network | (1 bit network - * | family | (truncated) | omitted) - * +----------+---------------------+ - * - * 8 byte datums: - * * We have space to store all netmasked bits, followed by the netmask size, * followed by 25 bits of the subnet (25 bits is usually more than enough in * practice). cidr datums always have all-zero subnet bits. @@ -597,15 +584,6 @@ network_abbrev_abort(int memtupcount, SortSupport ssup) * IPv6 * ---- * - * 4 byte datums: - * - * +----------+---------------------+ - * | 1 bit IP | 31 bits network | (up to 97 bits - * | family | (truncated) | network omitted) - * +----------+---------------------+ - * - * 8 byte datums: - * * +----------+---------------------------------+ * | 1 bit IP | 63 bits network | (up to 65 bits * | family | (truncated) | network omitted) @@ -628,8 +606,7 @@ network_abbrev_convert(Datum original, SortSupport ssup) /* * Get an unsigned integer representation of the IP address by taking its * first 4 or 8 bytes. Always take all 4 bytes of an IPv4 address. Take - * the first 8 bytes of an IPv6 address with an 8 byte datum and 4 bytes - * otherwise. + * the first 8 bytes of an IPv6 address. * * We're consuming an array of unsigned char, so byteswap on little endian * systems (an inet's ipaddr field stores the most significant byte @@ -659,7 +636,7 @@ network_abbrev_convert(Datum original, SortSupport ssup) ipaddr_datum = DatumBigEndianToNative(ipaddr_datum); /* Initialize result with ipfamily (most significant) bit set */ - res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1); + res = ((Datum) 1) << (sizeof(Datum) * BITS_PER_BYTE - 1); } /* @@ -668,8 +645,7 @@ network_abbrev_convert(Datum original, SortSupport ssup) * while low order bits go in "subnet" component when there is space for * one. This is often accomplished by generating a temp datum subnet * bitmask, which we may reuse later when generating the subnet bits - * themselves. (Note that subnet bits are only used with IPv4 datums on - * platforms where datum is 8 bytes.) + * themselves. * * The number of bits in subnet is used to generate a datum subnet * bitmask. 
For example, with a /24 IPv4 datum there are 8 subnet bits @@ -681,14 +657,14 @@ network_abbrev_convert(Datum original, SortSupport ssup) subnet_size = ip_maxbits(authoritative) - ip_bits(authoritative); Assert(subnet_size >= 0); /* subnet size must work with prefix ipaddr cases */ - subnet_size %= SIZEOF_DATUM * BITS_PER_BYTE; + subnet_size %= sizeof(Datum) * BITS_PER_BYTE; if (ip_bits(authoritative) == 0) { /* Fit as many ipaddr bits as possible into subnet */ subnet_bitmask = ((Datum) 0) - 1; network = 0; } - else if (ip_bits(authoritative) < SIZEOF_DATUM * BITS_PER_BYTE) + else if (ip_bits(authoritative) < sizeof(Datum) * BITS_PER_BYTE) { /* Split ipaddr bits between network and subnet */ subnet_bitmask = (((Datum) 1) << subnet_size) - 1; @@ -701,12 +677,11 @@ network_abbrev_convert(Datum original, SortSupport ssup) network = ipaddr_datum; } -#if SIZEOF_DATUM == 8 if (ip_family(authoritative) == PGSQL_AF_INET) { /* - * IPv4 with 8 byte datums: keep all 32 netmasked bits, netmask size, - * and most significant 25 subnet bits + * IPv4: keep all 32 netmasked bits, netmask size, and most + * significant 25 subnet bits */ Datum netmask_size = (Datum) ip_bits(authoritative); Datum subnet; @@ -750,12 +725,11 @@ network_abbrev_convert(Datum original, SortSupport ssup) res |= network | netmask_size | subnet; } else -#endif { /* - * 4 byte datums, or IPv6 with 8 byte datums: Use as many of the - * netmasked bits as will fit in final abbreviated key. Avoid - * clobbering the ipfamily bit that was set earlier. + * IPv6: Use as many of the netmasked bits as will fit in final + * abbreviated key. Avoid clobbering the ipfamily bit that was set + * earlier. */ res |= network >> 1; } @@ -767,11 +741,7 @@ network_abbrev_convert(Datum original, SortSupport ssup) { uint32 tmp; -#if SIZEOF_DATUM == 8 - tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); -#else /* SIZEOF_DATUM != 8 */ - tmp = (uint32) res; -#endif + tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); } diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 122f2efab8b37..a79aea5f7824c 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -392,30 +392,21 @@ typedef struct NumericSumAccum /* * We define our own macros for packing and unpacking abbreviated-key - * representations for numeric values in order to avoid depending on - * USE_FLOAT8_BYVAL. The type of abbreviation we use is based only on - * the size of a datum, not the argument-passing convention for float8. + * representations, just to have a notational indication that that's + * what we're doing. Now that sizeof(Datum) is always 8, we can rely + * on fitting an int64 into Datum. * - * The range of abbreviations for finite values is from +PG_INT64/32_MAX - * to -PG_INT64/32_MAX. NaN has the abbreviation PG_INT64/32_MIN, and we + * The range of abbreviations for finite values is from +PG_INT64_MAX + * to -PG_INT64_MAX. NaN has the abbreviation PG_INT64_MIN, and we * define the sort ordering to make that work out properly (see further * comments below). PINF and NINF share the abbreviations of the largest * and smallest finite abbreviation classes. 
*/ -#define NUMERIC_ABBREV_BITS (SIZEOF_DATUM * BITS_PER_BYTE) -#if SIZEOF_DATUM == 8 -#define NumericAbbrevGetDatum(X) ((Datum) (X)) -#define DatumGetNumericAbbrev(X) ((int64) (X)) +#define NumericAbbrevGetDatum(X) Int64GetDatum(X) +#define DatumGetNumericAbbrev(X) DatumGetInt64(X) #define NUMERIC_ABBREV_NAN NumericAbbrevGetDatum(PG_INT64_MIN) #define NUMERIC_ABBREV_PINF NumericAbbrevGetDatum(-PG_INT64_MAX) #define NUMERIC_ABBREV_NINF NumericAbbrevGetDatum(PG_INT64_MAX) -#else -#define NumericAbbrevGetDatum(X) ((Datum) (X)) -#define DatumGetNumericAbbrev(X) ((int32) (X)) -#define NUMERIC_ABBREV_NAN NumericAbbrevGetDatum(PG_INT32_MIN) -#define NUMERIC_ABBREV_PINF NumericAbbrevGetDatum(-PG_INT32_MAX) -#define NUMERIC_ABBREV_NINF NumericAbbrevGetDatum(PG_INT32_MAX) -#endif /* ---------- @@ -2096,12 +2087,11 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2, * while this could be worked on itself, the abbreviation strategy gives more * speedup in many common cases. * - * Two different representations are used for the abbreviated form, one in - * int32 and one in int64, whichever fits into a by-value Datum. In both cases - * the representation is negated relative to the original value, because we use - * the largest negative value for NaN, which sorts higher than other values. We - * convert the absolute value of the numeric to a 31-bit or 63-bit positive - * value, and then negate it if the original number was positive. + * The abbreviated format is an int64. The representation is negated relative + * to the original value, because we use the largest negative value for NaN, + * which sorts higher than other values. We convert the absolute value of the + * numeric to a 63-bit positive value, and then negate it if the original + * number was positive. * * We abort the abbreviation process if the abbreviation cardinality is below * 0.01% of the row count (1 per 10k non-null rows). The actual break-even @@ -2328,7 +2318,7 @@ numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup) } /* - * Abbreviate a NumericVar according to the available bit size. + * Abbreviate a NumericVar into the 64-bit sortsupport size. * * The 31-bit value is constructed as: * @@ -2372,9 +2362,6 @@ numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup) * with all bits zero. This allows simple comparisons to work on the composite * value. */ - -#if NUMERIC_ABBREV_BITS == 64 - static Datum numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss) { @@ -2426,84 +2413,6 @@ numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss) return NumericAbbrevGetDatum(result); } -#endif /* NUMERIC_ABBREV_BITS == 64 */ - -#if NUMERIC_ABBREV_BITS == 32 - -static Datum -numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss) -{ - int ndigits = var->ndigits; - int weight = var->weight; - int32 result; - - if (ndigits == 0 || weight < -11) - { - result = 0; - } - else if (weight > 20) - { - result = PG_INT32_MAX; - } - else - { - NumericDigit nxt1 = (ndigits > 1) ? var->digits[1] : 0; - - weight = (weight + 11) * 4; - - result = var->digits[0]; - - /* - * "result" now has 1 to 4 nonzero decimal digits. 
We pack in more - * digits to make 7 in total (largest we can fit in 24 bits) - */ - - if (result > 999) - { - /* already have 4 digits, add 3 more */ - result = (result * 1000) + (nxt1 / 10); - weight += 3; - } - else if (result > 99) - { - /* already have 3 digits, add 4 more */ - result = (result * 10000) + nxt1; - weight += 2; - } - else if (result > 9) - { - NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0; - - /* already have 2 digits, add 5 more */ - result = (result * 100000) + (nxt1 * 10) + (nxt2 / 1000); - weight += 1; - } - else - { - NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0; - - /* already have 1 digit, add 6 more */ - result = (result * 1000000) + (nxt1 * 100) + (nxt2 / 100); - } - - result = result | (weight << 24); - } - - /* the abbrev is negated relative to the original */ - if (var->sign == NUMERIC_POS) - result = -result; - - if (nss->estimating) - { - uint32 tmp = (uint32) result; - - addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); - } - - return NumericAbbrevGetDatum(result); -} - -#endif /* NUMERIC_ABBREV_BITS == 32 */ /* * Ordinary (non-sortsupport) comparisons follow. diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index e640b48205b40..3e5f9dc1458e1 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2275,33 +2275,12 @@ timestamp_cmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(timestamp_cmp_internal(dt1, dt2)); } -#if SIZEOF_DATUM < 8 -/* note: this is used for timestamptz also */ -static int -timestamp_fastcmp(Datum x, Datum y, SortSupport ssup) -{ - Timestamp a = DatumGetTimestamp(x); - Timestamp b = DatumGetTimestamp(y); - - return timestamp_cmp_internal(a, b); -} -#endif - Datum timestamp_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); -#if SIZEOF_DATUM >= 8 - - /* - * If this build has pass-by-value timestamps, then we can use a standard - * comparator function. - */ ssup->comparator = ssup_datum_signed_cmp; -#else - ssup->comparator = timestamp_fastcmp; -#endif PG_RETURN_VOID(); } diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index bce7309c1833a..7413239f7af16 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -398,11 +398,7 @@ uuid_abbrev_convert(Datum original, SortSupport ssup) { uint32 tmp; -#if SIZEOF_DATUM == 8 - tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); -#else /* SIZEOF_DATUM != 8 */ - tmp = (uint32) res; -#endif + tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 11b442a5941c9..2c398cd9e5cb1 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1671,14 +1671,13 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) * * Even apart from the risk of broken locales, it's possible that * there are platforms where the use of abbreviated keys should be - * disabled at compile time. Having only 4 byte datums could make - * worst-case performance drastically more likely, for example. - * Moreover, macOS's strxfrm() implementation is known to not - * effectively concentrate a significant amount of entropy from the - * original string in earlier transformed blobs. It's possible that - * other supported platforms are similarly encumbered. 
So, if we ever - * get past disabling this categorically, we may still want or need to - * disable it for particular platforms. + * disabled at compile time. For example, macOS's strxfrm() + * implementation is known to not effectively concentrate a + * significant amount of entropy from the original string in earlier + * transformed blobs. It's possible that other supported platforms + * are similarly encumbered. So, if we ever get past disabling this + * categorically, we may still want or need to disable it for + * particular platforms. */ if (!pg_strxfrm_enabled(locale)) abbreviate = false; @@ -2132,18 +2131,12 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) addHyperLogLog(&sss->full_card, hash); /* Hash abbreviated key */ -#if SIZEOF_DATUM == 8 { - uint32 lohalf, - hihalf; + uint32 tmp; - lohalf = (uint32) res; - hihalf = (uint32) (res >> 32); - hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf)); + tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32); + hash = DatumGetUInt32(hash_uint32(tmp)); } -#else /* SIZEOF_DATUM != 8 */ - hash = DatumGetUInt32(hash_uint32((uint32) res)); -#endif addHyperLogLog(&sss->abbr_card, hash); diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 65ab83fff8b26..5d4411dc33fee 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -512,7 +512,6 @@ qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) return state->base.comparetup_tiebreak(a, b, state); } -#if SIZEOF_DATUM >= 8 /* Used if first key's comparator is ssup_datum_signed_cmp */ static pg_attribute_always_inline int qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) @@ -535,7 +534,6 @@ qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) return state->base.comparetup_tiebreak(a, b, state); } -#endif /* Used if first key's comparator is ssup_datum_int32_cmp */ static pg_attribute_always_inline int @@ -578,7 +576,6 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) #define ST_DEFINE #include "lib/sort_template.h" -#if SIZEOF_DATUM >= 8 #define ST_SORT qsort_tuple_signed #define ST_ELEMENT_TYPE SortTuple #define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state) @@ -587,7 +584,6 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) #define ST_SCOPE static #define ST_DEFINE #include "lib/sort_template.h" -#endif #define ST_SORT qsort_tuple_int32 #define ST_ELEMENT_TYPE SortTuple @@ -2692,7 +2688,6 @@ tuplesort_sort_memtuples(Tuplesortstate *state) state); return; } -#if SIZEOF_DATUM >= 8 else if (state->base.sortKeys[0].comparator == ssup_datum_signed_cmp) { qsort_tuple_signed(state->memtuples, @@ -2700,7 +2695,6 @@ tuplesort_sort_memtuples(Tuplesortstate *state) state); return; } -#endif else if (state->base.sortKeys[0].comparator == ssup_datum_int32_cmp) { qsort_tuple_int32(state->memtuples, @@ -3146,7 +3140,6 @@ ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup) return 0; } -#if SIZEOF_DATUM >= 8 int ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup) { @@ -3160,7 +3153,6 @@ ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup) else return 0; } -#endif int ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup) diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h index 702f7d12889d8..b4f103dec9afd 100644 --- a/src/include/access/gin_tuple.h +++ b/src/include/access/gin_tuple.h @@ -15,7 +15,9 @@ #include 
"utils/sortsupport.h" /* - * Data for one key in a GIN index. + * Data for one key in a GIN index. (This is not the permanent in-index + * representation, but just a convenient format to use during the tuplesort + * stage of building a new GIN index.) */ typedef struct GinTuple { diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index cb43a278f4667..56ac64f0597eb 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -285,10 +285,12 @@ typedef struct SpGistCache * If the prefix datum is of a pass-by-value type, it is stored in its * Datum representation, that is its on-disk representation is of length * sizeof(Datum). This is a fairly unfortunate choice, because in no other - * place does Postgres use Datum as an on-disk representation; it creates - * an unnecessary incompatibility between 32-bit and 64-bit builds. But the - * compatibility loss is mostly theoretical since MAXIMUM_ALIGNOF typically - * differs between such builds, too. Anyway we're stuck with it now. + * place does Postgres use Datum as an on-disk representation. Formerly it + * meant an unnecessary incompatibility between 32-bit and 64-bit builds, and + * as of v19 it instead creates a hazard for binary upgrades on 32-bit builds. + * Fortunately, that hazard seems mostly theoretical for lack of affected + * opclasses. Going forward, we will be using a fixed size of Datum so that + * there's no longer any pressing reason to change this. */ typedef struct SpGistInnerTupleData { @@ -377,8 +379,8 @@ typedef SpGistNodeTupleData *SpGistNodeTuple; * * size must be a multiple of MAXALIGN; also, it must be at least SGDTSIZE * so that the tuple can be converted to REDIRECT status later. (This - * restriction only adds bytes for a NULL leaf datum stored on a 32-bit - * machine; otherwise alignment restrictions force it anyway.) + * restriction only adds bytes for a NULL leaf datum; otherwise alignment + * restrictions force it anyway.) */ typedef struct SpGistLeafTupleData { diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h index 6240ec930e7a9..84b3e7fd896e0 100644 --- a/src/include/access/tupmacs.h +++ b/src/include/access/tupmacs.h @@ -39,9 +39,6 @@ att_isnull(int ATT, const bits8 *BITS) * return the correct number of bytes fetched from the data area and extended * to Datum form. * - * On machines where Datum is 8 bytes, we support fetching 8-byte byval - * attributes; otherwise, only 1, 2, and 4-byte values are supported. - * * Note that T must already be properly aligned for this to work correctly. 
*/ #define fetchatt(A,T) fetch_att(T, (A)->attbyval, (A)->attlen) @@ -62,10 +59,8 @@ fetch_att(const void *T, bool attbyval, int attlen) return Int16GetDatum(*((const int16 *) T)); case sizeof(int32): return Int32GetDatum(*((const int32 *) T)); -#if SIZEOF_DATUM == 8 - case sizeof(Datum): - return *((const Datum *) T); -#endif + case sizeof(int64): + return Int64GetDatum(*((const int64 *) T)); default: elog(ERROR, "unsupported byval length: %d", attlen); return 0; @@ -221,11 +216,9 @@ store_att_byval(void *T, Datum newdatum, int attlen) case sizeof(int32): *(int32 *) T = DatumGetInt32(newdatum); break; -#if SIZEOF_DATUM == 8 - case sizeof(Datum): - *(Datum *) T = newdatum; + case sizeof(int64): + *(int64 *) T = DatumGetInt64(newdatum); break; -#endif default: elog(ERROR, "unsupported byval length: %d", attlen); } diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h index 33648433c6377..b15f6f6ac3819 100644 --- a/src/include/port/pg_bswap.h +++ b/src/include/port/pg_bswap.h @@ -130,8 +130,7 @@ pg_bswap64(uint64 x) /* * Rearrange the bytes of a Datum from big-endian order into the native byte - * order. On big-endian machines, this does nothing at all. Note that the C - * type Datum is an unsigned integer type on all platforms. + * order. On big-endian machines, this does nothing at all. * * One possible application of the DatumBigEndianToNative() macro is to make * bitwise comparisons cheaper. A simple 3-way comparison of Datums @@ -139,23 +138,11 @@ pg_bswap64(uint64 x) * the same result as a memcmp() of the corresponding original Datums, but can * be much cheaper. It's generally safe to do this on big-endian systems * without any special transformation occurring first. - * - * If SIZEOF_DATUM is not defined, then postgres.h wasn't included and these - * macros probably shouldn't be used, so we define nothing. Note that - * SIZEOF_DATUM == 8 would evaluate as 0 == 8 in that case, potentially - * leading to the wrong implementation being selected and confusing errors, so - * defining nothing is safest. */ -#ifdef SIZEOF_DATUM #ifdef WORDS_BIGENDIAN #define DatumBigEndianToNative(x) (x) #else /* !WORDS_BIGENDIAN */ -#if SIZEOF_DATUM == 8 -#define DatumBigEndianToNative(x) pg_bswap64(x) -#else /* SIZEOF_DATUM != 8 */ -#define DatumBigEndianToNative(x) pg_bswap32(x) -#endif /* SIZEOF_DATUM == 8 */ +#define DatumBigEndianToNative(x) UInt64GetDatum(pg_bswap64(DatumGetUInt64(x))) #endif /* WORDS_BIGENDIAN */ -#endif /* SIZEOF_DATUM */ #endif /* PG_BSWAP_H */ diff --git a/src/include/utils/sortsupport.h b/src/include/utils/sortsupport.h index b7abaf7802de0..c64527e2ee907 100644 --- a/src/include/utils/sortsupport.h +++ b/src/include/utils/sortsupport.h @@ -262,7 +262,6 @@ ApplyUnsignedSortComparator(Datum datum1, bool isNull1, return compare; } -#if SIZEOF_DATUM >= 8 static inline int ApplySignedSortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, @@ -296,7 +295,6 @@ ApplySignedSortComparator(Datum datum1, bool isNull1, return compare; } -#endif static inline int ApplyInt32SortComparator(Datum datum1, bool isNull1, @@ -376,9 +374,7 @@ ApplySortAbbrevFullComparator(Datum datum1, bool isNull1, * are eligible for faster sorting. 
*/ extern int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup); -#if SIZEOF_DATUM >= 8 extern int ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup); -#endif extern int ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup); /* Other functions in utils/sort/sortsupport.c */ From ee54046601de2d14ca9107ba04c50178d9b52fe6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 13 Aug 2025 17:18:13 -0400 Subject: [PATCH 499/602] Grab the low-hanging fruit from forcing USE_FLOAT8_BYVAL to true. Remove conditionally-compiled code for the other case. Replace uses of FLOAT8PASSBYVAL with constant "true", mainly because it was quite confusing in cases where the type we were dealing with wasn't float8. I left the associated pg_control and Pg_magic_struct fields in place. Perhaps we should get rid of them, but it would save little, so it doesn't seem worth thinking hard about the compatibility implications. I just labeled them "vestigial" in places where that seemed helpful. Author: Tom Lane Reviewed-by: Peter Eisentraut Discussion: https://postgr.es/m/1749799.1752797397@sss.pgh.pa.us --- contrib/btree_gist/btree_time.c | 51 ++++++------- contrib/btree_gist/btree_ts.c | 39 ++++------ src/backend/access/common/tupdesc.c | 2 +- src/backend/access/index/indexam.c | 5 -- src/backend/access/transam/xlog.c | 20 +---- src/backend/catalog/genbki.pl | 3 +- src/backend/optimizer/plan/planagg.c | 2 +- src/backend/optimizer/plan/planner.c | 4 +- src/backend/parser/parse_node.c | 2 +- src/backend/rewrite/rewriteSearchCycle.c | 2 +- src/backend/utils/adt/arrayfuncs.c | 6 +- src/backend/utils/adt/int8.c | 75 ++++--------------- src/backend/utils/adt/numeric.c | 74 ++++-------------- src/backend/utils/adt/orderedsetaggs.c | 2 +- src/backend/utils/adt/rangetypes_typanalyze.c | 2 +- src/backend/utils/fmgr/fmgr.c | 35 --------- src/bin/initdb/initdb.c | 3 - src/bin/pg_resetwal/pg_resetwal.c | 2 +- src/include/c.h | 8 +- src/include/catalog/pg_type.dat | 16 ++-- src/include/fmgr.h | 2 +- src/include/postgres.h | 58 +------------- 22 files changed, 100 insertions(+), 313 deletions(-) diff --git a/contrib/btree_gist/btree_time.c b/contrib/btree_gist/btree_time.c index 1dba95057ba9f..fd6a2e05bc7b1 100644 --- a/contrib/btree_gist/btree_time.c +++ b/contrib/btree_gist/btree_time.c @@ -31,13 +31,6 @@ PG_FUNCTION_INFO_V1(gbt_time_sortsupport); PG_FUNCTION_INFO_V1(gbt_timetz_sortsupport); -#ifdef USE_FLOAT8_BYVAL -#define TimeADTGetDatumFast(X) TimeADTGetDatum(X) -#else -#define TimeADTGetDatumFast(X) PointerGetDatum(&(X)) -#endif - - static bool gbt_timegt(const void *a, const void *b, FmgrInfo *flinfo) { @@ -45,8 +38,8 @@ gbt_timegt(const void *a, const void *b, FmgrInfo *flinfo) const TimeADT *bb = (const TimeADT *) b; return DatumGetBool(DirectFunctionCall2(time_gt, - TimeADTGetDatumFast(*aa), - TimeADTGetDatumFast(*bb))); + TimeADTGetDatum(*aa), + TimeADTGetDatum(*bb))); } static bool @@ -56,8 +49,8 @@ gbt_timege(const void *a, const void *b, FmgrInfo *flinfo) const TimeADT *bb = (const TimeADT *) b; return DatumGetBool(DirectFunctionCall2(time_ge, - TimeADTGetDatumFast(*aa), - TimeADTGetDatumFast(*bb))); + TimeADTGetDatum(*aa), + TimeADTGetDatum(*bb))); } static bool @@ -67,8 +60,8 @@ gbt_timeeq(const void *a, const void *b, FmgrInfo *flinfo) const TimeADT *bb = (const TimeADT *) b; return DatumGetBool(DirectFunctionCall2(time_eq, - TimeADTGetDatumFast(*aa), - TimeADTGetDatumFast(*bb))); + TimeADTGetDatum(*aa), + TimeADTGetDatum(*bb))); } static bool @@ -78,8 +71,8 @@ gbt_timele(const void *a, const void 
*b, FmgrInfo *flinfo) const TimeADT *bb = (const TimeADT *) b; return DatumGetBool(DirectFunctionCall2(time_le, - TimeADTGetDatumFast(*aa), - TimeADTGetDatumFast(*bb))); + TimeADTGetDatum(*aa), + TimeADTGetDatum(*bb))); } static bool @@ -89,8 +82,8 @@ gbt_timelt(const void *a, const void *b, FmgrInfo *flinfo) const TimeADT *bb = (const TimeADT *) b; return DatumGetBool(DirectFunctionCall2(time_lt, - TimeADTGetDatumFast(*aa), - TimeADTGetDatumFast(*bb))); + TimeADTGetDatum(*aa), + TimeADTGetDatum(*bb))); } static int @@ -100,9 +93,9 @@ gbt_timekey_cmp(const void *a, const void *b, FmgrInfo *flinfo) timeKEY *ib = (timeKEY *) (((const Nsrt *) b)->t); int res; - res = DatumGetInt32(DirectFunctionCall2(time_cmp, TimeADTGetDatumFast(ia->lower), TimeADTGetDatumFast(ib->lower))); + res = DatumGetInt32(DirectFunctionCall2(time_cmp, TimeADTGetDatum(ia->lower), TimeADTGetDatum(ib->lower))); if (res == 0) - return DatumGetInt32(DirectFunctionCall2(time_cmp, TimeADTGetDatumFast(ia->upper), TimeADTGetDatumFast(ib->upper))); + return DatumGetInt32(DirectFunctionCall2(time_cmp, TimeADTGetDatum(ia->upper), TimeADTGetDatum(ib->upper))); return res; } @@ -115,8 +108,8 @@ gbt_time_dist(const void *a, const void *b, FmgrInfo *flinfo) Interval *i; i = DatumGetIntervalP(DirectFunctionCall2(time_mi_time, - TimeADTGetDatumFast(*aa), - TimeADTGetDatumFast(*bb))); + TimeADTGetDatum(*aa), + TimeADTGetDatum(*bb))); return fabs(INTERVAL_TO_SEC(i)); } @@ -279,14 +272,14 @@ gbt_time_penalty(PG_FUNCTION_ARGS) double res2; intr = DatumGetIntervalP(DirectFunctionCall2(time_mi_time, - TimeADTGetDatumFast(newentry->upper), - TimeADTGetDatumFast(origentry->upper))); + TimeADTGetDatum(newentry->upper), + TimeADTGetDatum(origentry->upper))); res = INTERVAL_TO_SEC(intr); res = Max(res, 0); intr = DatumGetIntervalP(DirectFunctionCall2(time_mi_time, - TimeADTGetDatumFast(origentry->lower), - TimeADTGetDatumFast(newentry->lower))); + TimeADTGetDatum(origentry->lower), + TimeADTGetDatum(newentry->lower))); res2 = INTERVAL_TO_SEC(intr); res2 = Max(res2, 0); @@ -297,8 +290,8 @@ gbt_time_penalty(PG_FUNCTION_ARGS) if (res > 0) { intr = DatumGetIntervalP(DirectFunctionCall2(time_mi_time, - TimeADTGetDatumFast(origentry->upper), - TimeADTGetDatumFast(origentry->lower))); + TimeADTGetDatum(origentry->upper), + TimeADTGetDatum(origentry->lower))); *result += FLT_MIN; *result += (float) (res / (res + INTERVAL_TO_SEC(intr))); *result *= (FLT_MAX / (((GISTENTRY *) PG_GETARG_POINTER(0))->rel->rd_att->natts + 1)); @@ -334,8 +327,8 @@ gbt_timekey_ssup_cmp(Datum x, Datum y, SortSupport ssup) /* for leaf items we expect lower == upper, so only compare lower */ return DatumGetInt32(DirectFunctionCall2(time_cmp, - TimeADTGetDatumFast(arg1->lower), - TimeADTGetDatumFast(arg2->lower))); + TimeADTGetDatum(arg1->lower), + TimeADTGetDatum(arg2->lower))); } Datum diff --git a/contrib/btree_gist/btree_ts.c b/contrib/btree_gist/btree_ts.c index eb899c4d21363..1e8f83f0f0a49 100644 --- a/contrib/btree_gist/btree_ts.c +++ b/contrib/btree_gist/btree_ts.c @@ -33,13 +33,6 @@ PG_FUNCTION_INFO_V1(gbt_ts_same); PG_FUNCTION_INFO_V1(gbt_ts_sortsupport); -#ifdef USE_FLOAT8_BYVAL -#define TimestampGetDatumFast(X) TimestampGetDatum(X) -#else -#define TimestampGetDatumFast(X) PointerGetDatum(&(X)) -#endif - - /* define for comparison */ static bool @@ -49,8 +42,8 @@ gbt_tsgt(const void *a, const void *b, FmgrInfo *flinfo) const Timestamp *bb = (const Timestamp *) b; return DatumGetBool(DirectFunctionCall2(timestamp_gt, - TimestampGetDatumFast(*aa), - 
TimestampGetDatumFast(*bb))); + TimestampGetDatum(*aa), + TimestampGetDatum(*bb))); } static bool @@ -60,8 +53,8 @@ gbt_tsge(const void *a, const void *b, FmgrInfo *flinfo) const Timestamp *bb = (const Timestamp *) b; return DatumGetBool(DirectFunctionCall2(timestamp_ge, - TimestampGetDatumFast(*aa), - TimestampGetDatumFast(*bb))); + TimestampGetDatum(*aa), + TimestampGetDatum(*bb))); } static bool @@ -71,8 +64,8 @@ gbt_tseq(const void *a, const void *b, FmgrInfo *flinfo) const Timestamp *bb = (const Timestamp *) b; return DatumGetBool(DirectFunctionCall2(timestamp_eq, - TimestampGetDatumFast(*aa), - TimestampGetDatumFast(*bb))); + TimestampGetDatum(*aa), + TimestampGetDatum(*bb))); } static bool @@ -82,8 +75,8 @@ gbt_tsle(const void *a, const void *b, FmgrInfo *flinfo) const Timestamp *bb = (const Timestamp *) b; return DatumGetBool(DirectFunctionCall2(timestamp_le, - TimestampGetDatumFast(*aa), - TimestampGetDatumFast(*bb))); + TimestampGetDatum(*aa), + TimestampGetDatum(*bb))); } static bool @@ -93,8 +86,8 @@ gbt_tslt(const void *a, const void *b, FmgrInfo *flinfo) const Timestamp *bb = (const Timestamp *) b; return DatumGetBool(DirectFunctionCall2(timestamp_lt, - TimestampGetDatumFast(*aa), - TimestampGetDatumFast(*bb))); + TimestampGetDatum(*aa), + TimestampGetDatum(*bb))); } static int @@ -104,9 +97,9 @@ gbt_tskey_cmp(const void *a, const void *b, FmgrInfo *flinfo) tsKEY *ib = (tsKEY *) (((const Nsrt *) b)->t); int res; - res = DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatumFast(ia->lower), TimestampGetDatumFast(ib->lower))); + res = DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatum(ia->lower), TimestampGetDatum(ib->lower))); if (res == 0) - return DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatumFast(ia->upper), TimestampGetDatumFast(ib->upper))); + return DatumGetInt32(DirectFunctionCall2(timestamp_cmp, TimestampGetDatum(ia->upper), TimestampGetDatum(ib->upper))); return res; } @@ -122,8 +115,8 @@ gbt_ts_dist(const void *a, const void *b, FmgrInfo *flinfo) return get_float8_infinity(); i = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi, - TimestampGetDatumFast(*aa), - TimestampGetDatumFast(*bb))); + TimestampGetDatum(*aa), + TimestampGetDatum(*bb))); return fabs(INTERVAL_TO_SEC(i)); } @@ -404,8 +397,8 @@ gbt_ts_ssup_cmp(Datum x, Datum y, SortSupport ssup) /* for leaf items we expect lower == upper, so only compare lower */ return DatumGetInt32(DirectFunctionCall2(timestamp_cmp, - TimestampGetDatumFast(arg1->lower), - TimestampGetDatumFast(arg2->lower))); + TimestampGetDatum(arg1->lower), + TimestampGetDatum(arg2->lower))); } Datum diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index be60005ae4600..568edacb9bdae 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -993,7 +993,7 @@ TupleDescInitBuiltinEntry(TupleDesc desc, case INT8OID: att->attlen = 8; - att->attbyval = FLOAT8PASSBYVAL; + att->attbyval = true; att->attalign = TYPALIGN_DOUBLE; att->attstorage = TYPSTORAGE_PLAIN; att->attcompression = InvalidCompressionMethod; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 219df1971da66..1a4f36fe0a970 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -986,11 +986,6 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes, { if (orderByTypes[i] == FLOAT8OID) { -#ifndef USE_FLOAT8_BYVAL - /* must free any old value to avoid memory leakage */ - if 
(!scan->xs_orderbynulls[i]) - pfree(DatumGetPointer(scan->xs_orderbyvals[i])); -#endif if (distances && !distances[i].isnull) { scan->xs_orderbyvals[i] = Float8GetDatum(distances[i].value); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9a4de1616bcc9..e8909406686d0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4390,7 +4390,7 @@ WriteControlFile(void) ControlFile->toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE; ControlFile->loblksize = LOBLKSIZE; - ControlFile->float8ByVal = FLOAT8PASSBYVAL; + ControlFile->float8ByVal = true; /* vestigial */ /* * Initialize the default 'char' signedness. @@ -4651,23 +4651,7 @@ ReadControlFile(void) "LOBLKSIZE", (int) LOBLKSIZE), errhint("It looks like you need to recompile or initdb."))); -#ifdef USE_FLOAT8_BYVAL - if (ControlFile->float8ByVal != true) - ereport(FATAL, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL" - " but the server was compiled with USE_FLOAT8_BYVAL."), - errhint("It looks like you need to recompile or initdb."))); -#else - if (ControlFile->float8ByVal != false) - ereport(FATAL, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("database files are incompatible with server"), - errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL" - " but the server was compiled without USE_FLOAT8_BYVAL."), - errhint("It looks like you need to recompile or initdb."))); -#endif + Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */ wal_segment_size = ControlFile->xlog_seg_size; diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index df3231fcd41c2..6c02aee726754 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -1054,8 +1054,7 @@ sub morph_row_for_schemapg } # Expand booleans from 'f'/'t' to 'false'/'true'. - # Some values might be other macros (eg FLOAT8PASSBYVAL), - # don't change. + # Some values might be other macros, if so don't change. 
elsif ($atttype eq 'bool') { $row->{$attname} = 'true' if $row->{$attname} eq 't'; diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 64605be31781f..2ef0bb7f66365 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -410,7 +410,7 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, sizeof(int64), Int64GetDatum(1), false, - FLOAT8PASSBYVAL); + true); /* * Generate the best paths for this query, telling query_planner that we diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5ba0d22befd3a..0d5a692e5fdca 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4915,7 +4915,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, sizeof(int64), Int64GetDatum(1), false, - FLOAT8PASSBYVAL); + true); /* * Apply a LimitPath onto the partial path to restrict the @@ -5118,7 +5118,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, sizeof(int64), Int64GetDatum(1), false, - FLOAT8PASSBYVAL); + true); /* * If the query already has a LIMIT clause, then we could diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index d6feb16aef375..203b7a321782d 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -408,7 +408,7 @@ make_const(ParseState *pstate, A_Const *aconst) typeid = INT8OID; typelen = sizeof(int64); - typebyval = FLOAT8PASSBYVAL; /* int8 and float8 alike */ + typebyval = true; } } else diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c index 19b89dee0d096..9f95d4dc1b0e8 100644 --- a/src/backend/rewrite/rewriteSearchCycle.c +++ b/src/backend/rewrite/rewriteSearchCycle.c @@ -320,7 +320,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte) if (cte->search_clause->search_breadth_first) { search_col_rowexpr->args = lcons(makeConst(INT8OID, -1, InvalidOid, sizeof(int64), - Int64GetDatum(0), false, FLOAT8PASSBYVAL), + Int64GetDatum(0), false, true), search_col_rowexpr->args); search_col_rowexpr->colnames = lcons(makeString("*DEPTH*"), search_col_rowexpr->colnames); texpr = (Expr *) search_col_rowexpr; diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index c8f53c6fbe788..c833e7df1fd9e 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3406,7 +3406,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype) case FLOAT8OID: elmlen = sizeof(float8); - elmbyval = FLOAT8PASSBYVAL; + elmbyval = true; elmalign = TYPALIGN_DOUBLE; break; @@ -3424,7 +3424,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype) case INT8OID: elmlen = sizeof(int64); - elmbyval = FLOAT8PASSBYVAL; + elmbyval = true; elmalign = TYPALIGN_DOUBLE; break; @@ -3718,7 +3718,7 @@ deconstruct_array_builtin(ArrayType *array, case FLOAT8OID: elmlen = sizeof(float8); - elmbyval = FLOAT8PASSBYVAL; + elmbyval = true; elmalign = TYPALIGN_DOUBLE; break; diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 9dd5889f34c62..bdea490202a64 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -718,76 +718,29 @@ int8lcm(PG_FUNCTION_ARGS) Datum int8inc(PG_FUNCTION_ARGS) { - /* - * When int8 is pass-by-reference, we 
provide this special case to avoid - * palloc overhead for COUNT(): when called as an aggregate, we know that - * the argument is modifiable local storage, so just update it in-place. - * (If int8 is pass-by-value, then of course this is useless as well as - * incorrect, so just ifdef it out.) - */ -#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ - if (AggCheckCallContext(fcinfo, NULL)) - { - int64 *arg = (int64 *) PG_GETARG_POINTER(0); - - if (unlikely(pg_add_s64_overflow(*arg, 1, arg))) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range"))); - - PG_RETURN_POINTER(arg); - } - else -#endif - { - /* Not called as an aggregate, so just do it the dumb way */ - int64 arg = PG_GETARG_INT64(0); - int64 result; + int64 arg = PG_GETARG_INT64(0); + int64 result; - if (unlikely(pg_add_s64_overflow(arg, 1, &result))) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range"))); + if (unlikely(pg_add_s64_overflow(arg, 1, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); - PG_RETURN_INT64(result); - } + PG_RETURN_INT64(result); } Datum int8dec(PG_FUNCTION_ARGS) { - /* - * When int8 is pass-by-reference, we provide this special case to avoid - * palloc overhead for COUNT(): when called as an aggregate, we know that - * the argument is modifiable local storage, so just update it in-place. - * (If int8 is pass-by-value, then of course this is useless as well as - * incorrect, so just ifdef it out.) - */ -#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ - if (AggCheckCallContext(fcinfo, NULL)) - { - int64 *arg = (int64 *) PG_GETARG_POINTER(0); - - if (unlikely(pg_sub_s64_overflow(*arg, 1, arg))) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range"))); - PG_RETURN_POINTER(arg); - } - else -#endif - { - /* Not called as an aggregate, so just do it the dumb way */ - int64 arg = PG_GETARG_INT64(0); - int64 result; + int64 arg = PG_GETARG_INT64(0); + int64 result; - if (unlikely(pg_sub_s64_overflow(arg, 1, &result))) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range"))); + if (unlikely(pg_sub_s64_overflow(arg, 1, &result))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); - PG_RETURN_INT64(result); - } + PG_RETURN_INT64(result); } diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index a79aea5f7824c..b6287f5d97305 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -6362,6 +6362,7 @@ numeric_poly_stddev_pop(PG_FUNCTION_ARGS) Datum int2_sum(PG_FUNCTION_ARGS) { + int64 oldsum; int64 newval; if (PG_ARGISNULL(0)) @@ -6374,43 +6375,22 @@ int2_sum(PG_FUNCTION_ARGS) PG_RETURN_INT64(newval); } - /* - * If we're invoked as an aggregate, we can cheat and modify our first - * parameter in-place to avoid palloc overhead. If not, we need to return - * the new value of the transition variable. (If int8 is pass-by-value, - * then of course this is useless as well as incorrect, so just ifdef it - * out.) 
- */ -#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ - if (AggCheckCallContext(fcinfo, NULL)) - { - int64 *oldsum = (int64 *) PG_GETARG_POINTER(0); + oldsum = PG_GETARG_INT64(0); - /* Leave the running sum unchanged in the new input is null */ - if (!PG_ARGISNULL(1)) - *oldsum = *oldsum + (int64) PG_GETARG_INT16(1); - - PG_RETURN_POINTER(oldsum); - } - else -#endif - { - int64 oldsum = PG_GETARG_INT64(0); - - /* Leave sum unchanged if new input is null. */ - if (PG_ARGISNULL(1)) - PG_RETURN_INT64(oldsum); + /* Leave sum unchanged if new input is null. */ + if (PG_ARGISNULL(1)) + PG_RETURN_INT64(oldsum); - /* OK to do the addition. */ - newval = oldsum + (int64) PG_GETARG_INT16(1); + /* OK to do the addition. */ + newval = oldsum + (int64) PG_GETARG_INT16(1); - PG_RETURN_INT64(newval); - } + PG_RETURN_INT64(newval); } Datum int4_sum(PG_FUNCTION_ARGS) { + int64 oldsum; int64 newval; if (PG_ARGISNULL(0)) @@ -6423,38 +6403,16 @@ int4_sum(PG_FUNCTION_ARGS) PG_RETURN_INT64(newval); } - /* - * If we're invoked as an aggregate, we can cheat and modify our first - * parameter in-place to avoid palloc overhead. If not, we need to return - * the new value of the transition variable. (If int8 is pass-by-value, - * then of course this is useless as well as incorrect, so just ifdef it - * out.) - */ -#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ - if (AggCheckCallContext(fcinfo, NULL)) - { - int64 *oldsum = (int64 *) PG_GETARG_POINTER(0); + oldsum = PG_GETARG_INT64(0); - /* Leave the running sum unchanged in the new input is null */ - if (!PG_ARGISNULL(1)) - *oldsum = *oldsum + (int64) PG_GETARG_INT32(1); - - PG_RETURN_POINTER(oldsum); - } - else -#endif - { - int64 oldsum = PG_GETARG_INT64(0); - - /* Leave sum unchanged if new input is null. */ - if (PG_ARGISNULL(1)) - PG_RETURN_INT64(oldsum); + /* Leave sum unchanged if new input is null. */ + if (PG_ARGISNULL(1)) + PG_RETURN_INT64(oldsum); - /* OK to do the addition. */ - newval = oldsum + (int64) PG_GETARG_INT32(1); + /* OK to do the addition. 
*/ + newval = oldsum + (int64) PG_GETARG_INT32(1); - PG_RETURN_INT64(newval); - } + PG_RETURN_INT64(newval); } /* diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 9457d23971581..c41b191be6217 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -1007,7 +1007,7 @@ percentile_cont_float8_multi_final(PG_FUNCTION_ARGS) FLOAT8OID, /* hard-wired info on type float8 */ sizeof(float8), - FLOAT8PASSBYVAL, + true, TYPALIGN_DOUBLE, float8_lerp); } diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index a18196d8a34a5..36e885af2dd17 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -397,7 +397,7 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, stats->numvalues[slot_idx] = num_hist; stats->statypid[slot_idx] = FLOAT8OID; stats->statyplen[slot_idx] = sizeof(float8); - stats->statypbyval[slot_idx] = FLOAT8PASSBYVAL; + stats->statypbyval[slot_idx] = true; stats->statypalign[slot_idx] = 'd'; /* Store the fraction of empty ranges */ diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 782291d999832..5543440a33e6c 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -1788,41 +1788,6 @@ OidSendFunctionCall(Oid functionId, Datum val) } -/*------------------------------------------------------------------------- - * Support routines for standard maybe-pass-by-reference datatypes - * - * int8 and float8 can be passed by value if Datum is wide enough. - * (For backwards-compatibility reasons, we allow pass-by-ref to be chosen - * at compile time even if pass-by-val is possible.) - * - * Note: there is only one switch controlling the pass-by-value option for - * both int8 and float8; this is to avoid making things unduly complicated - * for the timestamp types, which might have either representation. - *------------------------------------------------------------------------- - */ - -#ifndef USE_FLOAT8_BYVAL /* controls int8 too */ - -Datum -Int64GetDatum(int64 X) -{ - int64 *retval = (int64 *) palloc(sizeof(int64)); - - *retval = X; - return PointerGetDatum(retval); -} - -Datum -Float8GetDatum(float8 X) -{ - float8 *retval = (float8 *) palloc(sizeof(float8)); - - *retval = X; - return PointerGetDatum(retval); -} -#endif /* USE_FLOAT8_BYVAL */ - - /*------------------------------------------------------------------------- * Support routines for toastable datatypes *------------------------------------------------------------------------- diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 62bbd08d9f658..92fe2f531f7a8 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1580,9 +1580,6 @@ bootstrap_template1(void) bki_lines = replace_token(bki_lines, "ALIGNOF_POINTER", (sizeof(Pointer) == 4) ? "i" : "d"); - bki_lines = replace_token(bki_lines, "FLOAT8PASSBYVAL", - FLOAT8PASSBYVAL ? 
"true" : "false"); - bki_lines = replace_token(bki_lines, "POSTGRES", escape_quotes_bki(username)); diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index e876f35f38ed4..7a4e4eb95706e 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -719,7 +719,7 @@ GuessControlValues(void) ControlFile.indexMaxKeys = INDEX_MAX_KEYS; ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE; ControlFile.loblksize = LOBLKSIZE; - ControlFile.float8ByVal = FLOAT8PASSBYVAL; + ControlFile.float8ByVal = true; /* vestigial */ /* * XXX eventually, should try to grovel through old XLOG to develop more diff --git a/src/include/c.h b/src/include/c.h index bbdaa88c63a6f..39022f8a9dd75 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -609,11 +609,11 @@ typedef signed int Offset; typedef float float4; typedef double float8; -#ifdef USE_FLOAT8_BYVAL +/* + * float8, int8, and related datatypes are now always pass-by-value. + * We keep this symbol to avoid breaking extension code that may use it. + */ #define FLOAT8PASSBYVAL true -#else -#define FLOAT8PASSBYVAL false -#endif /* * Oid, RegProcedure, TransactionId, SubTransactionId, MultiXactId, diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 29e4ffffc9806..cb730aeac8646 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -54,7 +54,7 @@ typcollation => 'C' }, { oid => '20', array_type_oid => '1016', descr => '~18 digit integer, 8-byte storage', - typname => 'int8', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'int8', typlen => '8', typbyval => 't', typcategory => 'N', typinput => 'int8in', typoutput => 'int8out', typreceive => 'int8recv', typsend => 'int8send', typalign => 'd' }, { oid => '21', array_type_oid => '1005', @@ -172,7 +172,7 @@ typoutput => 'pg_ddl_command_out', typreceive => 'pg_ddl_command_recv', typsend => 'pg_ddl_command_send', typalign => 'ALIGNOF_POINTER' }, { oid => '5069', array_type_oid => '271', descr => 'full transaction id', - typname => 'xid8', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'xid8', typlen => '8', typbyval => 't', typcategory => 'U', typinput => 'xid8in', typoutput => 'xid8out', typreceive => 'xid8recv', typsend => 'xid8send', typalign => 'd' }, @@ -222,7 +222,7 @@ typsend => 'float4send', typalign => 'i' }, { oid => '701', array_type_oid => '1022', descr => 'double-precision floating point number, 8-byte storage', - typname => 'float8', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'float8', typlen => '8', typbyval => 't', typcategory => 'N', typispreferred => 't', typinput => 'float8in', typoutput => 'float8out', typreceive => 'float8recv', typsend => 'float8send', typalign => 'd' }, @@ -237,7 +237,7 @@ typreceive => 'circle_recv', typsend => 'circle_send', typalign => 'd' }, { oid => '790', array_type_oid => '791', descr => 'monetary amounts, $d,ddd.cc', - typname => 'money', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'money', typlen => '8', typbyval => 't', typcategory => 'N', typinput => 'cash_in', typoutput => 'cash_out', typreceive => 'cash_recv', typsend => 'cash_send', typalign => 'd' }, @@ -290,7 +290,7 @@ typinput => 'date_in', typoutput => 'date_out', typreceive => 'date_recv', typsend => 'date_send', typalign => 'i' }, { oid => '1083', array_type_oid => '1183', descr => 'time of day', - typname => 'time', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'time', typlen => '8', typbyval => 't', typcategory => 'D', 
typinput => 'time_in', typoutput => 'time_out', typreceive => 'time_recv', typsend => 'time_send', typmodin => 'timetypmodin', typmodout => 'timetypmodout', typalign => 'd' }, @@ -298,14 +298,14 @@ # OIDS 1100 - 1199 { oid => '1114', array_type_oid => '1115', descr => 'date and time', - typname => 'timestamp', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'timestamp', typlen => '8', typbyval => 't', typcategory => 'D', typinput => 'timestamp_in', typoutput => 'timestamp_out', typreceive => 'timestamp_recv', typsend => 'timestamp_send', typmodin => 'timestamptypmodin', typmodout => 'timestamptypmodout', typalign => 'd' }, { oid => '1184', array_type_oid => '1185', descr => 'date and time with time zone', - typname => 'timestamptz', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'timestamptz', typlen => '8', typbyval => 't', typcategory => 'D', typispreferred => 't', typinput => 'timestamptz_in', typoutput => 'timestamptz_out', typreceive => 'timestamptz_recv', typsend => 'timestamptz_send', typmodin => 'timestamptztypmodin', @@ -413,7 +413,7 @@ # pg_lsn { oid => '3220', array_type_oid => '3221', descr => 'PostgreSQL LSN', - typname => 'pg_lsn', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', + typname => 'pg_lsn', typlen => '8', typbyval => 't', typcategory => 'U', typinput => 'pg_lsn_in', typoutput => 'pg_lsn_out', typreceive => 'pg_lsn_recv', typsend => 'pg_lsn_send', typalign => 'd' }, diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 0fe7b4ebc7719..c7236e4297242 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -469,7 +469,7 @@ typedef struct int funcmaxargs; /* FUNC_MAX_ARGS */ int indexmaxkeys; /* INDEX_MAX_KEYS */ int namedatalen; /* NAMEDATALEN */ - int float8byval; /* FLOAT8PASSBYVAL */ + int float8byval; /* FLOAT8PASSBYVAL (now vestigial) */ char abi_extra[32]; /* see pg_config_manual.h */ } Pg_abi_values; diff --git a/src/include/postgres.h b/src/include/postgres.h index e81829bfa6fef..357cbd6fd961e 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -388,68 +388,41 @@ NameGetDatum(const NameData *X) /* * DatumGetInt64 * Returns 64-bit integer value of a datum. - * - * Note: this function hides whether int64 is pass by value or by reference. */ static inline int64 DatumGetInt64(Datum X) { -#ifdef USE_FLOAT8_BYVAL return (int64) X; -#else - return *((int64 *) DatumGetPointer(X)); -#endif } /* * Int64GetDatum * Returns datum representation for a 64-bit integer. - * - * Note: if int64 is pass by reference, this function returns a reference - * to palloc'd space. */ -#ifdef USE_FLOAT8_BYVAL static inline Datum Int64GetDatum(int64 X) { return (Datum) X; } -#else -extern Datum Int64GetDatum(int64 X); -#endif - /* * DatumGetUInt64 * Returns 64-bit unsigned integer value of a datum. - * - * Note: this function hides whether int64 is pass by value or by reference. */ static inline uint64 DatumGetUInt64(Datum X) { -#ifdef USE_FLOAT8_BYVAL return (uint64) X; -#else - return *((uint64 *) DatumGetPointer(X)); -#endif } /* * UInt64GetDatum * Returns datum representation for a 64-bit unsigned integer. - * - * Note: if int64 is pass by reference, this function returns a reference - * to palloc'd space. */ static inline Datum UInt64GetDatum(uint64 X) { -#ifdef USE_FLOAT8_BYVAL return (Datum) X; -#else - return Int64GetDatum((int64) X); -#endif } /* @@ -497,13 +470,10 @@ Float4GetDatum(float4 X) /* * DatumGetFloat8 * Returns 8-byte floating point value of a datum. 
- * - * Note: this function hides whether float8 is pass by value or by reference. */ static inline float8 DatumGetFloat8(Datum X) { -#ifdef USE_FLOAT8_BYVAL union { int64 value; @@ -512,19 +482,12 @@ DatumGetFloat8(Datum X) myunion.value = DatumGetInt64(X); return myunion.retval; -#else - return *((float8 *) DatumGetPointer(X)); -#endif } /* * Float8GetDatum * Returns datum representation for an 8-byte floating point number. - * - * Note: if float8 is pass by reference, this function returns a reference - * to palloc'd space. */ -#ifdef USE_FLOAT8_BYVAL static inline Datum Float8GetDatum(float8 X) { @@ -537,35 +500,22 @@ Float8GetDatum(float8 X) myunion.value = X; return Int64GetDatum(myunion.retval); } -#else -extern Datum Float8GetDatum(float8 X); -#endif - /* * Int64GetDatumFast * Float8GetDatumFast * - * These macros are intended to allow writing code that does not depend on + * These macros were intended to allow writing code that does not depend on * whether int64 and float8 are pass-by-reference types, while not - * sacrificing performance when they are. The argument must be a variable - * that will exist and have the same value for as long as the Datum is needed. - * In the pass-by-ref case, the address of the variable is taken to use as - * the Datum. In the pass-by-val case, these are the same as the non-Fast - * functions, except for asserting that the variable is of the correct type. + * sacrificing performance when they are. They are no longer different + * from the regular functions, though we keep the assertions to protect + * code that might get back-patched into older branches. */ -#ifdef USE_FLOAT8_BYVAL #define Int64GetDatumFast(X) \ (AssertVariableIsOfTypeMacro(X, int64), Int64GetDatum(X)) #define Float8GetDatumFast(X) \ (AssertVariableIsOfTypeMacro(X, double), Float8GetDatum(X)) -#else -#define Int64GetDatumFast(X) \ - (AssertVariableIsOfTypeMacro(X, int64), PointerGetDatum(&(X))) -#define Float8GetDatumFast(X) \ - (AssertVariableIsOfTypeMacro(X, double), PointerGetDatum(&(X))) -#endif /* ---------------------------------------------------------------- From 12f3639ee70254dc5d83f170c2f34cfeb444980e Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 14 Aug 2025 11:12:03 +0900 Subject: [PATCH 500/602] Fix incorrect LSN format in comment. The comment previously used %X/08X, which is wrong. Updated it to the standardized format %X/%08X. Author: Japin Li Discussion: https://postgr.es/m/ME0P300MB0445A37908EFCCD15E6D749DB62BA@ME0P300MB0445.AUSP300.PROD.OUTLOOK.COM --- src/include/access/xlogdefs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h index 514f03df0b69b..2397fb2411530 100644 --- a/src/include/access/xlogdefs.h +++ b/src/include/access/xlogdefs.h @@ -38,7 +38,7 @@ typedef uint64 XLogRecPtr; /* * Handy macro for printing XLogRecPtr in conventional format, e.g., * - * printf("%X/08X", LSN_FORMAT_ARGS(lsn)); + * printf("%X/%08X", LSN_FORMAT_ARGS(lsn)); * * To avoid breaking translatable messages, we're directly applying the * LSN format instead of using a macro. From e9a31c0cc604b015363d0aa5b018e8cea9d1d9a7 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 14 Aug 2025 12:33:14 +0900 Subject: [PATCH 501/602] Revert logical snapshot filename format change in SnapBuildSnapshotExists(). Commit 2633dae2e48 standardized LSN formatting but mistakenly changed the logical snapshot filename format in SnapBuildSnapshotExists() from "%X-%X.snap" to "%08X-%08X.snap". 
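(As a concrete example, for an LSN of 0/1639E60 the snapshot file on disk
is named 0-1639E60.snap under pg_logical/snapshots, while the changed code
looked for 00000000-01639E60.snap.)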
Other code still used the original "%X-%X.snap" format, causing the
replication slot synchronization worker to fail to find existing snapshot
files and produce excessive log messages. This commit restores the
original "%X-%X.snap" format in SnapBuildSnapshotExists() to resolve the
issue.

Author: Shveta Malik
Discussion: https://postgr.es/m/CAHGQGwHuHPB-ucAk_Tq3uSs4Fdziu1Jp_AA_RD3m5Ycky7m48w@mail.gmail.com
---
 src/backend/replication/logical/snapbuild.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 8532bfd27e53f..98ddee2092905 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -2061,7 +2061,7 @@ SnapBuildSnapshotExists(XLogRecPtr lsn)
 	int			ret;
 	struct stat stat_buf;
 
-	sprintf(path, "%s/%08X-%08X.snap",
+	sprintf(path, "%s/%X-%X.snap",
 			PG_LOGICAL_SNAPSHOTS_DIR,
 			LSN_FORMAT_ARGS(lsn));
 
From 6304256e79c70f6446e840023fbf1cbee47fb08e Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Thu, 14 Aug 2025 16:21:50 +0900
Subject: [PATCH 502/602] Fix compilation warning with SerializeClientConnectionInfo()

This function uses an argument named "maxsize" that is only used in
assertions, being set once outside the assertion area. Recent gcc
versions with -Wunused-but-set-parameter emit a warning about it when
building without assertions enabled.

In order to fix this issue, PG_USED_FOR_ASSERTS_ONLY is added to the
function argument of SerializeClientConnectionInfo(), which is the first
time we are doing so in the tree.

The CI is fine with the change, but let's see what the buildfarm has to
say on the matter.

Reviewed-by: Andres Freund
Reviewed-by: Jacob Champion
Discussion: https://postgr.es/m/pevajesswhxafjkivoq3yvwxga77tbncghlf3gq5fvchsvfuda@6uivg25sb3nx
Backpatch-through: 16
---
 src/backend/utils/init/miscinit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 65d8cbfaed585..545d1e90fbd41 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -1099,7 +1099,8 @@ EstimateClientConnectionInfoSpace(void)
  * Serialize MyClientConnectionInfo for use by parallel workers.
  */
 void
-SerializeClientConnectionInfo(Size maxsize, char *start_address)
+SerializeClientConnectionInfo(Size maxsize PG_USED_FOR_ASSERTS_ONLY,
+							  char *start_address)
 {
 	SerializedClientConnectionInfo serialized = {0};
 
From 4ec6e22b4358e8c8fbfa7cfd6d3314ae41663247 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Thu, 14 Aug 2025 13:31:18 +0300
Subject: [PATCH 503/602] Fix LSN format in debug message

Commit 2633dae2e48 standardized all existing messages to use `%X/%08X`
for LSNs, but this one crept back in after the commit.
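For reference, the convention prints the two 32-bit halves of the 64-bit
LSN with only the low half zero-padded; a minimal sketch, with "lsn"
standing in for whatever XLogRecPtr value is at hand:

    elog(DEBUG2, "current LSN %X/%08X", LSN_FORMAT_ARGS(lsn));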
--- src/backend/replication/logical/worker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 0fdc5de57ba89..59b6ae7719ab1 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -4587,7 +4587,7 @@ wait_for_local_flush(RetainDeadTuplesData *rdt_data) MyLogicalRepWorker->oldest_nonremovable_xid = rdt_data->candidate_xid; SpinLockRelease(&MyLogicalRepWorker->relmutex); - elog(DEBUG2, "confirmed flush up to remote lsn %X/%X: new oldest_nonremovable_xid %u", + elog(DEBUG2, "confirmed flush up to remote lsn %X/%08X: new oldest_nonremovable_xid %u", LSN_FORMAT_ARGS(rdt_data->remote_lsn), rdt_data->candidate_xid); From ed0736172170bdae800b28e3555241b82854f09f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 14 Aug 2025 11:39:19 -0400 Subject: [PATCH 504/602] Don't leak memory during failure exit from SelectConfigFiles(). Make sure the memory allocated by make_absolute_path() is freed when SelectConfigFiles() fails. Since all the callers will exit immediately in that case, there's no practical gain here, but silencing Valgrind leak complaints seems useful. In any case, it was inconsistent that only one of the failure exits did this. Author: Aleksander Alekseev Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAJ7c6TMByXE8dc7zDvDWTQjk6o-XXAdRg_RAg5CBaUOgFPV3LQ%40mail.gmail.com --- src/backend/utils/misc/guc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index e404c345e6eab..46fdefebe353e 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1803,7 +1803,7 @@ SelectConfigFiles(const char *userDoption, const char *progname) configdir); if (errno == ENOENT) write_stderr("Run initdb or pg_basebackup to initialize a PostgreSQL data directory.\n"); - return false; + goto fail; } /* @@ -1830,7 +1830,7 @@ SelectConfigFiles(const char *userDoption, const char *progname) "You must specify the --config-file or -D invocation " "option or set the PGDATA environment variable.\n", progname); - return false; + goto fail; } /* @@ -1851,8 +1851,7 @@ SelectConfigFiles(const char *userDoption, const char *progname) { write_stderr("%s: could not access the server configuration file \"%s\": %m\n", progname, ConfigFileName); - free(configdir); - return false; + goto fail; } /* @@ -1882,7 +1881,7 @@ SelectConfigFiles(const char *userDoption, const char *progname) "or by the -D invocation option, or by the " "PGDATA environment variable.\n", progname, ConfigFileName); - return false; + goto fail; } /* @@ -1934,7 +1933,7 @@ SelectConfigFiles(const char *userDoption, const char *progname) "or by the -D invocation option, or by the " "PGDATA environment variable.\n", progname, ConfigFileName); - return false; + goto fail; } SetConfigOption("hba_file", fname, PGC_POSTMASTER, PGC_S_OVERRIDE); @@ -1965,7 +1964,7 @@ SelectConfigFiles(const char *userDoption, const char *progname) "or by the -D invocation option, or by the " "PGDATA environment variable.\n", progname, ConfigFileName); - return false; + goto fail; } SetConfigOption("ident_file", fname, PGC_POSTMASTER, PGC_S_OVERRIDE); @@ -1977,6 +1976,11 @@ SelectConfigFiles(const char *userDoption, const char *progname) free(configdir); return true; + +fail: + free(configdir); + + return false; } /* From d0e7e04ede165abc95ca16bd9fa93284cc4dac6d Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 14 Aug 2025 17:48:46 +0200 Subject: [PATCH 505/602] Avoid including tableam.h and xlogreader.h in nbtree.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing that seems rather random and unnecessary. This commit removes those and fixes fallout, which is pretty minimal. We do need to add a forward declaration of struct TM_IndexDeleteOp (whose full definition appears in tableam.h) so that _bt_delitems_delete_check()'s declaration can use it. Author: Álvaro Herrera Reviewed-by: Bertrand Drouvot Discussion: https://postgr.es/m/202508051109.lzk3lcuzsaxo@alvherre.pgsql --- contrib/btree_gist/btree_bool.c | 1 + contrib/btree_gist/btree_cash.c | 1 + contrib/btree_gist/btree_date.c | 1 + contrib/btree_gist/btree_enum.c | 1 + contrib/btree_gist/btree_float4.c | 1 + contrib/btree_gist/btree_float8.c | 1 + contrib/btree_gist/btree_inet.c | 1 + contrib/btree_gist/btree_int2.c | 1 + contrib/btree_gist/btree_int4.c | 1 + contrib/btree_gist/btree_int8.c | 1 + contrib/btree_gist/btree_interval.c | 1 + contrib/btree_gist/btree_macaddr.c | 1 + contrib/btree_gist/btree_macaddr8.c | 1 + contrib/btree_gist/btree_oid.c | 1 + contrib/btree_gist/btree_time.c | 1 + contrib/btree_gist/btree_ts.c | 1 + contrib/btree_gist/btree_utils_var.c | 1 + contrib/btree_gist/btree_uuid.c | 1 + src/backend/access/nbtree/nbtdedup.c | 1 + src/backend/access/nbtree/nbtinsert.c | 1 + src/backend/access/nbtree/nbtpreprocesskeys.c | 2 ++ src/backend/access/nbtree/nbtsort.c | 1 + src/backend/access/nbtree/nbtsplitloc.c | 1 + src/backend/access/nbtree/nbtutils.c | 3 +++ src/backend/utils/sort/tuplesortvariants.c | 1 + src/include/access/nbtree.h | 6 +++--- 26 files changed, 31 insertions(+), 3 deletions(-) diff --git a/contrib/btree_gist/btree_bool.c b/contrib/btree_gist/btree_bool.c index 1127597bb6017..344f059c78fde 100644 --- a/contrib/btree_gist/btree_bool.c +++ b/contrib/btree_gist/btree_bool.c @@ -5,6 +5,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct boolkey diff --git a/contrib/btree_gist/btree_cash.c b/contrib/btree_gist/btree_cash.c index 01c8d5a5f4074..282d5c5731fef 100644 --- a/contrib/btree_gist/btree_cash.c +++ b/contrib/btree_gist/btree_cash.c @@ -7,6 +7,7 @@ #include "btree_utils_num.h" #include "common/int.h" #include "utils/cash.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct diff --git a/contrib/btree_gist/btree_date.c b/contrib/btree_gist/btree_date.c index c008dc61ba5f5..1f1a3f32b56a9 100644 --- a/contrib/btree_gist/btree_date.c +++ b/contrib/btree_gist/btree_date.c @@ -7,6 +7,7 @@ #include "btree_utils_num.h" #include "utils/fmgrprotos.h" #include "utils/date.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct diff --git a/contrib/btree_gist/btree_enum.c b/contrib/btree_gist/btree_enum.c index c54cf3c7bae87..8f1ffff46965f 100644 --- a/contrib/btree_gist/btree_enum.c +++ b/contrib/btree_gist/btree_enum.c @@ -8,6 +8,7 @@ #include "fmgr.h" #include "utils/fmgrprotos.h" #include "utils/fmgroids.h" +#include "utils/rel.h" #include "utils/sortsupport.h" /* enums are really Oids, so we just use the same structure */ diff --git a/contrib/btree_gist/btree_float4.c b/contrib/btree_gist/btree_float4.c index bec026a923a18..d9c859835dacc 100644 --- a/contrib/btree_gist/btree_float4.c +++ b/contrib/btree_gist/btree_float4.c @@ -6,6 +6,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "utils/float.h" 
+#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct float4key diff --git a/contrib/btree_gist/btree_float8.c b/contrib/btree_gist/btree_float8.c index 43e7cde2b6958..567beede178ad 100644 --- a/contrib/btree_gist/btree_float8.c +++ b/contrib/btree_gist/btree_float8.c @@ -6,6 +6,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "utils/float.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct float8key diff --git a/contrib/btree_gist/btree_inet.c b/contrib/btree_gist/btree_inet.c index 8b23853bafbb7..52bf3e2446e95 100644 --- a/contrib/btree_gist/btree_inet.c +++ b/contrib/btree_gist/btree_inet.c @@ -7,6 +7,7 @@ #include "btree_utils_num.h" #include "catalog/pg_type.h" #include "utils/builtins.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct inetkey diff --git a/contrib/btree_gist/btree_int2.c b/contrib/btree_gist/btree_int2.c index 33eccdedd7049..faf456997bbf1 100644 --- a/contrib/btree_gist/btree_int2.c +++ b/contrib/btree_gist/btree_int2.c @@ -6,6 +6,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "common/int.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct int16key diff --git a/contrib/btree_gist/btree_int4.c b/contrib/btree_gist/btree_int4.c index a82cee9a58a8c..0bdb9e58c5601 100644 --- a/contrib/btree_gist/btree_int4.c +++ b/contrib/btree_gist/btree_int4.c @@ -5,6 +5,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "common/int.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct int32key diff --git a/contrib/btree_gist/btree_int8.c b/contrib/btree_gist/btree_int8.c index f0c56e017269a..a9a7b56927847 100644 --- a/contrib/btree_gist/btree_int8.c +++ b/contrib/btree_gist/btree_int8.c @@ -6,6 +6,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "common/int.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct int64key diff --git a/contrib/btree_gist/btree_interval.c b/contrib/btree_gist/btree_interval.c index b5e365c6e09b4..19eefc60cde3f 100644 --- a/contrib/btree_gist/btree_interval.c +++ b/contrib/btree_gist/btree_interval.c @@ -6,6 +6,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "utils/fmgrprotos.h" +#include "utils/rel.h" #include "utils/sortsupport.h" #include "utils/timestamp.h" diff --git a/contrib/btree_gist/btree_macaddr.c b/contrib/btree_gist/btree_macaddr.c index 3b2f26719d5dc..c444a709853a7 100644 --- a/contrib/btree_gist/btree_macaddr.c +++ b/contrib/btree_gist/btree_macaddr.c @@ -7,6 +7,7 @@ #include "btree_utils_num.h" #include "utils/fmgrprotos.h" #include "utils/inet.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct diff --git a/contrib/btree_gist/btree_macaddr8.c b/contrib/btree_gist/btree_macaddr8.c index f2b104617e680..6d9837d90a340 100644 --- a/contrib/btree_gist/btree_macaddr8.c +++ b/contrib/btree_gist/btree_macaddr8.c @@ -7,6 +7,7 @@ #include "btree_utils_num.h" #include "utils/fmgrprotos.h" #include "utils/inet.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct diff --git a/contrib/btree_gist/btree_oid.c b/contrib/btree_gist/btree_oid.c index ffe0d7983e40f..b8f2f661076c6 100644 --- a/contrib/btree_gist/btree_oid.c +++ b/contrib/btree_gist/btree_oid.c @@ -5,6 +5,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct diff --git a/contrib/btree_gist/btree_time.c b/contrib/btree_gist/btree_time.c index fd6a2e05bc7b1..9483846c4738b 100644 
--- a/contrib/btree_gist/btree_time.c +++ b/contrib/btree_gist/btree_time.c @@ -7,6 +7,7 @@ #include "btree_utils_num.h" #include "utils/fmgrprotos.h" #include "utils/date.h" +#include "utils/rel.h" #include "utils/sortsupport.h" #include "utils/timestamp.h" diff --git a/contrib/btree_gist/btree_ts.c b/contrib/btree_gist/btree_ts.c index 1e8f83f0f0a49..b7bbae2f4d6fb 100644 --- a/contrib/btree_gist/btree_ts.c +++ b/contrib/btree_gist/btree_ts.c @@ -10,6 +10,7 @@ #include "utils/fmgrprotos.h" #include "utils/timestamp.h" #include "utils/float.h" +#include "utils/rel.h" #include "utils/sortsupport.h" typedef struct diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c index d9df2356cd1e4..fb466e5aa32ba 100644 --- a/contrib/btree_gist/btree_utils_var.c +++ b/contrib/btree_gist/btree_utils_var.c @@ -11,6 +11,7 @@ #include "btree_utils_var.h" #include "mb/pg_wchar.h" #include "utils/rel.h" +#include "varatt.h" /* used for key sorting */ typedef struct diff --git a/contrib/btree_gist/btree_uuid.c b/contrib/btree_gist/btree_uuid.c index 23a307a6a71d5..07f304f39f14c 100644 --- a/contrib/btree_gist/btree_uuid.c +++ b/contrib/btree_gist/btree_uuid.c @@ -6,6 +6,7 @@ #include "btree_gist.h" #include "btree_utils_num.h" #include "port/pg_bswap.h" +#include "utils/rel.h" #include "utils/sortsupport.h" #include "utils/uuid.h" diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c index 08884116aecbe..ab0b6946cb031 100644 --- a/src/backend/access/nbtree/nbtdedup.c +++ b/src/backend/access/nbtree/nbtdedup.c @@ -16,6 +16,7 @@ #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/tableam.h" #include "access/xloginsert.h" #include "miscadmin.h" #include "utils/rel.h" diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index aa82cede30aa4..be60781fc98ec 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -17,6 +17,7 @@ #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xloginsert.h" #include "common/int.h" diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index 21c519cd108ed..936b93f157a8b 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -16,11 +16,13 @@ #include "postgres.h" #include "access/nbtree.h" +#include "access/relscan.h" #include "common/int.h" #include "lib/qunique.h" #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/rel.h" typedef struct BTScanKeyPreproc { diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 9d70e89c1f3ce..8828a7a8f8952 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -44,6 +44,7 @@ #include "access/parallel.h" #include "access/relscan.h" #include "access/table.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/index.h" #include "commands/progress.h" diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index e6c9aaa0454dd..b88c396195a42 100644 --- a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/nbtree.h" +#include "access/tableam.h" #include "common/int.h" typedef enum diff --git a/src/backend/access/nbtree/nbtutils.c 
b/src/backend/access/nbtree/nbtutils.c
index 9aed207995f52..edfea2acaff66 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -19,10 +19,13 @@
 
 #include "access/nbtree.h"
 #include "access/reloptions.h"
+#include "access/relscan.h"
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
+#include "utils/rel.h"
+
 
 #define LOOK_AHEAD_REQUIRED_RECHECKS 3
 #define LOOK_AHEAD_DEFAULT_DISTANCE 5
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index c5d18e46716fd..890cdbe120418 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -31,6 +31,7 @@
 #include "utils/datum.h"
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
+#include "utils/rel.h"
 #include "utils/tuplesort.h"
 
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index e709d2e0afe94..9ab467cb8fdec 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -17,9 +17,8 @@
 #include "access/amapi.h"
 #include "access/itup.h"
 #include "access/sdir.h"
-#include "access/tableam.h"
-#include "access/xlogreader.h"
 #include "catalog/pg_am_d.h"
+#include "catalog/pg_class.h"
 #include "catalog/pg_index.h"
 #include "lib/stringinfo.h"
 #include "storage/bufmgr.h"
@@ -1285,9 +1284,10 @@ extern void _bt_pageinit(Page page, Size size);
 extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
								OffsetNumber *deletable, int ndeletable,
								BTVacuumPosting *updatable, int nupdatable);
+struct TM_IndexDeleteOp;		/* avoid including tableam.h here */
 extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
									  Relation heapRel,
-									  TM_IndexDeleteOp *delstate);
+									  struct TM_IndexDeleteOp *delstate);
 extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
 extern void _bt_pendingfsm_init(Relation rel, BTVacState *vstate,
								bool cleanuponly);

From 49cba82bec1f4fc815f929cb57064d780ee0032a Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Thu, 14 Aug 2025 11:22:58 -0400
Subject: [PATCH 506/602] ci: Per-repo configuration for manually triggered
 tasks

We do not want to trigger some tasks by default, to avoid using too many
compute credits. These tasks have to be manually triggered to be run. But
e.g. for cfbot we do have sufficient resources, so we always want to start
those tasks.

With this commit, an individual repository can be configured to trigger
them automatically using an environment variable defined under
"Repository Settings", for example:

REPO_CI_AUTOMATIC_TRIGGER_TASKS="mingw netbsd openbsd"

This will enable cfbot to turn them on by default when running tests for
the Commitfest app.

Backpatch this to PG 15, even though PG 15 does not have any manually
triggered task. Keeping the CI infrastructure the same seems advantageous.
Author: Andres Freund Co-authored-by: Thomas Munro Co-authored-by: Nazir Bilal Yavuz Reviewed-by: Nazir Bilal Yavuz Discussion: https://postgr.es/m/20240413021221.hg53rvqlvldqh57i%40awork3.anarazel.de Backpatch-through: 16 --- .cirrus.star | 50 +++++++++++++++++++++++++++++++++++++++++---- .cirrus.tasks.yml | 15 +++++++------- .cirrus.yml | 12 +++++++++-- src/tools/ci/README | 11 ++++++++++ 4 files changed, 75 insertions(+), 13 deletions(-) diff --git a/.cirrus.star b/.cirrus.star index 36233872d1e50..e73b842b82bf9 100644 --- a/.cirrus.star +++ b/.cirrus.star @@ -7,7 +7,7 @@ https://github.com/bazelbuild/starlark/blob/master/spec.md See also .cirrus.yml and src/tools/ci/README """ -load("cirrus", "env", "fs") +load("cirrus", "env", "fs", "yaml") def main(): @@ -18,19 +18,36 @@ def main(): 1) the contents of .cirrus.yml - 2) if defined, the contents of the file referenced by the, repository + 2) computed environment variables + + 3) if defined, the contents of the file referenced by the, repository level, REPO_CI_CONFIG_GIT_URL variable (see https://cirrus-ci.org/guide/programming-tasks/#fs for the accepted format) - 3) .cirrus.tasks.yml + 4) .cirrus.tasks.yml """ output = "" # 1) is evaluated implicitly + # Add 2) + additional_env = compute_environment_vars() + env_fmt = """ +### +# Computed environment variables start here +### +{0} +### +# Computed environment variables end here +### +""" + output += env_fmt.format(yaml.dumps({'env': additional_env})) + + + # Add 3) repo_config_url = env.get("REPO_CI_CONFIG_GIT_URL") if repo_config_url != None: print("loading additional configuration from \"{}\"".format(repo_config_url)) @@ -38,12 +55,37 @@ def main(): else: output += "\n# REPO_CI_CONFIG_URL was not set\n" - # Add 3) + + # Add 4) output += config_from(".cirrus.tasks.yml") + return output +def compute_environment_vars(): + cenv = {} + + # Some tasks are manually triggered by default because they might use too + # many resources for users of free Cirrus credits, but they can be + # triggered automatically by naming them in an environment variable e.g. + # REPO_CI_AUTOMATIC_TRIGGER_TASKS="task_name other_task" under "Repository + # Settings" on Cirrus CI's website. + + default_manual_trigger_tasks = ['mingw', 'netbsd', 'openbsd'] + + repo_ci_automatic_trigger_tasks = env.get('REPO_CI_AUTOMATIC_TRIGGER_TASKS', '') + for task in default_manual_trigger_tasks: + name = 'CI_TRIGGER_TYPE_' + task.upper() + if repo_ci_automatic_trigger_tasks.find(task) != -1: + value = 'automatic' + else: + value = 'manual' + cenv[name] = value + + return cenv + + def config_from(config_src): """return contents of config file `config_src`, surrounded by markers indicating start / end of the included file diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index 30891801339b5..b5927db5bf740 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -239,7 +239,6 @@ task: task: depends_on: SanityCheck - trigger_type: manual env: # Below are experimentally derived to be a decent choice. 
@@ -257,6 +256,8 @@ task:
 matrix:
 - name: NetBSD - Meson
+ # See REPO_CI_AUTOMATIC_TRIGGER_TASKS in .cirrus.star
+ trigger_type: $CI_TRIGGER_TYPE_NETBSD
 only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*netbsd.*'
 env:
 OS_NAME: netbsd
@@ -274,6 +275,8 @@ task:
 <<: *netbsd_task_template

 - name: OpenBSD - Meson
+ # See REPO_CI_AUTOMATIC_TRIGGER_TASKS in .cirrus.star
+ trigger_type: $CI_TRIGGER_TYPE_OPENBSD
 only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*openbsd.*'
 env:
 OS_NAME: openbsd
@@ -754,13 +757,11 @@ task:
 << : *WINDOWS_ENVIRONMENT_BASE
 name: Windows - Server 2019, MinGW64 - Meson

- # due to resource constraints we don't run this task by default for now
- trigger_type: manual
- # worth using only_if despite being manual, otherwise this task will show up
- # when e.g. ci-os-only: linux is used.
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*mingw.*'
- # otherwise it'll be sorted before other tasks
+ # See REPO_CI_AUTOMATIC_TRIGGER_TASKS in .cirrus.star.
+ trigger_type: $CI_TRIGGER_TYPE_MINGW
+
 depends_on: SanityCheck
+ only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*mingw.*'

 env:
 TEST_JOBS: 4 # higher concurrency causes occasional failures
diff --git a/.cirrus.yml b/.cirrus.yml
index 33c6e481d746a..3f75852e84ecb 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -10,12 +10,20 @@
 #
 # 1) the contents of this file
 #
-# 2) if defined, the contents of the file referenced by the, repository
+# 2) computed environment variables
+#
+# Used to enable/disable tasks based on the execution environment. See
+# .cirrus.star: compute_environment_vars()
+#
+# 3) if defined, the contents of the file referenced by the, repository
 # level, REPO_CI_CONFIG_GIT_URL variable (see
 # https://cirrus-ci.org/guide/programming-tasks/#fs for the accepted
 # format)
 #
-# 3) .cirrus.tasks.yml
+# This allows running tasks in a different execution environment than the
+# default, e.g. to have sufficient resources for cfbot.
+#
+# 4) .cirrus.tasks.yml
 #
 # This composition is done by .cirrus.star
diff --git a/src/tools/ci/README b/src/tools/ci/README
index 12c1e7c308fa9..d183648a8d02b 100644
--- a/src/tools/ci/README
+++ b/src/tools/ci/README
@@ -82,3 +82,14 @@ defined in .cirrus.yml, by redefining the relevant yaml anchors.
 Custom compute resources can be provided using
 - https://cirrus-ci.org/guide/supported-computing-services/
 - https://cirrus-ci.org/guide/persistent-workers/
+
+
+Enabling manual tasks by default
+================================
+
+Some tasks are not triggered automatically by default, to avoid using up CI
+credits too quickly. This can be changed on the repository level, e.g. when
+custom compute resources are configured.
+
+The following repository level environment variables are recognized:
+- REPO_CI_AUTOMATIC_TRIGGER_TASKS - space-separated list of (mingw|netbsd|openbsd)

From 60b64e6a31a21449f4db49017417c225ee322986 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Thu, 14 Aug 2025 11:48:04 -0400
Subject: [PATCH 507/602] ci: Simplify ci-os-only handling

Handle 'ci-os-only' occurrences in the .cirrus.star file instead of the
.cirrus.tasks.yml file. Now, 'ci-os-only' occurrences are controlled from
one central place instead of being handled in each task.
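As a usage sketch (behavior inferred from the parsing code in this patch,
with an assumed commit message): ending a commit message with

    ci-os-only: freebsd, linux

leaves CI_FREEBSD_ENABLED and CI_LINUX_ENABLED set to true while every
other entry of the list, including SanityCheck, is disabled.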
Author: Andres Freund Reviewed-by: Nazir Bilal Yavuz Discussion: https://postgr.es/m/20240413021221.hg53rvqlvldqh57i%40awork3.anarazel.de Backpatch: 15-, where CI support was added --- .cirrus.star | 40 +++++++++++++++++++++++++++++++++++++++- .cirrus.tasks.yml | 21 ++++++++++----------- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/.cirrus.star b/.cirrus.star index e73b842b82bf9..e9bb672b95936 100644 --- a/.cirrus.star +++ b/.cirrus.star @@ -7,7 +7,7 @@ https://github.com/bazelbuild/starlark/blob/master/spec.md See also .cirrus.yml and src/tools/ci/README """ -load("cirrus", "env", "fs", "yaml") +load("cirrus", "env", "fs", "re", "yaml") def main(): @@ -66,6 +66,7 @@ def main(): def compute_environment_vars(): cenv = {} + ### # Some tasks are manually triggered by default because they might use too # many resources for users of free Cirrus credits, but they can be # triggered automatically by naming them in an environment variable e.g. @@ -82,6 +83,43 @@ def compute_environment_vars(): else: value = 'manual' cenv[name] = value + ### + + ### + # Parse "ci-os-only:" tag in commit message and set + # CI_{$OS}_ENABLED variable for each OS + + # We want to disable SanityCheck if testing just a specific OS. This + # shortens push-wait-for-ci cycle time a bit when debugging operating + # system specific failures. Just treating it as an OS in that case + # suffices. + + operating_systems = [ + 'compilerwarnings', + 'freebsd', + 'linux', + 'macos', + 'mingw', + 'netbsd', + 'openbsd', + 'sanitycheck', + 'windows', + ] + commit_message = env.get('CIRRUS_CHANGE_MESSAGE') + match_re = r"(^|.*\n)ci-os-only: ([^\n]+)($|\n.*)" + + # re.match() returns an array with a tuple of (matched-string, match_1, ...) + m = re.match(match_re, commit_message) + if m and len(m) > 0: + os_only = m[0][2] + os_only_list = re.split(r'[, ]+', os_only) + else: + os_only_list = operating_systems + + for os in operating_systems: + os_enabled = os in os_only_list + cenv['CI_{0}_ENABLED'.format(os.upper())] = os_enabled + ### return cenv diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index b5927db5bf740..d84875168bb64 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -72,7 +72,7 @@ task: # push-wait-for-ci cycle time a bit when debugging operating system specific # failures. Uses skip instead of only_if, as cirrus otherwise warns about # only_if conditions not matching. 
- skip: $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:.*'
+ skip: $CI_SANITYCHECK_ENABLED == false

 env:
 CPUS: 4
@@ -167,7 +167,7 @@ task:
 <<: *freebsd_task_template

 depends_on: SanityCheck
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*freebsd.*'
+ only_if: $CI_FREEBSD_ENABLED

 sysinfo_script: |
 id
@@ -258,7 +258,7 @@ task:
 - name: NetBSD - Meson
 # See REPO_CI_AUTOMATIC_TRIGGER_TASKS in .cirrus.star
 trigger_type: $CI_TRIGGER_TYPE_NETBSD
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*netbsd.*'
+ only_if: $CI_NETBSD_ENABLED
 env:
 OS_NAME: netbsd
 IMAGE_FAMILY: pg-ci-netbsd-postgres
@@ -277,7 +277,7 @@ task:
 - name: OpenBSD - Meson
 # See REPO_CI_AUTOMATIC_TRIGGER_TASKS in .cirrus.star
 trigger_type: $CI_TRIGGER_TYPE_OPENBSD
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*openbsd.*'
+ only_if: $CI_OPENBSD_ENABLED
 env:
 OS_NAME: openbsd
 IMAGE_FAMILY: pg-ci-openbsd-postgres
@@ -417,7 +417,7 @@ task:
 <<: *linux_task_template

 depends_on: SanityCheck
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*linux.*'
+ only_if: $CI_LINUX_ENABLED

 ccache_cache:
 folder: ${CCACHE_DIR}
@@ -616,7 +616,7 @@ task:
 <<: *macos_task_template

 depends_on: SanityCheck
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*(macos|darwin|osx).*'
+ only_if: $CI_MACOS_ENABLED

 sysinfo_script: |
 id
@@ -722,7 +722,7 @@ task:
 <<: *windows_task_template

 depends_on: SanityCheck
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*windows.*'
+ only_if: $CI_WINDOWS_ENABLED

 setup_additional_packages_script: |
 REM choco install -y --no-progress ...
@@ -761,7 +761,7 @@ task:
 trigger_type: $CI_TRIGGER_TYPE_MINGW

 depends_on: SanityCheck
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*' || $CIRRUS_CHANGE_MESSAGE =~ '.*\nci-os-only:[^\n]*mingw.*'
+ only_if: $CI_MINGW_ENABLED

 env:
 TEST_JOBS: 4 # higher concurrency causes occasional failures
@@ -815,10 +815,9 @@ task:

 # To limit unnecessary work only run this once the SanityCheck
 # succeeds. This is particularly important for this task as we intentionally
- # use always: to continue after failures. Task that did not run count as a
- # success, so we need to recheck SanityChecks's condition here ...
+ # use always: to continue after failures.
 depends_on: SanityCheck
- only_if: $CIRRUS_CHANGE_MESSAGE !=~ '.*\nci-os-only:.*'
+ only_if: $CI_COMPILERWARNINGS_ENABLED

 env:
 CPUS: 4

From 69f75d6714753e594fb383d2e53a8003a8b54dfd Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 15 Aug 2025 12:30:36 +0900
Subject: [PATCH 508/602] Add SQL test for TOAST value allocations on rewrite

The SQL test added in this commit checks a specific code path that had no
coverage until now. When a TOAST datum is rewritten, toast_save_datum()
has a dedicated path to make sure that a new value is allocated if it does
not exist on the TOAST table yet.

This test uses a trick with PLAIN and EXTERNAL storage, with a tuple large
enough to be toasted and small enough to fit on a page. It is initially
stored in plain mode, and the rewrite forces the tuple to be stored
externally. The key point is that there is no value allocated during the
initial insert, and that there is one after the rewrite.
A second pattern checked is the reuse of the same value across rewrites,
using \gset. A set of patches under discussion is reworking this area of
the code, so this makes sure that such rewrite cases remain consistent
across the board.

Author: Nikhil Kumar Veldanda
Co-authored-by: Michael Paquier
Discussion: https://postgr.es/m/CAFAfj_E+kw5P713S8_jZyVgQAGVFfzFiTUJPrgo-TTtJJoazQw@mail.gmail.com
---
 src/test/regress/expected/vacuum.out | 46 ++++++++++++++++++++++++++++
 src/test/regress/sql/vacuum.sql | 30 ++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index 0abcc99989e07..85c783e2e56ce 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -686,3 +686,49 @@ RESET ROLE;
 DROP TABLE vacowned;
 DROP TABLE vacowned_parted;
 DROP ROLE regress_vacuum;
+-- Test checking how new toast values are allocated on rewrite.
+-- Create table with plain storage (forces inline storage initially).
+CREATE TABLE vac_rewrite_toast (id int, f1 TEXT STORAGE plain);
+-- Insert tuple large enough to trigger toast storage on rewrite, still
+-- small enough to fit on a page.
+INSERT INTO vac_rewrite_toast values (1, repeat('a', 7000));
+-- Switch to external storage to force toast table usage.
+ALTER TABLE vac_rewrite_toast ALTER COLUMN f1 SET STORAGE EXTERNAL;
+-- This second tuple is toasted, its value should still be the
+-- same after rewrite.
+INSERT INTO vac_rewrite_toast values (2, repeat('a', 7000));
+SELECT pg_column_toast_chunk_id(f1) AS id_2_chunk FROM vac_rewrite_toast
+ WHERE id = 2 \gset
+-- Check initial state of the data.
+SELECT id, pg_column_toast_chunk_id(f1) IS NULL AS f1_chunk_null,
+ substr(f1, 5, 10) AS f1_data,
+ pg_column_compression(f1) AS f1_comp
+ FROM vac_rewrite_toast ORDER BY id;
+ id | f1_chunk_null | f1_data | f1_comp
+----+---------------+------------+---------
+ 1 | t | aaaaaaaaaa |
+ 2 | f | aaaaaaaaaa |
+(2 rows)
+
+-- VACUUM FULL forces toast data rewrite.
+VACUUM FULL vac_rewrite_toast;
+-- Check after rewrite.
+SELECT id, pg_column_toast_chunk_id(f1) IS NULL AS f1_chunk_null,
+ substr(f1, 5, 10) AS f1_data,
+ pg_column_compression(f1) AS f1_comp
+ FROM vac_rewrite_toast ORDER BY id;
+ id | f1_chunk_null | f1_data | f1_comp
+----+---------------+------------+---------
+ 1 | f | aaaaaaaaaa |
+ 2 | f | aaaaaaaaaa |
+(2 rows)
+
+-- The same value is reused for the tuple toasted before the rewrite.
+SELECT pg_column_toast_chunk_id(f1) = :'id_2_chunk' AS same_chunk
+ FROM vac_rewrite_toast WHERE id = 2;
+ same_chunk
+------------
+ t
+(1 row)
+
+DROP TABLE vac_rewrite_toast;
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index a72bdb5b619d9..247b8e23b2357 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -495,3 +495,33 @@ RESET ROLE;
 DROP TABLE vacowned;
 DROP TABLE vacowned_parted;
 DROP ROLE regress_vacuum;
+
+-- Test checking how new toast values are allocated on rewrite.
+-- Create table with plain storage (forces inline storage initially).
+CREATE TABLE vac_rewrite_toast (id int, f1 TEXT STORAGE plain);
+-- Insert tuple large enough to trigger toast storage on rewrite, still
+-- small enough to fit on a page.
+INSERT INTO vac_rewrite_toast values (1, repeat('a', 7000));
+-- Switch to external storage to force toast table usage.
+ALTER TABLE vac_rewrite_toast ALTER COLUMN f1 SET STORAGE EXTERNAL; +-- This second tuple is toasted, its value should still be the +-- same after rewrite. +INSERT INTO vac_rewrite_toast values (2, repeat('a', 7000)); +SELECT pg_column_toast_chunk_id(f1) AS id_2_chunk FROM vac_rewrite_toast + WHERE id = 2 \gset +-- Check initial state of the data. +SELECT id, pg_column_toast_chunk_id(f1) IS NULL AS f1_chunk_null, + substr(f1, 5, 10) AS f1_data, + pg_column_compression(f1) AS f1_comp + FROM vac_rewrite_toast ORDER BY id; +-- VACUUM FULL forces toast data rewrite. +VACUUM FULL vac_rewrite_toast; +-- Check after rewrite. +SELECT id, pg_column_toast_chunk_id(f1) IS NULL AS f1_chunk_null, + substr(f1, 5, 10) AS f1_data, + pg_column_compression(f1) AS f1_comp + FROM vac_rewrite_toast ORDER BY id; +-- The same value is reused for the tuple toasted before the rewrite. +SELECT pg_column_toast_chunk_id(f1) = :'id_2_chunk' AS same_chunk + FROM vac_rewrite_toast WHERE id = 2; +DROP TABLE vac_rewrite_toast; From b4632883d44eae892e28dafbf674e6ac3dfd824d Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 15 Aug 2025 16:33:07 +1200 Subject: [PATCH 509/602] Add Asserts to validate prevbit values in bms_prev_member bms_prev_member() could attempt to access memory outside of the words[] array in cases where the prevbit was a number < -1 or > a->nwords * BITS_PER_BITMAPWORD + 1. Here we add the Asserts to help draw attention to bogus callers so we're more likely to catch them during development. In passing, fix wording of bms_prev_member's header comment which talks about how we expect the callers to ensure only valid prevbit values are used. Author: Greg Burd Reviewed-by: David Rowley Reviewed-by: Tom Lane Discussion: https://postgr.es/m/2000A717-1FFE-4031-827B-9330FB2E9065%40getmailspring.com --- src/backend/nodes/bitmapset.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index bf512cf806ff7..b4ecf0b039017 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -1343,7 +1343,7 @@ bms_next_member(const Bitmapset *a, int prevbit) * * Returns largest member less than "prevbit", or -2 if there is none. * "prevbit" must NOT be more than one above the highest possible bit that can - * be set at the Bitmapset at its current size. + * be set in the Bitmapset at its current size. * * To ease finding the highest set bit for the initial loop, the special * prevbit value of -1 can be passed to have the function find the highest @@ -1379,6 +1379,10 @@ bms_prev_member(const Bitmapset *a, int prevbit) if (a == NULL || prevbit == 0) return -2; + /* Validate callers didn't give us something out of range */ + Assert(prevbit <= a->nwords * BITS_PER_BITMAPWORD); + Assert(prevbit >= -1); + /* transform -1 to the highest possible bit we could have set */ if (prevbit == -1) prevbit = a->nwords * BITS_PER_BITMAPWORD - 1; From ca38912512a9b913542fa4f535885b4e5ccff02f Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 15 Aug 2025 17:23:45 +1200 Subject: [PATCH 510/602] Fix failing -D HASH_STATISTICS builds This seems to have been broken for a few years by cc5ef90ed. 
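(To reproduce such a build, one assumed way is to add the define at
configure time, e.g. ./configure CPPFLAGS='-DHASH_STATISTICS', or
-Dc_args='-DHASH_STATISTICS' with a meson setup; neither is a standard
build option.)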
Author: Hayato Kuroda (Fujitsu)
Reviewed-by: David Rowley
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/OSCPR01MB14966E11EEFB37D7857FCEDB7F535A@OSCPR01MB14966.jpnprd01.prod.outlook.com
Backpatch-through: 17
---
 src/backend/utils/hash/dynahash.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 81da03629f0d2..30755e6b6786e 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -1174,6 +1174,8 @@ hash_update_hash_key(HTAB *hashp,
 HashCompareFunc match;

 #ifdef HASH_STATISTICS
+ HASHHDR *hctl = hashp->hctl;
+
 hash_accesses++;
 hctl->accesses++;
 #endif

From 296cba276081192a18c484c0716bf290c16bf362 Mon Sep 17 00:00:00 2001
From: David Rowley
Date: Fri, 15 Aug 2025 18:05:44 +1200
Subject: [PATCH 511/602] Fix invalid format string in HASH_DEBUG code

This seems to have been broken back in be0a66666.

Reported-by: Hayato Kuroda (Fujitsu)
Author: David Rowley
Discussion: https://postgr.es/m/OSCPR01MB14966E11EEFB37D7857FCEDB7F535A@OSCPR01MB14966.jpnprd01.prod.outlook.com
Backpatch-through: 14
---
 src/backend/utils/hash/dynahash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 30755e6b6786e..d08435510076e 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -776,7 +776,7 @@ init_htab(HTAB *hashp, long nelem)
 hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);

 #ifdef HASH_DEBUG
- fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
+ fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%u\n%s%x\n%s%x\n%s%ld\n",
 "TABLE POINTER ", hashp,
 "DIRECTORY SIZE ", hctl->dsize,
 "SEGMENT SIZE ", hctl->ssize,

From 8212c8393906fe31d999a6b0d1e4cacb206529a3 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 15 Aug 2025 09:04:54 +0200
Subject: [PATCH 512/602] Add TAP tests for LDAP connection parameter lookup

Add TAP tests that test the LDAP Lookup of Connection Parameters
functionality in libpq. Prior to this commit, LDAP test coverage only
existed for the server-side authentication functionality and for the
connection service file with parameters directly specified in the file.
The tests included here test a pg_service.conf that contains a link to an
LDAP system that contains all of the connection parameters.

Author: Andrew Jackson
Discussion: https://www.postgresql.org/message-id/CAKK5BkHixcivSCA9pfd_eUp7wkLRhvQ6OtGLAYrWC%3Dk7E76LDQ%40mail.gmail.com
---
 src/test/ldap/meson.build | 1 +
 .../t/003_ldap_connection_param_lookup.pl | 216 ++++++++++++++++++
 2 files changed, 217 insertions(+)
 create mode 100644 src/test/ldap/t/003_ldap_connection_param_lookup.pl

diff --git a/src/test/ldap/meson.build b/src/test/ldap/meson.build
index 7eaa393212ad0..04c738d27580c 100644
--- a/src/test/ldap/meson.build
+++ b/src/test/ldap/meson.build
@@ -8,6 +8,7 @@ tests += {
 'tests': [
 't/001_auth.pl',
 't/002_bindpasswd.pl',
+ 't/003_ldap_connection_param_lookup.pl',
 ],
 'env': {
 'with_ldap': ldap.found() ?
'yes' : 'no',
diff --git a/src/test/ldap/t/003_ldap_connection_param_lookup.pl b/src/test/ldap/t/003_ldap_connection_param_lookup.pl
new file mode 100644
index 0000000000000..8c1e1caf992a4
--- /dev/null
+++ b/src/test/ldap/t/003_ldap_connection_param_lookup.pl
@@ -0,0 +1,216 @@
+
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use FindBin;
+use lib "$FindBin::RealBin/..";
+
+use File::Copy;
+use LdapServer;
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::Cluster;
+use Test::More;
+
+if ($ENV{with_ldap} ne 'yes')
+{
+ plan skip_all => 'LDAP not supported by this build';
+}
+elsif (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bldap\b/)
+{
+ plan skip_all =>
+ 'Potentially unsafe test LDAP not enabled in PG_TEST_EXTRA';
+}
+elsif (!$LdapServer::setup)
+{
+ plan skip_all => $LdapServer::setup_error;
+}
+
+# This tests scenarios where the connection parameters for a service
+# name are looked up in an LDAP server, via the service file.
+my $dummy_node = PostgreSQL::Test::Cluster->new('dummy_node');
+$dummy_node->init;
+
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init;
+$node->start;
+
+note "setting up LDAP server";
+
+my $ldap_rootpw = 'secret';
+my $ldap = LdapServer->new($ldap_rootpw, 'anonymous'); # use anonymous auth
+$ldap->ldapadd_file('authdata.ldif');
+$ldap->ldapsetpw('uid=test1,dc=example,dc=net', 'secret1');
+$ldap->ldapsetpw('uid=test2,dc=example,dc=net', 'secret2');
+
+# Windows vs non-Windows: CRLF vs LF for the file's newline, relying on
+# the fact that libpq uses fgets() when reading the lines of a service file.
+my $newline = $windows_os ? "\r\n" : "\n";
+
+my $td = PostgreSQL::Test::Utils::tempdir;
+
+# Create an LDIF file based on the postgres connection info.
+my $ldif_valid = "$td/connection_params.ldif";
+append_to_file($ldif_valid, "version:1");
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "dn:cn=mydatabase,dc=example,dc=net");
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "changetype:add");
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "objectclass:top");
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "objectclass:device");
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "cn:mydatabase");
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "description:host=");
+append_to_file($ldif_valid, $node->host);
+append_to_file($ldif_valid, $newline);
+append_to_file($ldif_valid, "description:port=");
+append_to_file($ldif_valid, $node->port);
+
+$ldap->ldapadd_file($ldif_valid);
+
+my ($ldap_server, $ldap_port, $ldaps_port, $ldap_url,
+ $ldaps_url, $ldap_basedn, $ldap_rootdn
+) = $ldap->prop(qw(server port s_port url s_url basedn rootdn));
+
+# don't bother to check the server's cert (though perhaps we should)
+$ENV{'LDAPTLS_REQCERT'} = "never";
+
+note "setting up PostgreSQL instance";
+
+# Create the set of service files used in the tests.
+
+# File that includes a valid service name, whose definition is an LDAP
+# URL pointing at the entry that holds the connection parameters.
+my $srvfile_valid = "$td/pg_service_valid.conf";
+append_to_file($srvfile_valid, "[my_srv]");
+append_to_file($srvfile_valid, $newline);
+append_to_file($srvfile_valid, "ldap://localhost:");
+append_to_file($srvfile_valid, $ldap_port);
+append_to_file($srvfile_valid,
+ "/dc=example,dc=net?description?one?(cn=mydatabase)");
+
+# File defined with no contents, used as default value for
+# PGSERVICEFILE, so that no lookup is attempted in the user's home
+# directory.
+my $srvfile_empty = "$td/pg_service_empty.conf";
+append_to_file($srvfile_empty, '');
+
+# Default service file in PGSYSCONFDIR.
+my $srvfile_default = "$td/pg_service.conf";
+
+# Missing service file.
+my $srvfile_missing = "$td/pg_service_missing.conf";
+
+# Set the fallback directory lookup of the service file to the
+# temporary directory of this test. PGSYSCONFDIR is used if the
+# service file defined in PGSERVICEFILE cannot be found, or when a
+# service file is found but not the service name.
+local $ENV{PGSYSCONFDIR} = $td;
+
+# Force PGSERVICEFILE to a default location, so that this test never
+# tries to look at a home directory. This value needs to remain at
+# the top of this script before running any tests, and should never be
+# changed.
+local $ENV{PGSERVICEFILE} = "$srvfile_empty";
+
+# Checks combinations of service name and a valid service file.
+{
+ local $ENV{PGSERVICEFILE} = $srvfile_valid;
+
+ $dummy_node->connect_ok(
+ 'service=my_srv',
+ 'connection with correct "service" string and PGSERVICEFILE',
+ sql => "SELECT 'connect1_1'",
+ expected_stdout => qr/connect1_1/);
+
+ $dummy_node->connect_ok(
+ 'postgres://?service=my_srv',
+ 'connection with correct "service" URI and PGSERVICEFILE',
+ sql => "SELECT 'connect1_2'",
+ expected_stdout => qr/connect1_2/);
+
+ $dummy_node->connect_fails(
+ 'service=undefined-service',
+ 'connection with incorrect "service" string and PGSERVICEFILE',
+ expected_stderr =>
+ qr/definition of service "undefined-service" not found/);
+
+ local $ENV{PGSERVICE} = 'my_srv';
+
+ $dummy_node->connect_ok(
+ '',
+ 'connection with correct PGSERVICE and PGSERVICEFILE',
+ sql => "SELECT 'connect1_3'",
+ expected_stdout => qr/connect1_3/);
+
+ local $ENV{PGSERVICE} = 'undefined-service';
+
+ $dummy_node->connect_fails(
+ '',
+ 'connection with incorrect PGSERVICE and PGSERVICEFILE',
+ expected_stderr =>
+ qr/definition of service "undefined-service" not found/);
+}
+
+# Checks case of incorrect service file.
+{
+ local $ENV{PGSERVICEFILE} = $srvfile_missing;
+
+ $dummy_node->connect_fails(
+ 'service=my_srv',
+ 'connection with correct "service" string and incorrect PGSERVICEFILE',
+ expected_stderr =>
+ qr/service file ".*pg_service_missing.conf" not found/);
+}
+
+# Checks case of service file named "pg_service.conf" in PGSYSCONFDIR.
+{
+ # Create copy of valid file
+ my $srvfile_default = "$td/pg_service.conf";
+ copy($srvfile_valid, $srvfile_default);
+
+ $dummy_node->connect_ok(
+ 'service=my_srv',
+ 'connection with correct "service" string and pg_service.conf',
+ sql => "SELECT 'connect2_1'",
+ expected_stdout => qr/connect2_1/);
+
+ $dummy_node->connect_ok(
+ 'postgres://?service=my_srv',
+ 'connection with correct "service" URI and default pg_service.conf',
+ sql => "SELECT 'connect2_2'",
+ expected_stdout => qr/connect2_2/);
+
+ $dummy_node->connect_fails(
+ 'service=undefined-service',
+ 'connection with incorrect "service" string and default pg_service.conf',
+ expected_stderr =>
+ qr/definition of service "undefined-service" not found/);
+
+ local $ENV{PGSERVICE} = 'my_srv';
+
+ $dummy_node->connect_ok(
+ '',
+ 'connection with correct PGSERVICE and default pg_service.conf',
+ sql => "SELECT 'connect2_3'",
+ expected_stdout => qr/connect2_3/);
+
+ local $ENV{PGSERVICE} = 'undefined-service';
+
+ $dummy_node->connect_fails(
+ '',
+ 'connection with incorrect PGSERVICE and default pg_service.conf',
+ expected_stderr =>
+ qr/definition of service "undefined-service" not found/);
+
+ # Remove default pg_service.conf.
+ unlink($srvfile_default);
+}
+
+$node->teardown_node;
+
+done_testing();

From 2e2e7ff7b891fcb880c1c221e41208dcb0787290 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 15 Aug 2025 10:29:16 +0200
Subject: [PATCH 513/602] Fix git whitespace warning

Recent changes to src/tools/ci/README triggered warnings like

src/tools/ci/README:88: leftover conflict marker

Raise conflict-marker-size in .gitattributes to avoid these.
---
 .gitattributes | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitattributes b/.gitattributes
index 8df6b75e653b8..4e26bbfb14552 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -12,8 +12,8 @@
 *.xsl whitespace=space-before-tab,trailing-space,tab-in-indent

 # Avoid confusing ASCII underlines with leftover merge conflict markers
-README conflict-marker-size=32
-README.* conflict-marker-size=32
+README conflict-marker-size=48
+README.* conflict-marker-size=48

 # Certain data files that contain special whitespace, and other special cases
 *.data -whitespace

From 37265ca01f0f361a3aff23e27dfc81b66057a039 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada
Date: Fri, 15 Aug 2025 11:58:53 -0700
Subject: [PATCH 514/602] Fix constant when extracting timestamp from UUIDv7.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When extracting a timestamp from a UUIDv7, a conversion from
milliseconds to microseconds was using the incorrect constant NS_PER_US
instead of US_PER_MS. Although both constants have the same value, this
fix improves code clarity by using the semantically correct constant.

Backpatch to v18, where UUIDv7 was introduced.
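As a quick way to see the conversion in action (a sketch; uuidv7() and
uuid_extract_timestamp() are the SQL functions backed by this code):

    SELECT uuid_extract_timestamp(uuidv7());
    -- expected: the current timestamp, at millisecond precision

The stored 48-bit field holds Unix epoch milliseconds, so the conversion
is a multiplication by US_PER_MS followed by the Unix-to-Postgres epoch
adjustment visible in the diff below.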
Author: Erik Nordström Reviewed-by: Andrey Borodin Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/CACAa4V+i07eaP6h4MHNydZeX47kkLPwAg0sqe67R=M5tLdxNuQ@mail.gmail.com Backpatch-through: 18 --- src/backend/utils/adt/uuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index 7413239f7af16..e5f27ff892ba6 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -748,7 +748,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS) + (((uint64) uuid->data[0]) << 40); /* convert ms to us, then adjust */ - ts = (TimestampTz) (tms * NS_PER_US) - + ts = (TimestampTz) (tms * US_PER_MS) - (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC; PG_RETURN_TIMESTAMPTZ(ts); From 928da6ff1254f471941e43f9ad258cc6b1f037e4 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Sat, 16 Aug 2025 01:11:40 -0700 Subject: [PATCH 515/602] Fix typos in comments. Oversight in commit fd5a1a0c3e56. Author: Tender Wang Discussion: https://postgr.es/m/CAHewXNmTT3M_w4NngG=6G3mdT3iJ6DdncTqV9YnGXBPHW8XYtA@mail.gmail.com --- src/backend/executor/execReplication.c | 2 +- src/backend/replication/logical/worker.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 68184f5d671e2..da0cbf41d6f88 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -582,7 +582,7 @@ RelationFindDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, * IsIndexUsableForFindingDeletedTuple), a full table scan becomes * necessary. In such cases, comparing the entire tuple is not required, * since the remote tuple might not include all column values. Instead, - * the indexed columns alone are suffcient to identify the target tuple + * the indexed columns alone are sufficient to identify the target tuple * (see logicalrep_rel_mark_updatable). */ indexbitmap = RelationGetIndexAttrBitmap(rel, diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 59b6ae7719ab1..8e34387345495 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -2923,7 +2923,7 @@ apply_handle_update_internal(ApplyExecutionData *edata, /* * Detecting whether the tuple was recently deleted or never existed - * is crucial to avoid misleading the user during confict handling. + * is crucial to avoid misleading the user during conflict handling. */ if (FindDeletedTupleInLocalRel(localrel, localindexoid, remoteslot, &conflicttuple.xmin, @@ -3392,7 +3392,7 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, /* * Detecting whether the tuple was recently deleted or * never existed is crucial to avoid misleading the user - * during confict handling. + * during conflict handling. */ if (FindDeletedTupleInLocalRel(partrel, part_entry->localindexoid, From f57e214d1cbb748b1e9be79e1b30d85fcbc9e340 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sun, 17 Aug 2025 15:20:01 +0900 Subject: [PATCH 516/602] Add isolation test for TOAST value reuse during CLUSTER This test exercises the corner case in toast_save_datum() where CLUSTER operations encounter duplicated TOAST references, reusing the existing TOAST data instead of creating redundant copies. During table rewrites like CLUSTER, both live and recently-dead versions of a row may reference the same TOAST value. 
When copying the second or later version of such a row, the system checks
if a TOAST value already exists in the new TOAST table using
toastrel_valueid_exists(). If found, toast_save_datum() sets
data_todo = 0 so that redundant data is not stored, ensuring only one
copy of the TOAST value exists in the new table.

The test relies on a combination of UPDATE, CLUSTER, and checks of the
TOAST values used before and after the relation rewrite, to make sure
that the same values are reused across the rewrite.

This is a continuation of 69f75d671475 to make sure that this corner case
keeps working should we mess with this area of the code.

Author: Nikhil Kumar Veldanda
Discussion: https://postgr.es/m/CAFAfj_E+kw5P713S8_jZyVgQAGVFfzFiTUJPrgo-TTtJJoazQw@mail.gmail.com
---
 .../expected/cluster-toast-value-reuse.out | 29 ++++++++
 src/test/isolation/isolation_schedule | 1 +
 .../specs/cluster-toast-value-reuse.spec | 69 +++++++++++++++++++
 3 files changed, 99 insertions(+)
 create mode 100644 src/test/isolation/expected/cluster-toast-value-reuse.out
 create mode 100644 src/test/isolation/specs/cluster-toast-value-reuse.spec

diff --git a/src/test/isolation/expected/cluster-toast-value-reuse.out b/src/test/isolation/expected/cluster-toast-value-reuse.out
new file mode 100644
index 0000000000000..cb14ddcee34b8
--- /dev/null
+++ b/src/test/isolation/expected/cluster-toast-value-reuse.out
@@ -0,0 +1,29 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1_begin s1_update s2_store_chunk_ids s2_cluster s1_commit s2_verify_chunk_ids
+step s1_begin: BEGIN;
+step s1_update: UPDATE cluster_toast_value SET flag = 1 WHERE TRUE;
+step s2_store_chunk_ids:
+ CREATE TABLE cluster_chunk_id AS
+ SELECT c.id, pg_column_toast_chunk_id(c.value) AS chunk_id
+ FROM cluster_toast_value c;
+ SELECT count(*) FROM cluster_chunk_id;
+
+count
+-----
+ 1
+(1 row)
+
+step s2_cluster: CLUSTER cluster_toast_value;
+step s1_commit: COMMIT;
+step s2_cluster: <... completed>
+step s2_verify_chunk_ids:
+ SELECT o.id AS chunk_ids_preserved
+ FROM cluster_chunk_id o
+ JOIN cluster_toast_value c ON o.id = c.id
+ WHERE o.chunk_id != pg_column_toast_chunk_id(c.value);
+
+chunk_ids_preserved
+-------------------
+(0 rows)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 4411d3c86ddad..9f1e997d81b00 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -111,6 +111,7 @@ test: partition-key-update-4
 test: plpgsql-toast
 test: cluster-conflict
 test: cluster-conflict-partition
+test: cluster-toast-value-reuse
 test: truncate-conflict
 test: serializable-parallel
 test: serializable-parallel-2
diff --git a/src/test/isolation/specs/cluster-toast-value-reuse.spec b/src/test/isolation/specs/cluster-toast-value-reuse.spec
new file mode 100644
index 0000000000000..1930eedb8505f
--- /dev/null
+++ b/src/test/isolation/specs/cluster-toast-value-reuse.spec
@@ -0,0 +1,69 @@
+# Tests with CLUSTER for toast values

+# This test does a relation rewrite, with toast values reused to make the
+# rewrite cheaper (see data_todo = 0 case in toast_save_datum()).
+#
+# A first session updates the table with an attribute not toasted. CLUSTER
+# is then executed in a second session. The comparison of the values
+# allocated for the toasted values is done using a CTAS. The allocated
+# chunk_ids are saved before the rewrite, and compared after the rewrite.
+ +# ---------- global setup ---------- +setup +{ + DROP TABLE IF EXISTS cluster_toast_value CASCADE; + DROP TABLE IF EXISTS cluster_chunk_id CASCADE; + + CREATE TABLE cluster_toast_value ( + id serial PRIMARY KEY, + flag integer, + value text); + + -- Make sure 'value' is large enough to be toasted. + ALTER TABLE cluster_toast_value ALTER COLUMN value SET STORAGE EXTERNAL; + + -- Clustering index. + CLUSTER cluster_toast_value_pkey ON cluster_toast_value; + + -- Seed data: one row with big string to force TOAST tuple and trigger the todo=0 code path. + INSERT INTO cluster_toast_value(flag, value) + VALUES (0, repeat(md5('1'), 120) || repeat('x', 8000)); + + CLUSTER cluster_toast_value; +} + +teardown +{ + DROP TABLE IF EXISTS cluster_toast_value; + DROP TABLE IF EXISTS cluster_chunk_id; +} + +session s1 +step s1_begin { BEGIN; } +step s1_update { UPDATE cluster_toast_value SET flag = 1 WHERE TRUE; } +step s1_commit { COMMIT; } + +session s2 +# Store the primary key values and their associated chunk IDs. This makes +# sure that some data is captured. +step s2_store_chunk_ids { + CREATE TABLE cluster_chunk_id AS + SELECT c.id, pg_column_toast_chunk_id(c.value) AS chunk_id + FROM cluster_toast_value c; + SELECT count(*) FROM cluster_chunk_id; +} +step s2_cluster { CLUSTER cluster_toast_value; } + +# Verify that toast values allocated are the same, indicating reuse. +# This query reports the tuples with toast values that do not match. +step s2_verify_chunk_ids { + SELECT o.id AS chunk_ids_preserved + FROM cluster_chunk_id o + JOIN cluster_toast_value c ON o.id = c.id + WHERE o.chunk_id != pg_column_toast_chunk_id(c.value); +} + +# Run UPDATE with its transaction still open, then store the chunk IDs. +# CLUSTER will wait until the first transaction commit. Finally, the chunk +# IDs are compared. +permutation s1_begin s1_update s2_store_chunk_ids s2_cluster s1_commit s2_verify_chunk_ids From 5a8ab650a782ffb78325fa66607a5fbb25c5c74a Mon Sep 17 00:00:00 2001 From: Etsuro Fujita Date: Sun, 17 Aug 2025 19:40:00 +0900 Subject: [PATCH 517/602] Update obsolete comments in ResultRelInfo struct. Commit c5b7ba4e6 changed things so that the ri_RootResultRelInfo field of this struct is set for both partitions and inheritance children and used for tuple routing and transition capture (before that commit, it was only set for partitions to route tuples into), but failed to update these comments. Author: Etsuro Fujita Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAPmGK14NF5CcdCmTZpxrvpvBiT0y4EqKikW1r_wAu1CEHeOmUA%40mail.gmail.com Backpatch-through: 14 --- src/include/nodes/execnodes.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index e107d6e5f8174..de782014b2d41 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -609,15 +609,13 @@ typedef struct ResultRelInfo bool ri_RootToChildMapValid; /* - * Information needed by tuple routing target relations + * Other information needed by child result relations * - * RootResultRelInfo gives the target relation mentioned in the query, if - * it's a partitioned table. It is not set if the target relation - * mentioned in the query is an inherited table, nor when tuple routing is - * not needed. + * ri_RootResultRelInfo gives the target relation mentioned in the query. + * Used as the root for tuple routing and/or transition capture. 
 *
- * PartitionTupleSlot is non-NULL if RootToChild conversion is needed and
- * the relation is a partition.
+ * ri_PartitionTupleSlot is non-NULL if the relation is a partition to
+ * route tuples into and ri_RootToChildMap conversion is needed.
 */
 struct ResultRelInfo *ri_RootResultRelInfo;
 TupleTableSlot *ri_PartitionTupleSlot;

From 97ca67377aa289762af432c96c5204685b3ecd21 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Mon, 18 Aug 2025 08:18:09 +0900
Subject: [PATCH 518/602] Remove md5() call from isolation test for CLUSTER
 and TOAST

This test was failing on platforms where MD5 computations are not
supported. This switches the test to rely on sha256() instead, providing
the same coverage while avoiding the failure.

Oversight in f57e214d1cbb.

Per buildfarm members gecko, molamola, shikra and froghopper.

Discussion: https://postgr.es/m/aKJijS2ZRfRZiYb0@paquier.xyz
---
 src/test/isolation/specs/cluster-toast-value-reuse.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/isolation/specs/cluster-toast-value-reuse.spec b/src/test/isolation/specs/cluster-toast-value-reuse.spec
index 1930eedb8505f..9a2d10600b39f 100644
--- a/src/test/isolation/specs/cluster-toast-value-reuse.spec
+++ b/src/test/isolation/specs/cluster-toast-value-reuse.spec
@@ -27,7 +27,7 @@ setup

 -- Seed data: one row with big string to force TOAST tuple and trigger the todo=0 code path.
 INSERT INTO cluster_toast_value(flag, value)
- VALUES (0, repeat(md5('1'), 120) || repeat('x', 8000));
+ VALUES (0, repeat(encode(sha256('1'), 'hex'), 120) || repeat('x', 8000));

 CLUSTER cluster_toast_value;
 }

From ba3d93b2e806a877f26922e0f9e1845d0ef1511b Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Mon, 18 Aug 2025 11:38:44 +0900
Subject: [PATCH 519/602] Refactor init_params() in sequence.c to not use
 FormData_pg_sequence_data

init_params() sets up "last_value" and "is_called" for a sequence
relation holding its metadata, based on the sequence properties in
pg_sequences. "log_cnt" is the third property that can be updated in this
routine for FormData_pg_sequence_data, tracking when WAL records should
be generated for a sequence after nextval() iterations. This routine is
called when creating or altering a sequence.

This commit refactors init_params() to no longer depend on
FormData_pg_sequence_data, removing traces of it in sequence.c and making
it easier to manipulate the metadata related to sequences. The knowledge
about "log_cnt" is replaced with a more general "reset_state" flag, to
let the caller know if the sequence state should be reset. In the case
of in-core sequences, this relates to WAL logging. We still need to
depend on FormData_pg_sequence.
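As a rough sketch of the resulting contract (names taken from the patch
below):

    init_params(pstate, stmt->options, for_identity, isInit,
                seqform,                 /* pg_sequence catalog fields */
                &last_value, &reset_state, &is_called,
                &need_seq_rewrite, &owned_by);

Callers now copy last_value and is_called back into the sequence tuple
themselves, and zero log_cnt only when reset_state has been set.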
Author: Michael Paquier Reviewed-by: Kirill Reshke Reviewed-by: Tomas Vondra Discussion: https://postgr.es/m/ZWlohtKAs0uVVpZ3@paquier.xyz --- src/backend/commands/sequence.c | 81 ++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 451ae6f7f6940..a3c8cff97b03b 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -106,7 +106,9 @@ static Form_pg_sequence_data read_seq_tuple(Relation rel, static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, - Form_pg_sequence_data seqdataform, + int64 *last_value, + bool *reset_state, + bool *is_called, bool *need_seq_rewrite, List **owned_by); static void do_setval(Oid relid, int64 next, bool iscalled); @@ -121,7 +123,9 @@ ObjectAddress DefineSequence(ParseState *pstate, CreateSeqStmt *seq) { FormData_pg_sequence seqform; - FormData_pg_sequence_data seqdataform; + int64 last_value; + bool reset_state; + bool is_called; bool need_seq_rewrite; List *owned_by; CreateStmt *stmt = makeNode(CreateStmt); @@ -164,7 +168,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) /* Check and set all option values */ init_params(pstate, seq->options, seq->for_identity, true, - &seqform, &seqdataform, + &seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); /* @@ -179,7 +183,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) { case SEQ_COL_LASTVAL: coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatumFast(seqdataform.last_value); + value[i - 1] = Int64GetDatumFast(last_value); break; case SEQ_COL_LOG: coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); @@ -448,6 +452,9 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) ObjectAddress address; Relation rel; HeapTuple seqtuple; + bool reset_state = false; + bool is_called; + int64 last_value; HeapTuple newdatatuple; /* Open and lock sequence, and check for ownership along the way. */ @@ -481,12 +488,14 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) /* copy the existing sequence data tuple, so it can be modified locally */ newdatatuple = heap_copytuple(&datatuple); newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple); + last_value = newdataform->last_value; + is_called = newdataform->is_called; UnlockReleaseBuffer(buf); /* Check and set new values */ init_params(pstate, stmt->options, stmt->for_identity, false, - seqform, newdataform, + seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); /* If needed, rewrite the sequence relation itself */ @@ -513,6 +522,10 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) /* * Insert the modified tuple into the new storage file. */ + newdataform->last_value = last_value; + newdataform->is_called = is_called; + if (reset_state) + newdataform->log_cnt = 0; fill_seq_with_data(seqrel, newdatatuple); } @@ -1236,17 +1249,19 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) /* * init_params: process the options list of CREATE or ALTER SEQUENCE, and * store the values into appropriate fields of seqform, for changes that go - * into the pg_sequence catalog, and fields of seqdataform for changes to the - * sequence relation itself. Set *need_seq_rewrite to true if we changed any - * parameters that require rewriting the sequence's relation (interesting for - * ALTER SEQUENCE). 
Also set *owned_by to any OWNED BY option, or to NIL if - * there is none. + * into the pg_sequence catalog, and fields for changes to the sequence + * relation itself (*is_called, *last_value and *reset_state). Set + * *need_seq_rewrite to true if we changed any parameters that require + * rewriting the sequence's relation (interesting for ALTER SEQUENCE). Also + * set *owned_by to any OWNED BY option, or to NIL if there is none. Set + * *reset_state to true if the internal state of the sequence needs to be + * reset, affecting future nextval() calls, for example with WAL logging. * * If isInit is true, fill any unspecified options with default values; * otherwise, do not change existing options that aren't explicitly overridden. * * Note: we force a sequence rewrite whenever we change parameters that affect - * generation of future sequence values, even if the seqdataform per se is not + * generation of future sequence values, even if the metadata per se is not * changed. This allows ALTER SEQUENCE to behave transactionally. Currently, * the only option that doesn't cause that is OWNED BY. It's *necessary* for * ALTER SEQUENCE OWNED BY to not rewrite the sequence, because that would @@ -1257,7 +1272,9 @@ static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, - Form_pg_sequence_data seqdataform, + int64 *last_value, + bool *reset_state, + bool *is_called, bool *need_seq_rewrite, List **owned_by) { @@ -1363,11 +1380,11 @@ init_params(ParseState *pstate, List *options, bool for_identity, } /* - * We must reset log_cnt when isInit or when changing any parameters that - * would affect future nextval allocations. + * We must reset the state of the sequence when isInit or when changing + * any parameters that would affect future nextval allocations. */ if (isInit) - seqdataform->log_cnt = 0; + *reset_state = true; /* AS type */ if (as_type != NULL) @@ -1416,7 +1433,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("INCREMENT must not be zero"))); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1428,7 +1445,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, { seqform->seqcycle = boolVal(is_cycled->arg); Assert(BoolIsValid(seqform->seqcycle)); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { @@ -1439,7 +1456,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (max_value != NULL && max_value->arg) { seqform->seqmax = defGetInt64(max_value); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit || max_value != NULL || reset_max_value) { @@ -1455,7 +1472,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, } else seqform->seqmax = -1; /* descending seq */ - seqdataform->log_cnt = 0; + *reset_state = true; } /* Validate maximum value. No need to check INT8 as seqmax is an int64 */ @@ -1471,7 +1488,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (min_value != NULL && min_value->arg) { seqform->seqmin = defGetInt64(min_value); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit || min_value != NULL || reset_min_value) { @@ -1487,7 +1504,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, } else seqform->seqmin = 1; /* ascending seq */ - seqdataform->log_cnt = 0; + *reset_state = true; } /* Validate minimum value. 
No need to check INT8 as seqmin is an int64 */ @@ -1538,30 +1555,30 @@ init_params(ParseState *pstate, List *options, bool for_identity, if (restart_value != NULL) { if (restart_value->arg != NULL) - seqdataform->last_value = defGetInt64(restart_value); + *last_value = defGetInt64(restart_value); else - seqdataform->last_value = seqform->seqstart; - seqdataform->is_called = false; - seqdataform->log_cnt = 0; + *last_value = seqform->seqstart; + *is_called = false; + *reset_state = true; } else if (isInit) { - seqdataform->last_value = seqform->seqstart; - seqdataform->is_called = false; + *last_value = seqform->seqstart; + *is_called = false; } /* crosscheck RESTART (or current value, if changing MIN/MAX) */ - if (seqdataform->last_value < seqform->seqmin) + if (*last_value < seqform->seqmin) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%" PRId64 ") cannot be less than MINVALUE (%" PRId64 ")", - seqdataform->last_value, + *last_value, seqform->seqmin))); - if (seqdataform->last_value > seqform->seqmax) + if (*last_value > seqform->seqmax) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("RESTART value (%" PRId64 ") cannot be greater than MAXVALUE (%" PRId64 ")", - seqdataform->last_value, + *last_value, seqform->seqmax))); /* CACHE */ @@ -1573,7 +1590,7 @@ init_params(ParseState *pstate, List *options, bool for_identity, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("CACHE (%" PRId64 ") must be greater than zero", seqform->seqcache))); - seqdataform->log_cnt = 0; + *reset_state = true; } else if (isInit) { From 4a4038068bb29eff2516296fc842b88741625afd Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 18 Aug 2025 07:35:55 +0200 Subject: [PATCH 520/602] meson: Move C99 test earlier Move the test for compiler options for C99 earlier in meson.build, before we make use of the compiler for other tests. That way, if any command-line options are needed, subsequent tests will also use them. This is at the moment a theoretical problem, but it seems better to get this correct. It also matches the order in the Autoconf-based build more closely. Discussion: https://www.postgresql.org/message-id/flat/01a69441-af54-4822-891b-ca28e05b215a@eisentraut.org --- meson.build | 85 ++++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/meson.build b/meson.build index 30e0edda3e793..a87eb913fff30 100644 --- a/meson.build +++ b/meson.build @@ -550,6 +550,48 @@ dir_doc_extension = dir_doc / 'extension' # used, they need to be added to test_c_args as well. ############################################################### +# Do we need -std=c99 to compile C99 code? We don't want to add -std=c99 +# unnecessarily, because we optionally rely on newer features. 
+c99_test = ''' +#include +#include +#include +#include + +struct named_init_test { + int a; + int b; +}; + +extern void structfunc(struct named_init_test); + +int main(int argc, char **argv) +{ + struct named_init_test nit = { + .a = 3, + .b = 5, + }; + + for (int loop_var = 0; loop_var < 3; loop_var++) + { + nit.a += nit.b; + } + + structfunc((struct named_init_test){1, 0}); + + return nit.a != 0; +} +''' + +if not cc.compiles(c99_test, name: 'c99') + if cc.compiles(c99_test, name: 'c99 with -std=c99', args: ['-std=c99']) + cflags += '-std=c99' + else + error('C compiler does not support C99') + endif +endif + + postgres_inc = [include_directories(postgres_inc_d)] test_lib_d = postgres_lib_d test_c_args = cppflags + cflags @@ -1704,49 +1746,6 @@ endif # Compiler tests ############################################################### -# Do we need -std=c99 to compile C99 code? We don't want to add -std=c99 -# unnecessarily, because we optionally rely on newer features. -c99_test = ''' -#include -#include -#include -#include - -struct named_init_test { - int a; - int b; -}; - -extern void structfunc(struct named_init_test); - -int main(int argc, char **argv) -{ - struct named_init_test nit = { - .a = 3, - .b = 5, - }; - - for (int loop_var = 0; loop_var < 3; loop_var++) - { - nit.a += nit.b; - } - - structfunc((struct named_init_test){1, 0}); - - return nit.a != 0; -} -''' - -if not cc.compiles(c99_test, name: 'c99', args: test_c_args) - if cc.compiles(c99_test, name: 'c99 with -std=c99', - args: test_c_args + ['-std=c99']) - test_c_args += '-std=c99' - cflags += '-std=c99' - else - error('C compiler does not support C99') - endif -endif - if host_machine.endian() == 'big' cdata.set('WORDS_BIGENDIAN', 1) endif From df9133fa63843627fb3579c89d892dc3d9ea2b95 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 18 Aug 2025 14:52:34 +0900 Subject: [PATCH 521/602] Move SQL-callable code related to multixacts into its own file A patch is under discussion to add more SQL capabilities related to multixacts, and this move avoids bloating the file more than necessary. This affects pg_get_multixact_members(). A side effect of this move is the requirement to add mxstatus_to_string() to multixact.h. Extracted from a larger patch by the same author, tweaked by me. 
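For reference, the moved code is the one backing this kind of query (an
illustrative example; 42 stands in for a real MultiXactId):

    SELECT * FROM pg_get_multixact_members('42');
    -- returns one (xid, mode) row per member, with mode rendered by
    -- mxstatus_to_string(), now exposed through multixact.h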
Author: Naga Appani Reviewed-by: Michael Paquier Reviewed-by: Ashutosh Bapat Discussion: https://postgr.es/m/CA+QeY+AAsYK6WvBW4qYzHz4bahHycDAY_q5ECmHkEV_eB9ckzg@mail.gmail.com --- src/backend/access/transam/multixact.c | 66 +------------------ src/backend/utils/adt/Makefile | 1 + src/backend/utils/adt/meson.build | 1 + src/backend/utils/adt/multixactfuncs.c | 87 ++++++++++++++++++++++++++ src/include/access/multixact.h | 1 + 5 files changed, 91 insertions(+), 65 deletions(-) create mode 100644 src/backend/utils/adt/multixactfuncs.c diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 3cb09c3d5987c..886740d2d5534 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -398,8 +398,6 @@ static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members); static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members); -static char *mxstatus_to_string(MultiXactStatus status); - /* management of SLRU infrastructure */ static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2); static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2); @@ -1747,7 +1745,7 @@ mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members) } } -static char * +char * mxstatus_to_string(MultiXactStatus status) { switch (status) @@ -3414,68 +3412,6 @@ multixact_redo(XLogReaderState *record) elog(PANIC, "multixact_redo: unknown op code %u", info); } -Datum -pg_get_multixact_members(PG_FUNCTION_ARGS) -{ - typedef struct - { - MultiXactMember *members; - int nmembers; - int iter; - } mxact; - MultiXactId mxid = PG_GETARG_TRANSACTIONID(0); - mxact *multi; - FuncCallContext *funccxt; - - if (mxid < FirstMultiXactId) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid MultiXactId: %u", mxid))); - - if (SRF_IS_FIRSTCALL()) - { - MemoryContext oldcxt; - TupleDesc tupdesc; - - funccxt = SRF_FIRSTCALL_INIT(); - oldcxt = MemoryContextSwitchTo(funccxt->multi_call_memory_ctx); - - multi = palloc(sizeof(mxact)); - /* no need to allow for old values here */ - multi->nmembers = GetMultiXactIdMembers(mxid, &multi->members, false, - false); - multi->iter = 0; - - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - funccxt->tuple_desc = tupdesc; - funccxt->attinmeta = TupleDescGetAttInMetadata(tupdesc); - funccxt->user_fctx = multi; - - MemoryContextSwitchTo(oldcxt); - } - - funccxt = SRF_PERCALL_SETUP(); - multi = (mxact *) funccxt->user_fctx; - - while (multi->iter < multi->nmembers) - { - HeapTuple tuple; - char *values[2]; - - values[0] = psprintf("%u", multi->members[multi->iter].xid); - values[1] = mxstatus_to_string(multi->members[multi->iter].status); - - tuple = BuildTupleFromCStrings(funccxt->attinmeta, values); - - multi->iter++; - pfree(values[0]); - SRF_RETURN_NEXT(funccxt, HeapTupleGetDatum(tuple)); - } - - SRF_RETURN_DONE(funccxt); -} - /* * Entrypoint for sync.c to sync offsets files. 
*/ diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index ffeacf2b819f3..cc68ac545a5f0 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -68,6 +68,7 @@ OBJS = \ misc.o \ multirangetypes.o \ multirangetypes_selfuncs.o \ + multixactfuncs.o \ name.o \ network.o \ network_gist.o \ diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index ed9bbd7b9266b..dac372c3bea3b 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -55,6 +55,7 @@ backend_sources += files( 'misc.c', 'multirangetypes.c', 'multirangetypes_selfuncs.c', + 'multixactfuncs.c', 'name.c', 'network.c', 'network_gist.c', diff --git a/src/backend/utils/adt/multixactfuncs.c b/src/backend/utils/adt/multixactfuncs.c new file mode 100644 index 0000000000000..e74ea93834860 --- /dev/null +++ b/src/backend/utils/adt/multixactfuncs.c @@ -0,0 +1,87 @@ +/*------------------------------------------------------------------------- + * + * multixactfuncs.c + * Functions for accessing multixact-related data. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/adt/multixactfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/multixact.h" +#include "funcapi.h" +#include "utils/builtins.h" + +/* + * pg_get_multixact_members + * + * Returns information about the MultiXactMembers of the specified + * MultiXactId. + */ +Datum +pg_get_multixact_members(PG_FUNCTION_ARGS) +{ + typedef struct + { + MultiXactMember *members; + int nmembers; + int iter; + } mxact; + MultiXactId mxid = PG_GETARG_TRANSACTIONID(0); + mxact *multi; + FuncCallContext *funccxt; + + if (mxid < FirstMultiXactId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid MultiXactId: %u", mxid))); + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcxt; + TupleDesc tupdesc; + + funccxt = SRF_FIRSTCALL_INIT(); + oldcxt = MemoryContextSwitchTo(funccxt->multi_call_memory_ctx); + + multi = palloc(sizeof(mxact)); + /* no need to allow for old values here */ + multi->nmembers = GetMultiXactIdMembers(mxid, &multi->members, false, + false); + multi->iter = 0; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + funccxt->tuple_desc = tupdesc; + funccxt->attinmeta = TupleDescGetAttInMetadata(tupdesc); + funccxt->user_fctx = multi; + + MemoryContextSwitchTo(oldcxt); + } + + funccxt = SRF_PERCALL_SETUP(); + multi = (mxact *) funccxt->user_fctx; + + while (multi->iter < multi->nmembers) + { + HeapTuple tuple; + char *values[2]; + + values[0] = psprintf("%u", multi->members[multi->iter].xid); + values[1] = mxstatus_to_string(multi->members[multi->iter].status); + + tuple = BuildTupleFromCStrings(funccxt->attinmeta, values); + + multi->iter++; + pfree(values[0]); + SRF_RETURN_NEXT(funccxt, HeapTupleGetDatum(tuple)); + } + + SRF_RETURN_DONE(funccxt); +} diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index b876e98f46ed7..6607b645a1820 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -158,5 +158,6 @@ extern void multixact_desc(StringInfo buf, XLogReaderState *record); extern const char *multixact_identify(uint8 info); extern char *mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember 
 								  *members);
+extern char *mxstatus_to_string(MultiXactStatus status);

 #endif							/* MULTIXACT_H */

From c61d51d50006a2a7bfe25d62ea0677e318febedc Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 18 Aug 2025 11:03:22 +0200
Subject: [PATCH 522/602] Detect buffer underflow in get_th()

Input with zero length can result in a buffer underflow when accessing
*(num + (len - 1)), as (len - 1) would produce a negative index. Add an
assertion for zero-length input to prevent it.

This was found by the ALT Linux Team.

Reviewing the call sites shows that get_th() currently cannot be applied
to an empty string: it is always called on a string containing a number
we've just printed. Therefore, an assertion rather than a user-facing
error message is sufficient.

Co-authored-by: Alexander Kuznetsov
Discussion: https://www.postgresql.org/message-id/flat/e22df993-cdb4-4d0a-b629-42211ebed582@altlinux.org
---
 src/backend/utils/adt/formatting.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 1d05481181db7..7ad453314c307 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1565,6 +1565,8 @@ get_th(char *num, int type)
 	int			len = strlen(num),
 				last;
 
+	Assert(len > 0);
+
 	last = *(num + (len - 1));
 	if (!isdigit((unsigned char) last))
 		ereport(ERROR,

From 5e8f05cd70a6a5446801b4c733874bd6603eba7b Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 18 Aug 2025 14:54:59 -0400
Subject: [PATCH 523/602] Fix missing "use Test::More" in Kerberos.pm.

Apparently the only Test::More function this script uses is BAIL_OUT,
so this omission just results in the wrong error output appearing in
the cases where it bails out. This seems to have been an oversight in
commit 9f899562d, which split Kerberos.pm out of another script.

Author: Maxim Orlov
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/CACG=ezY1Dp-S94b78nN0ZuaBGGcMUB6_nF-VyYUwPt1ArFqmGA@mail.gmail.com
Backpatch-through: 17
---
 src/test/perl/PostgreSQL/Test/Kerberos.pm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/test/perl/PostgreSQL/Test/Kerberos.pm b/src/test/perl/PostgreSQL/Test/Kerberos.pm
index b72dd2fbaf414..07a1ea899d031 100644
--- a/src/test/perl/PostgreSQL/Test/Kerberos.pm
+++ b/src/test/perl/PostgreSQL/Test/Kerberos.pm
@@ -9,6 +9,7 @@ package PostgreSQL::Test::Kerberos;
 use strict;
 use warnings FATAL => 'all';
 use PostgreSQL::Test::Utils;
+use Test::More;
 
 our (
 	$krb5_bin_dir, $krb5_sbin_dir,
 	$krb5_config, $kinit,

From 05fcb9667c371fda93830b87269c01d74ca247e5 Mon Sep 17 00:00:00 2001
From: David Rowley
Date: Tue, 19 Aug 2025 10:57:44 +1200
Subject: [PATCH 524/602] Use elog(DEBUG4) for dynahash.c statistics output

Previously this was being output to stderr. This commit adjusts things
to use elog(DEBUG4). Here we also adjust the format of the message to
add the hash table name and also put the message on a single line. This
should make grepping the logs for this information easier.

Also get rid of the global hash table statistics. These seemed very
dated and didn't fit very well with trying to put all the statistics
for a specific hash table on a single log line.

The main aim here is to allow us to have at least one buildfarm member
building with HASH_STATISTICS to help prevent future changes from
breaking things in that area. ca3891251 recently fixed some issues
here.

In passing, switch to using uint64 data types rather than longs for the
usage counters. The long type is only 32 bits wide on some platforms we
support.
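To illustrate the width problem this refers to, here is a minimal
standalone C sketch (not part of the patch); the 4-byte result for
"long" assumes an LLP64 platform such as 64-bit Windows:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t	counter = UINT32_MAX;	/* a 32-bit counter at its limit */

	/* On LLP64 platforms "long" is 4 bytes; uint64_t is always 8. */
	printf("sizeof(long) = %zu\n", sizeof(long));
	printf("sizeof(uint64_t) = %zu\n", sizeof(uint64_t));

	/* Unsigned overflow is well-defined: the counter wraps to zero, */
	/* which is how a long-based statistic would silently go wrong.  */
	counter++;
	printf("wrapped 32-bit counter: %u\n", counter);	/* prints 0 */

	return 0;
}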
Author: David Rowley
Reviewed-by: Tom Lane
Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/CAApHDvoccvJ9CG5zx+i-EyCzJbcL5K=CzqrnL_YN59qaL5hiaw@mail.gmail.com
---
 src/backend/utils/hash/dynahash.c | 40 +++++++++++--------------------
 src/include/utils/hsearch.h       |  2 +-
 2 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index d08435510076e..2b22c65e541d4 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -205,8 +205,9 @@ struct HASHHDR
 	 * Count statistics here.  NB: stats code doesn't bother with mutex, so
 	 * counts could be corrupted a bit in a partitioned table.
 	 */
-	long		accesses;
-	long		collisions;
+	uint64		accesses;
+	uint64		collisions;
+	uint64		expansions;
 #endif
 };
 
@@ -266,12 +267,6 @@ struct HTAB
  */
 #define MOD(x,y)			   ((x) & ((y)-1))
 
-#ifdef HASH_STATISTICS
-static long hash_accesses,
-			hash_collisions,
-			hash_expansions;
-#endif
-
 /*
  * Private function prototypes
  */
@@ -661,7 +656,7 @@ hdefault(HTAB *hashp)
 	hctl->isfixed = false;		/* can be enlarged */
 
 #ifdef HASH_STATISTICS
-	hctl->accesses = hctl->collisions = 0;
+	hctl->accesses = hctl->collisions = hctl->expansions = 0;
 #endif
 }
 
@@ -888,7 +883,7 @@ hash_destroy(HTAB *hashp)
 		/* so this hashtable must have its own context */
 		Assert(hashp->hcxt != NULL);
 
-		hash_stats("destroy", hashp);
+		hash_stats(__func__, hashp);
 
 		/*
 		 * Free everything by destroying the hash table's memory context.
 		 */
@@ -898,19 +893,16 @@ hash_destroy(HTAB *hashp)
 }
 
 void
-hash_stats(const char *where, HTAB *hashp)
+hash_stats(const char *caller, HTAB *hashp)
 {
 #ifdef HASH_STATISTICS
-	fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
-			where, hashp->hctl->accesses, hashp->hctl->collisions);
-
-	fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
-			hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
-			hashp->hctl->max_bucket, hashp->hctl->nsegs);
-	fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
-			where, hash_accesses, hash_collisions);
-	fprintf(stderr, "hash_stats: total expansions %ld\n",
-			hash_expansions);
+	HASHHDR    *hctl = hashp->hctl;
+
+	elog(DEBUG4,
+		 "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: %ld Key Size: %zu Max Bucket: %u Segment Count: %ld",
+		 caller != NULL ?
+		 caller : "(unknown)", hashp->tabname, hctl->accesses,
+		 hctl->collisions, hctl->expansions, hash_get_num_entries(hashp),
+		 hctl->keysize, hctl->max_bucket, hctl->nsegs);
 #endif
 }
 
@@ -996,7 +988,6 @@ hash_search_with_hash_value(HTAB *hashp,
 	HashCompareFunc match;
 
 #ifdef HASH_STATISTICS
-	hash_accesses++;
 	hctl->accesses++;
 #endif
 
@@ -1040,7 +1031,6 @@ hash_search_with_hash_value(HTAB *hashp,
 		prevBucketPtr = &(currBucket->link);
 		currBucket = *prevBucketPtr;
 #ifdef HASH_STATISTICS
-		hash_collisions++;
 		hctl->collisions++;
 #endif
 	}
@@ -1176,7 +1166,6 @@ hash_update_hash_key(HTAB *hashp,
 #ifdef HASH_STATISTICS
 	HASHHDR    *hctl = hashp->hctl;
 
-	hash_accesses++;
 	hctl->accesses++;
 #endif
 
@@ -1230,7 +1219,6 @@ hash_update_hash_key(HTAB *hashp,
 		prevBucketPtr = &(currBucket->link);
 		currBucket = *prevBucketPtr;
 #ifdef HASH_STATISTICS
-		hash_collisions++;
 		hctl->collisions++;
 #endif
 	}
@@ -1586,7 +1574,7 @@ expand_table(HTAB *hashp)
 	Assert(!IS_PARTITIONED(hctl));
 
 #ifdef HASH_STATISTICS
-	hash_expansions++;
+	hctl->expansions++;
 #endif
 
 	new_bucket = hctl->max_bucket + 1;

diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index 932cc4f34d90d..80deb8e543e6f 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -132,7 +132,7 @@ typedef struct
 extern HTAB *hash_create(const char *tabname, long nelem,
						 const HASHCTL *info, int flags);
 extern void hash_destroy(HTAB *hashp);
-extern void hash_stats(const char *where, HTAB *hashp);
+extern void hash_stats(const char *caller, HTAB *hashp);
 extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
						 bool *foundPtr);
 extern uint32 get_hash_value(HTAB *hashp, const void *keyPtr);

From a98ccf727ebbb6d620b1a7995012f1751f261075 Mon Sep 17 00:00:00 2001
From: David Rowley
Date: Tue, 19 Aug 2025 11:14:21 +1200
Subject: [PATCH 525/602] Remove HASH_DEBUG output from dynahash.c

This existed in a semi-broken state from be0a66666 until 296cba276.
Recent discussion has questioned the value of having this at all, as it
only outputs static information about various of the hash table's
properties when the hash table is created.

Author: Hayato Kuroda (Fujitsu)
Reviewed-by: Michael Paquier
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/OSCPR01MB1496650D03FA0293AB9C21416F534A@OSCPR01MB14966.jpnprd01.prod.outlook.com
---
 src/backend/utils/hash/dynahash.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 2b22c65e541d4..a7094917c20ca 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -80,9 +80,8 @@
 * are not implemented; otherwise functionality is identical.
 *
 * Compilation controls:
- * HASH_DEBUG controls some informative traces, mainly for debugging.
- * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
- * when combined with HASH_DEBUG, these are displayed by hdestroy().
+ * HASH_STATISTICS causes some usage statistics to be maintained, which can be
+ * logged by calling hash_stats().
 *
 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
 * concatenation property, in probably unnecessary code 'optimization'.
@@ -770,17 +769,6 @@ init_htab(HTAB *hashp, long nelem)
 	/* Choose number of entries to allocate at a time */
 	hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
 
-#ifdef HASH_DEBUG
-	fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%u\n%s%x\n%s%x\n%s%ld\n",
-			"TABLE POINTER   ", hashp,
-			"DIRECTORY SIZE  ", hctl->dsize,
-			"SEGMENT SIZE    ", hctl->ssize,
-			"SEGMENT SHIFT   ", hctl->sshift,
-			"MAX BUCKET      ", hctl->max_bucket,
-			"HIGH MASK       ", hctl->high_mask,
-			"LOW MASK        ", hctl->low_mask,
-			"NSEGS           ", hctl->nsegs);
-#endif
 	return true;
 }

From 24e71d53f88e1a37506cf1c9967b6db5d685f249 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 19 Aug 2025 08:57:20 +0900
Subject: [PATCH 526/602] Remove unneeded header declarations in multixact.c

Two header declarations were related to SQL-callable functions and
should have been cleaned up in df9133fa6384. On closer inspection, some
more includes can be removed, so let's clean these up as well while
we're at it.

Reported-by: Tom Lane
Discussion: https://postgr.es/m/345438.1755524834@sss.pgh.pa.us
---
 src/backend/access/transam/multixact.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 886740d2d5534..6a7963ccd03f8 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -70,16 +70,12 @@
 #include "access/multixact.h"
 #include "access/slru.h"
-#include "access/transam.h"
 #include "access/twophase.h"
 #include "access/twophase_rmgr.h"
-#include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
 #include "commands/dbcommands.h"
-#include "funcapi.h"
-#include "lib/ilist.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
@@ -87,7 +83,6 @@
 #include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
-#include "utils/fmgrprotos.h"
 #include "utils/guc_hooks.h"
 #include "utils/injection_point.h"
 #include "utils/memutils.h"

From 3c07944d048c82ae67884c1f09e81f53f769ac2a Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 19 Aug 2025 09:08:57 +0900
Subject: [PATCH 527/602] test_ddl_deparse: Rename test create_sequence_1 to
 create_sequence

This test was the only one named following the convention used for
alternate output files. This was a bit confusing when looking at the
diffs of the test, because one would think that the diffs are based on
an uncommon case, as alternate outputs are usually used for uncommon
configuration scenarios. create_sequence_1 was the only test in the
tree using such a name, and it had no alternate output.
Reviewed-by: Kirill Reshke Discussion: https://postgr.es/m/aKLY6wCa_OInr3kY@paquier.xyz --- src/test/modules/test_ddl_deparse/Makefile | 2 +- .../expected/{create_sequence_1.out => create_sequence.out} | 0 src/test/modules/test_ddl_deparse/meson.build | 2 +- .../sql/{create_sequence_1.sql => create_sequence.sql} | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename src/test/modules/test_ddl_deparse/expected/{create_sequence_1.out => create_sequence.out} (100%) rename src/test/modules/test_ddl_deparse/sql/{create_sequence_1.sql => create_sequence.sql} (100%) diff --git a/src/test/modules/test_ddl_deparse/Makefile b/src/test/modules/test_ddl_deparse/Makefile index 3a57a95c84969..6a9c133ebe970 100644 --- a/src/test/modules/test_ddl_deparse/Makefile +++ b/src/test/modules/test_ddl_deparse/Makefile @@ -13,7 +13,7 @@ REGRESS = test_ddl_deparse \ create_type \ create_conversion \ create_domain \ - create_sequence_1 \ + create_sequence \ create_table \ create_transform \ alter_table \ diff --git a/src/test/modules/test_ddl_deparse/expected/create_sequence_1.out b/src/test/modules/test_ddl_deparse/expected/create_sequence.out similarity index 100% rename from src/test/modules/test_ddl_deparse/expected/create_sequence_1.out rename to src/test/modules/test_ddl_deparse/expected/create_sequence.out diff --git a/src/test/modules/test_ddl_deparse/meson.build b/src/test/modules/test_ddl_deparse/meson.build index bff65ba6333d8..e60aee3b1d3b6 100644 --- a/src/test/modules/test_ddl_deparse/meson.build +++ b/src/test/modules/test_ddl_deparse/meson.build @@ -33,7 +33,7 @@ tests += { 'create_type', 'create_conversion', 'create_domain', - 'create_sequence_1', + 'create_sequence', 'create_table', 'create_transform', 'alter_table', diff --git a/src/test/modules/test_ddl_deparse/sql/create_sequence_1.sql b/src/test/modules/test_ddl_deparse/sql/create_sequence.sql similarity index 100% rename from src/test/modules/test_ddl_deparse/sql/create_sequence_1.sql rename to src/test/modules/test_ddl_deparse/sql/create_sequence.sql From 24225ad9aafc576295e210026d8ffa9f50d61145 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 19 Aug 2025 09:35:40 +0900 Subject: [PATCH 528/602] Pathify RHS unique-ification for semijoin planning There are two implementation techniques for semijoins: one uses the JOIN_SEMI jointype, where the executor emits at most one matching row per left-hand side (LHS) row; the other unique-ifies the right-hand side (RHS) and then performs a plain inner join. The latter technique currently has some drawbacks related to the unique-ification step. * Only the cheapest-total path of the RHS is considered during unique-ification. This may cause us to miss some optimization opportunities; for example, a path with a better sort order might be overlooked simply because it is not the cheapest in total cost. Such a path could help avoid a sort at a higher level, potentially resulting in a cheaper overall plan. * We currently rely on heuristics to choose between hash-based and sort-based unique-ification. A better approach would be to generate paths for both methods and allow add_path() to decide which one is preferable, consistent with how path selection is handled elsewhere in the planner. * In the sort-based implementation, we currently pay no attention to the pathkeys of the input subpath or the resulting output. This can result in redundant sort nodes being added to the final plan. This patch improves semijoin planning by creating a new RelOptInfo for the RHS rel to represent its unique-ified version. 
It then generates multiple paths that represent elimination of distinct rows from the RHS, considering both a hash-based implementation using the cheapest total path of the original RHS rel, and sort-based implementations that either exploit presorted input paths or explicitly sort the cheapest total path. All resulting paths compete in add_path(), and those deemed worthy of consideration are added to the new RelOptInfo. Finally, the unique-ified rel is joined with the other side of the semijoin using a plain inner join. As a side effect, most of the code related to the JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER jointypes -- used to indicate that the LHS or RHS path should be made unique -- has been removed. Besides, the T_Unique path now has the same meaning for both semijoins and upper DISTINCT clauses: it represents adjacent-duplicate removal on presorted input. This patch unifies their handling by sharing the same data structures and functions. This patch also removes the UNIQUE_PATH_NOOP related code along the way, as it is dead code -- if the RHS rel is provably unique, the semijoin should have already been simplified to a plain inner join by analyzejoins.c. Author: Richard Guo Reviewed-by: Alexandra Wang Reviewed-by: wenhui qiu Discussion: https://postgr.es/m/CAMbWs4-EBnaRvEs7frTLbsXiweSTUXifsteF-d3rvv01FKO86w@mail.gmail.com --- src/backend/optimizer/README | 3 +- src/backend/optimizer/path/costsize.c | 11 +- src/backend/optimizer/path/joinpath.c | 338 ++++-------- src/backend/optimizer/path/joinrels.c | 18 +- src/backend/optimizer/plan/createplan.c | 301 +---------- src/backend/optimizer/plan/planner.c | 518 ++++++++++++++++++- src/backend/optimizer/prep/prepunion.c | 30 +- src/backend/optimizer/util/pathnode.c | 306 +---------- src/backend/optimizer/util/relnode.c | 13 +- src/include/nodes/nodes.h | 4 +- src/include/nodes/pathnodes.h | 77 +-- src/include/optimizer/pathnode.h | 12 +- src/include/optimizer/planner.h | 3 + src/test/regress/expected/join.out | 15 +- src/test/regress/expected/partition_join.out | 94 ++-- src/test/regress/expected/subselect.out | 233 ++++++++- src/test/regress/sql/subselect.sql | 67 +++ src/tools/pgindent/typedefs.list | 2 - 18 files changed, 1074 insertions(+), 971 deletions(-) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 9c724ccfabf83..843368096fd0d 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -640,7 +640,6 @@ RelOptInfo - a relation or joined relations GroupResultPath - childless Result plan node (used for degenerate grouping) MaterialPath - a Material plan node MemoizePath - a Memoize plan node for caching tuples from sub-paths - UniquePath - remove duplicate rows (either by hashing or sorting) GatherPath - collect the results of parallel workers GatherMergePath - collect parallel results, preserving their common sort order ProjectionPath - a Result plan node with child (used for projection) @@ -648,7 +647,7 @@ RelOptInfo - a relation or joined relations SortPath - a Sort plan node applied to some sub-path IncrementalSortPath - an IncrementalSort plan node applied to some sub-path GroupPath - a Group plan node applied to some sub-path - UpperUniquePath - a Unique plan node applied to some sub-path + UniquePath - a Unique plan node applied to some sub-path AggPath - an Agg plan node applied to some sub-path GroupingSetsPath - an Agg plan node used to implement GROUPING SETS MinMaxAggPath - a Result plan node with subplans performing MIN/MAX diff --git 
a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 344a3188317b1..783dca8a4acbe 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3966,10 +3966,12 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * when we should not. Can we do better without expensive selectivity * computations? * - * The whole issue is moot if we are working from a unique-ified outer - * input, or if we know we don't need to mark/restore at all. + * The whole issue is moot if we know we don't need to mark/restore at + * all, or if we are working from a unique-ified outer input. */ - if (IsA(outer_path, UniquePath) || path->skip_mark_restore) + if (path->skip_mark_restore || + RELATION_WAS_MADE_UNIQUE(outer_path->parent, extra->sjinfo, + path->jpath.jointype)) rescannedtuples = 0; else { @@ -4364,7 +4366,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, * because we avoid contaminating the cache with a value that's wrong for * non-unique-ified paths. */ - if (IsA(inner_path, UniquePath)) + if (RELATION_WAS_MADE_UNIQUE(inner_path->parent, extra->sjinfo, + path->jpath.jointype)) { innerbucketsize = 1.0 / virtualbuckets; innermcvfreq = 0.0; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index ebedc5574ca9c..3b9407eb2eb79 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -112,12 +112,12 @@ static void generate_mergejoin_paths(PlannerInfo *root, * "flipped around" if we are considering joining the rels in the opposite * direction from what's indicated in sjinfo. * - * Also, this routine and others in this module accept the special JoinTypes - * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should - * unique-ify the outer or inner relation and then apply a regular inner - * join. These values are not allowed to propagate outside this module, - * however. Path cost estimation code may need to recognize that it's - * dealing with such a case --- the combination of nominal jointype INNER + * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and + * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been + * unique-ified and a regular inner join should then be applied. These values + * are not allowed to propagate outside this routine, however. Path cost + * estimation code, as well as match_unsorted_outer, may need to recognize that + * it's dealing with such a case --- the combination of nominal jointype INNER * with sjinfo->jointype == JOIN_SEMI indicates that. */ void @@ -129,6 +129,7 @@ add_paths_to_joinrel(PlannerInfo *root, SpecialJoinInfo *sjinfo, List *restrictlist) { + JoinType save_jointype = jointype; JoinPathExtraData extra; bool mergejoin_allowed = true; ListCell *lc; @@ -165,10 +166,10 @@ add_paths_to_joinrel(PlannerInfo *root, * reduce_unique_semijoins would've simplified it), so there's no point in * calling innerrel_is_unique. However, if the LHS covers all of the * semijoin's min_lefthand, then it's appropriate to set inner_unique - * because the path produced by create_unique_path will be unique relative - * to the LHS. (If we have an LHS that's only part of the min_lefthand, - * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid - * letting that value escape this module. + * because the unique relation produced by create_unique_paths will be + * unique relative to the LHS. (If we have an LHS that's only part of the + * min_lefthand, that is *not* true.) 
For JOIN_UNIQUE_OUTER, pass + * JOIN_INNER to avoid letting that value escape this module. */ switch (jointype) { @@ -199,6 +200,13 @@ add_paths_to_joinrel(PlannerInfo *root, break; } + /* + * If the outer or inner relation has been unique-ified, handle as a plain + * inner join. + */ + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin may be our only @@ -329,7 +337,7 @@ add_paths_to_joinrel(PlannerInfo *root, joinrel->fdwroutine->GetForeignJoinPaths) joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel, outerrel, innerrel, - jointype, &extra); + save_jointype, &extra); /* * 6. Finally, give extensions a chance to manipulate the path list. They @@ -339,7 +347,7 @@ add_paths_to_joinrel(PlannerInfo *root, */ if (set_join_pathlist_hook) set_join_pathlist_hook(root, joinrel, outerrel, innerrel, - jointype, &extra); + save_jointype, &extra); } /* @@ -1364,7 +1372,6 @@ sort_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { - JoinType save_jointype = jointype; Path *outer_path; Path *inner_path; Path *cheapest_partial_outer = NULL; @@ -1402,38 +1409,16 @@ sort_inner_and_outer(PlannerInfo *root, PATH_PARAM_BY_REL(inner_path, outerrel)) return; - /* - * If unique-ification is requested, do it and then handle as a plain - * inner join. - */ - if (jointype == JOIN_UNIQUE_OUTER) - { - outer_path = (Path *) create_unique_path(root, outerrel, - outer_path, extra->sjinfo); - Assert(outer_path); - jointype = JOIN_INNER; - } - else if (jointype == JOIN_UNIQUE_INNER) - { - inner_path = (Path *) create_unique_path(root, innerrel, - inner_path, extra->sjinfo); - Assert(inner_path); - jointype = JOIN_INNER; - } - /* * If the joinrel is parallel-safe, we may be able to consider a partial - * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the - * outer path will be partial, and therefore we won't be able to properly - * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT - * and JOIN_RIGHT_ANTI, because they can produce false null extended rows. + * merge join. However, we can't handle JOIN_FULL, JOIN_RIGHT and + * JOIN_RIGHT_ANTI, because they can produce false null extended rows. * Also, the resulting path must not be parameterized. 
*/ if (joinrel->consider_parallel && - save_jointype != JOIN_UNIQUE_OUTER && - save_jointype != JOIN_FULL && - save_jointype != JOIN_RIGHT && - save_jointype != JOIN_RIGHT_ANTI && + jointype != JOIN_FULL && + jointype != JOIN_RIGHT && + jointype != JOIN_RIGHT_ANTI && outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) { @@ -1441,7 +1426,7 @@ sort_inner_and_outer(PlannerInfo *root, if (inner_path->parallel_safe) cheapest_safe_inner = inner_path; - else if (save_jointype != JOIN_UNIQUE_INNER) + else cheapest_safe_inner = get_cheapest_parallel_safe_total_inner(innerrel->pathlist); } @@ -1580,13 +1565,9 @@ generate_mergejoin_paths(PlannerInfo *root, List *trialsortkeys; Path *cheapest_startup_inner; Path *cheapest_total_inner; - JoinType save_jointype = jointype; int num_sortkeys; int sortkeycnt; - if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) - jointype = JOIN_INNER; - /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_outer_pathkeys(root, @@ -1636,10 +1617,6 @@ generate_mergejoin_paths(PlannerInfo *root, extra, is_partial); - /* Can't do anything else if inner path needs to be unique'd */ - if (save_jointype == JOIN_UNIQUE_INNER) - return; - /* * Look for presorted inner paths that satisfy the innersortkey list --- * or any truncation thereof, if we are allowed to build a mergejoin using @@ -1819,7 +1796,6 @@ match_unsorted_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { - JoinType save_jointype = jointype; bool nestjoinOK; bool useallclauses; Path *inner_cheapest_total = innerrel->cheapest_total_path; @@ -1855,12 +1831,6 @@ match_unsorted_outer(PlannerInfo *root, nestjoinOK = false; useallclauses = true; break; - case JOIN_UNIQUE_OUTER: - case JOIN_UNIQUE_INNER: - jointype = JOIN_INNER; - nestjoinOK = true; - useallclauses = false; - break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); @@ -1873,24 +1843,20 @@ match_unsorted_outer(PlannerInfo *root, * If inner_cheapest_total is parameterized by the outer rel, ignore it; * we will consider it below as a member of cheapest_parameterized_paths, * but the other possibilities considered in this routine aren't usable. + * + * Furthermore, if the inner side is a unique-ified relation, we cannot + * generate any valid paths here, because the inner rel's dependency on + * the outer rel makes unique-ification meaningless. */ if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) + { inner_cheapest_total = NULL; - /* - * If we need to unique-ify the inner path, we will consider only the - * cheapest-total inner. - */ - if (save_jointype == JOIN_UNIQUE_INNER) - { - /* No way to do this with an inner path parameterized by outer rel */ - if (inner_cheapest_total == NULL) + if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype)) return; - inner_cheapest_total = (Path *) - create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo); - Assert(inner_cheapest_total); } - else if (nestjoinOK) + + if (nestjoinOK) { /* * Consider materializing the cheapest inner path, unless @@ -1914,20 +1880,6 @@ match_unsorted_outer(PlannerInfo *root, if (PATH_PARAM_BY_REL(outerpath, innerrel)) continue; - /* - * If we need to unique-ify the outer path, it's pointless to consider - * any but the cheapest outer. (XXX we don't consider parameterized - * outers, nor inners, for unique-ified cases. Should we?) 
- */ - if (save_jointype == JOIN_UNIQUE_OUTER) - { - if (outerpath != outerrel->cheapest_total_path) - continue; - outerpath = (Path *) create_unique_path(root, outerrel, - outerpath, extra->sjinfo); - Assert(outerpath); - } - /* * The result will have this sort order (even if it is implemented as * a nestloop, and even if some of the mergeclauses are implemented by @@ -1936,21 +1888,7 @@ match_unsorted_outer(PlannerInfo *root, merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys); - if (save_jointype == JOIN_UNIQUE_INNER) - { - /* - * Consider nestloop join, but only with the unique-ified cheapest - * inner path - */ - try_nestloop_path(root, - joinrel, - outerpath, - inner_cheapest_total, - merge_pathkeys, - jointype, - extra); - } - else if (nestjoinOK) + if (nestjoinOK) { /* * Consider nestloop joins using this outer path and various @@ -2001,17 +1939,13 @@ match_unsorted_outer(PlannerInfo *root, extra); } - /* Can't do anything else if outer path needs to be unique'd */ - if (save_jointype == JOIN_UNIQUE_OUTER) - continue; - /* Can't do anything else if inner rel is parameterized by outer */ if (inner_cheapest_total == NULL) continue; /* Generate merge join paths */ generate_mergejoin_paths(root, joinrel, innerrel, outerpath, - save_jointype, extra, useallclauses, + jointype, extra, useallclauses, inner_cheapest_total, merge_pathkeys, false); } @@ -2019,41 +1953,35 @@ match_unsorted_outer(PlannerInfo *root, /* * Consider partial nestloop and mergejoin plan if outerrel has any * partial path and the joinrel is parallel-safe. However, we can't - * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and - * therefore we won't be able to properly guarantee uniqueness. Nor can - * we handle joins needing lateral rels, since partial paths must not be - * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and + * handle joins needing lateral rels, since partial paths must not be + * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and * JOIN_RIGHT_ANTI, because they can produce false null extended rows. */ if (joinrel->consider_parallel && - save_jointype != JOIN_UNIQUE_OUTER && - save_jointype != JOIN_FULL && - save_jointype != JOIN_RIGHT && - save_jointype != JOIN_RIGHT_ANTI && + jointype != JOIN_FULL && + jointype != JOIN_RIGHT && + jointype != JOIN_RIGHT_ANTI && outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) { if (nestjoinOK) consider_parallel_nestloop(root, joinrel, outerrel, innerrel, - save_jointype, extra); + jointype, extra); /* * If inner_cheapest_total is NULL or non parallel-safe then find the - * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we - * can't use any alternative inner path. + * cheapest total parallel safe path. 
*/ if (inner_cheapest_total == NULL || !inner_cheapest_total->parallel_safe) { - if (save_jointype == JOIN_UNIQUE_INNER) - return; - - inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist); + inner_cheapest_total = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); } if (inner_cheapest_total) consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, - save_jointype, extra, + jointype, extra, inner_cheapest_total); } } @@ -2118,24 +2046,17 @@ consider_parallel_nestloop(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { - JoinType save_jointype = jointype; Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *matpath = NULL; ListCell *lc1; - if (jointype == JOIN_UNIQUE_INNER) - jointype = JOIN_INNER; - /* - * Consider materializing the cheapest inner path, unless: 1) we're doing - * JOIN_UNIQUE_INNER, because in this case we have to unique-ify the - * cheapest inner path, 2) enable_material is off, 3) the cheapest inner - * path is not parallel-safe, 4) the cheapest inner path is parameterized - * by the outer rel, or 5) the cheapest inner path materializes its output - * anyway. + * Consider materializing the cheapest inner path, unless: 1) + * enable_material is off, 2) the cheapest inner path is not + * parallel-safe, 3) the cheapest inner path is parameterized by the outer + * rel, or 4) the cheapest inner path materializes its output anyway. */ - if (save_jointype != JOIN_UNIQUE_INNER && - enable_material && inner_cheapest_total->parallel_safe && + if (enable_material && inner_cheapest_total->parallel_safe && !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) && !ExecMaterializesOutput(inner_cheapest_total->pathtype)) { @@ -2169,23 +2090,6 @@ consider_parallel_nestloop(PlannerInfo *root, if (!innerpath->parallel_safe) continue; - /* - * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's - * cheapest_total_path, and we have to unique-ify it. (We might - * be able to relax this to allow other safe, unparameterized - * inner paths, but right now create_unique_path is not on board - * with that.) 
- */ - if (save_jointype == JOIN_UNIQUE_INNER) - { - if (innerpath != innerrel->cheapest_total_path) - continue; - innerpath = (Path *) create_unique_path(root, innerrel, - innerpath, - extra->sjinfo); - Assert(innerpath); - } - try_partial_nestloop_path(root, joinrel, outerpath, innerpath, pathkeys, jointype, extra); @@ -2227,7 +2131,6 @@ hash_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { - JoinType save_jointype = jointype; bool isouterjoin = IS_OUTER_JOIN(jointype); List *hashclauses; ListCell *l; @@ -2290,6 +2193,8 @@ hash_inner_and_outer(PlannerInfo *root, Path *cheapest_startup_outer = outerrel->cheapest_startup_path; Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; + ListCell *lc1; + ListCell *lc2; /* * If either cheapest-total path is parameterized by the other rel, we @@ -2301,114 +2206,64 @@ hash_inner_and_outer(PlannerInfo *root, PATH_PARAM_BY_REL(cheapest_total_inner, outerrel)) return; - /* Unique-ify if need be; we ignore parameterized possibilities */ - if (jointype == JOIN_UNIQUE_OUTER) - { - cheapest_total_outer = (Path *) - create_unique_path(root, outerrel, - cheapest_total_outer, extra->sjinfo); - Assert(cheapest_total_outer); - jointype = JOIN_INNER; - try_hashjoin_path(root, - joinrel, - cheapest_total_outer, - cheapest_total_inner, - hashclauses, - jointype, - extra); - /* no possibility of cheap startup here */ - } - else if (jointype == JOIN_UNIQUE_INNER) - { - cheapest_total_inner = (Path *) - create_unique_path(root, innerrel, - cheapest_total_inner, extra->sjinfo); - Assert(cheapest_total_inner); - jointype = JOIN_INNER; + /* + * Consider the cheapest startup outer together with the cheapest + * total inner, and then consider pairings of cheapest-total paths + * including parameterized ones. There is no use in generating + * parameterized paths on the basis of possibly cheap startup cost, so + * this is sufficient. + */ + if (cheapest_startup_outer != NULL) try_hashjoin_path(root, joinrel, - cheapest_total_outer, + cheapest_startup_outer, cheapest_total_inner, hashclauses, jointype, extra); - if (cheapest_startup_outer != NULL && - cheapest_startup_outer != cheapest_total_outer) - try_hashjoin_path(root, - joinrel, - cheapest_startup_outer, - cheapest_total_inner, - hashclauses, - jointype, - extra); - } - else + + foreach(lc1, outerrel->cheapest_parameterized_paths) { + Path *outerpath = (Path *) lfirst(lc1); + /* - * For other jointypes, we consider the cheapest startup outer - * together with the cheapest total inner, and then consider - * pairings of cheapest-total paths including parameterized ones. - * There is no use in generating parameterized paths on the basis - * of possibly cheap startup cost, so this is sufficient. + * We cannot use an outer path that is parameterized by the inner + * rel. */ - ListCell *lc1; - ListCell *lc2; - - if (cheapest_startup_outer != NULL) - try_hashjoin_path(root, - joinrel, - cheapest_startup_outer, - cheapest_total_inner, - hashclauses, - jointype, - extra); + if (PATH_PARAM_BY_REL(outerpath, innerrel)) + continue; - foreach(lc1, outerrel->cheapest_parameterized_paths) + foreach(lc2, innerrel->cheapest_parameterized_paths) { - Path *outerpath = (Path *) lfirst(lc1); + Path *innerpath = (Path *) lfirst(lc2); /* - * We cannot use an outer path that is parameterized by the - * inner rel. + * We cannot use an inner path that is parameterized by the + * outer rel, either. 
*/ - if (PATH_PARAM_BY_REL(outerpath, innerrel)) + if (PATH_PARAM_BY_REL(innerpath, outerrel)) continue; - foreach(lc2, innerrel->cheapest_parameterized_paths) - { - Path *innerpath = (Path *) lfirst(lc2); - - /* - * We cannot use an inner path that is parameterized by - * the outer rel, either. - */ - if (PATH_PARAM_BY_REL(innerpath, outerrel)) - continue; + if (outerpath == cheapest_startup_outer && + innerpath == cheapest_total_inner) + continue; /* already tried it */ - if (outerpath == cheapest_startup_outer && - innerpath == cheapest_total_inner) - continue; /* already tried it */ - - try_hashjoin_path(root, - joinrel, - outerpath, - innerpath, - hashclauses, - jointype, - extra); - } + try_hashjoin_path(root, + joinrel, + outerpath, + innerpath, + hashclauses, + jointype, + extra); } } /* * If the joinrel is parallel-safe, we may be able to consider a - * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER, - * because the outer path will be partial, and therefore we won't be - * able to properly guarantee uniqueness. Also, the resulting path - * must not be parameterized. + * partial hash join. However, the resulting path must not be + * parameterized. */ if (joinrel->consider_parallel && - save_jointype != JOIN_UNIQUE_OUTER && outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) { @@ -2421,11 +2276,9 @@ hash_inner_and_outer(PlannerInfo *root, /* * Can we use a partial inner plan too, so that we can build a - * shared hash table in parallel? We can't handle - * JOIN_UNIQUE_INNER because we can't guarantee uniqueness. + * shared hash table in parallel? */ if (innerrel->partial_pathlist != NIL && - save_jointype != JOIN_UNIQUE_INNER && enable_parallel_hash) { cheapest_partial_inner = @@ -2441,19 +2294,18 @@ hash_inner_and_outer(PlannerInfo *root, * Normally, given that the joinrel is parallel-safe, the cheapest * total inner path will also be parallel-safe, but if not, we'll * have to search for the cheapest safe, unparameterized inner - * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative - * inner path. If full, right, right-semi or right-anti join, we - * can't use parallelism (building the hash table in each backend) + * path. If full, right, right-semi or right-anti join, we can't + * use parallelism (building the hash table in each backend) * because no one process has all the match bits. 
*/ - if (save_jointype == JOIN_FULL || - save_jointype == JOIN_RIGHT || - save_jointype == JOIN_RIGHT_SEMI || - save_jointype == JOIN_RIGHT_ANTI) + if (jointype == JOIN_FULL || + jointype == JOIN_RIGHT || + jointype == JOIN_RIGHT_SEMI || + jointype == JOIN_RIGHT_ANTI) cheapest_safe_inner = NULL; else if (cheapest_total_inner->parallel_safe) cheapest_safe_inner = cheapest_total_inner; - else if (save_jointype != JOIN_UNIQUE_INNER) + else cheapest_safe_inner = get_cheapest_parallel_safe_total_inner(innerrel->pathlist); diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index aad41b940091d..535248aa52516 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -19,6 +19,7 @@ #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/planner.h" #include "partitioning/partbounds.h" #include "utils/memutils.h" @@ -444,8 +445,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, } else if (sjinfo->jointype == JOIN_SEMI && bms_equal(sjinfo->syn_righthand, rel2->relids) && - create_unique_path(root, rel2, rel2->cheapest_total_path, - sjinfo) != NULL) + create_unique_paths(root, rel2, sjinfo) != NULL) { /*---------- * For a semijoin, we can join the RHS to anything else by @@ -477,8 +477,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, } else if (sjinfo->jointype == JOIN_SEMI && bms_equal(sjinfo->syn_righthand, rel1->relids) && - create_unique_path(root, rel1, rel1->cheapest_total_path, - sjinfo) != NULL) + create_unique_paths(root, rel1, sjinfo) != NULL) { /* Reversed semijoin case */ if (match_sjinfo) @@ -886,6 +885,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo, List *restrictlist) { + RelOptInfo *unique_rel2; + /* * Consider paths using each rel as both outer and inner. Depending on * the join type, a provably empty outer or inner rel might mean the join @@ -991,14 +992,13 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* * If we know how to unique-ify the RHS and one input rel is * exactly the RHS (not a superset) we can consider unique-ifying - * it and then doing a regular join. (The create_unique_path + * it and then doing a regular join. (The create_unique_paths * check here is probably redundant with what join_is_legal did, * but if so the check is cheap because it's cached. So test * anyway to be sure.) 
*/ if (bms_equal(sjinfo->syn_righthand, rel2->relids) && - create_unique_path(root, rel2, rel2->cheapest_total_path, - sjinfo) != NULL) + (unique_rel2 = create_unique_paths(root, rel2, sjinfo)) != NULL) { if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || restriction_is_constant_false(restrictlist, joinrel, false)) @@ -1006,10 +1006,10 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, mark_dummy_rel(joinrel); break; } - add_paths_to_joinrel(root, joinrel, rel1, rel2, + add_paths_to_joinrel(root, joinrel, rel1, unique_rel2, JOIN_UNIQUE_INNER, sjinfo, restrictlist); - add_paths_to_joinrel(root, joinrel, rel2, rel1, + add_paths_to_joinrel(root, joinrel, unique_rel2, rel1, JOIN_UNIQUE_OUTER, sjinfo, restrictlist); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9fd5c31edf228..6791cbeb416ed 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -95,8 +95,6 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path int flags); static Memoize *create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags); -static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path, - int flags); static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path); static Plan *create_projection_plan(PlannerInfo *root, ProjectionPath *best_path, @@ -106,8 +104,7 @@ static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) static IncrementalSort *create_incrementalsort_plan(PlannerInfo *root, IncrementalSortPath *best_path, int flags); static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path); -static Unique *create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, - int flags); +static Unique *create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags); static Agg *create_agg_plan(PlannerInfo *root, AggPath *best_path); static Plan *create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path); static Result *create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path); @@ -296,9 +293,9 @@ static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, static Group *make_group(List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, Plan *lefttree); -static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); static Unique *make_unique_from_pathkeys(Plan *lefttree, - List *pathkeys, int numCols); + List *pathkeys, int numCols, + Relids relids); static Gather *make_gather(List *qptlist, List *qpqual, int nworkers, int rescan_param, bool single_copy, Plan *subplan); static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, @@ -470,19 +467,9 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) flags); break; case T_Unique: - if (IsA(best_path, UpperUniquePath)) - { - plan = (Plan *) create_upper_unique_plan(root, - (UpperUniquePath *) best_path, - flags); - } - else - { - Assert(IsA(best_path, UniquePath)); - plan = create_unique_plan(root, - (UniquePath *) best_path, - flags); - } + plan = (Plan *) create_unique_plan(root, + (UniquePath *) best_path, + flags); break; case T_Gather: plan = (Plan *) create_gather_plan(root, @@ -1764,207 +1751,6 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags) return plan; } -/* - * create_unique_plan - * Create a Unique plan for 'best_path' and (recursively) plans - * for its subpaths. - * - * Returns a Plan node. 
- */ -static Plan * -create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) -{ - Plan *plan; - Plan *subplan; - List *in_operators; - List *uniq_exprs; - List *newtlist; - int nextresno; - bool newitems; - int numGroupCols; - AttrNumber *groupColIdx; - Oid *groupCollations; - int groupColPos; - ListCell *l; - - /* Unique doesn't project, so tlist requirements pass through */ - subplan = create_plan_recurse(root, best_path->subpath, flags); - - /* Done if we don't need to do any actual unique-ifying */ - if (best_path->umethod == UNIQUE_PATH_NOOP) - return subplan; - - /* - * As constructed, the subplan has a "flat" tlist containing just the Vars - * needed here and at upper levels. The values we are supposed to - * unique-ify may be expressions in these variables. We have to add any - * such expressions to the subplan's tlist. - * - * The subplan may have a "physical" tlist if it is a simple scan plan. If - * we're going to sort, this should be reduced to the regular tlist, so - * that we don't sort more data than we need to. For hashing, the tlist - * should be left as-is if we don't need to add any expressions; but if we - * do have to add expressions, then a projection step will be needed at - * runtime anyway, so we may as well remove unneeded items. Therefore - * newtlist starts from build_path_tlist() not just a copy of the - * subplan's tlist; and we don't install it into the subplan unless we are - * sorting or stuff has to be added. - */ - in_operators = best_path->in_operators; - uniq_exprs = best_path->uniq_exprs; - - /* initialize modified subplan tlist as just the "required" vars */ - newtlist = build_path_tlist(root, &best_path->path); - nextresno = list_length(newtlist) + 1; - newitems = false; - - foreach(l, uniq_exprs) - { - Expr *uniqexpr = lfirst(l); - TargetEntry *tle; - - tle = tlist_member(uniqexpr, newtlist); - if (!tle) - { - tle = makeTargetEntry((Expr *) uniqexpr, - nextresno, - NULL, - false); - newtlist = lappend(newtlist, tle); - nextresno++; - newitems = true; - } - } - - /* Use change_plan_targetlist in case we need to insert a Result node */ - if (newitems || best_path->umethod == UNIQUE_PATH_SORT) - subplan = change_plan_targetlist(subplan, newtlist, - best_path->path.parallel_safe); - - /* - * Build control information showing which subplan output columns are to - * be examined by the grouping step. Unfortunately we can't merge this - * with the previous loop, since we didn't then know which version of the - * subplan tlist we'd end up using. - */ - newtlist = subplan->targetlist; - numGroupCols = list_length(uniq_exprs); - groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); - groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); - - groupColPos = 0; - foreach(l, uniq_exprs) - { - Expr *uniqexpr = lfirst(l); - TargetEntry *tle; - - tle = tlist_member(uniqexpr, newtlist); - if (!tle) /* shouldn't happen */ - elog(ERROR, "failed to find unique expression in subplan tlist"); - groupColIdx[groupColPos] = tle->resno; - groupCollations[groupColPos] = exprCollation((Node *) tle->expr); - groupColPos++; - } - - if (best_path->umethod == UNIQUE_PATH_HASH) - { - Oid *groupOperators; - - /* - * Get the hashable equality operators for the Agg node to use. - * Normally these are the same as the IN clause operators, but if - * those are cross-type operators then the equality operators are the - * ones for the IN clause operators' RHS datatype. 
- */ - groupOperators = (Oid *) palloc(numGroupCols * sizeof(Oid)); - groupColPos = 0; - foreach(l, in_operators) - { - Oid in_oper = lfirst_oid(l); - Oid eq_oper; - - if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper)) - elog(ERROR, "could not find compatible hash operator for operator %u", - in_oper); - groupOperators[groupColPos++] = eq_oper; - } - - /* - * Since the Agg node is going to project anyway, we can give it the - * minimum output tlist, without any stuff we might have added to the - * subplan tlist. - */ - plan = (Plan *) make_agg(build_path_tlist(root, &best_path->path), - NIL, - AGG_HASHED, - AGGSPLIT_SIMPLE, - numGroupCols, - groupColIdx, - groupOperators, - groupCollations, - NIL, - NIL, - best_path->path.rows, - 0, - subplan); - } - else - { - List *sortList = NIL; - Sort *sort; - - /* Create an ORDER BY list to sort the input compatibly */ - groupColPos = 0; - foreach(l, in_operators) - { - Oid in_oper = lfirst_oid(l); - Oid sortop; - Oid eqop; - TargetEntry *tle; - SortGroupClause *sortcl; - - sortop = get_ordering_op_for_equality_op(in_oper, false); - if (!OidIsValid(sortop)) /* shouldn't happen */ - elog(ERROR, "could not find ordering operator for equality operator %u", - in_oper); - - /* - * The Unique node will need equality operators. Normally these - * are the same as the IN clause operators, but if those are - * cross-type operators then the equality operators are the ones - * for the IN clause operators' RHS datatype. - */ - eqop = get_equality_op_for_ordering_op(sortop, NULL); - if (!OidIsValid(eqop)) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - sortop); - - tle = get_tle_by_resno(subplan->targetlist, - groupColIdx[groupColPos]); - Assert(tle != NULL); - - sortcl = makeNode(SortGroupClause); - sortcl->tleSortGroupRef = assignSortGroupRef(tle, - subplan->targetlist); - sortcl->eqop = eqop; - sortcl->sortop = sortop; - sortcl->reverse_sort = false; - sortcl->nulls_first = false; - sortcl->hashable = false; /* no need to make this accurate */ - sortList = lappend(sortList, sortcl); - groupColPos++; - } - sort = make_sort_from_sortclauses(sortList, subplan); - label_sort_with_costsize(root, sort, -1.0); - plan = (Plan *) make_unique_from_sortclauses((Plan *) sort, sortList); - } - - /* Copy cost data from Path to Plan */ - copy_generic_path_info(plan, &best_path->path); - - return plan; -} - /* * create_gather_plan * @@ -2322,13 +2108,13 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) } /* - * create_upper_unique_plan + * create_unique_plan * * Create a Unique plan for 'best_path' and (recursively) plans * for its subpaths. */ static Unique * -create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flags) +create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) { Unique *plan; Plan *subplan; @@ -2340,9 +2126,17 @@ create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, int flag subplan = create_plan_recurse(root, best_path->subpath, flags | CP_LABEL_TLIST); + /* + * make_unique_from_pathkeys calls find_ec_member_matching_expr, which + * will ignore any child EC members that don't belong to the given relids. + * Thus, if this unique path is based on a child relation, we must pass + * its relids. + */ plan = make_unique_from_pathkeys(subplan, best_path->path.pathkeys, - best_path->numkeys); + best_path->numkeys, + IS_OTHER_REL(best_path->path.parent) ? 
+ best_path->path.parent->relids : NULL); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6880,61 +6674,14 @@ make_group(List *tlist, } /* - * distinctList is a list of SortGroupClauses, identifying the targetlist items - * that should be considered by the Unique filter. The input path must - * already be sorted accordingly. - */ -static Unique * -make_unique_from_sortclauses(Plan *lefttree, List *distinctList) -{ - Unique *node = makeNode(Unique); - Plan *plan = &node->plan; - int numCols = list_length(distinctList); - int keyno = 0; - AttrNumber *uniqColIdx; - Oid *uniqOperators; - Oid *uniqCollations; - ListCell *slitem; - - plan->targetlist = lefttree->targetlist; - plan->qual = NIL; - plan->lefttree = lefttree; - plan->righttree = NULL; - - /* - * convert SortGroupClause list into arrays of attr indexes and equality - * operators, as wanted by executor - */ - Assert(numCols > 0); - uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); - uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); - uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); - - foreach(slitem, distinctList) - { - SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem); - TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist); - - uniqColIdx[keyno] = tle->resno; - uniqOperators[keyno] = sortcl->eqop; - uniqCollations[keyno] = exprCollation((Node *) tle->expr); - Assert(OidIsValid(uniqOperators[keyno])); - keyno++; - } - - node->numCols = numCols; - node->uniqColIdx = uniqColIdx; - node->uniqOperators = uniqOperators; - node->uniqCollations = uniqCollations; - - return node; -} - -/* - * as above, but use pathkeys to identify the sort columns and semantics + * pathkeys is a list of PathKeys, identifying the sort columns and semantics. + * The input plan must already be sorted accordingly. + * + * relids identifies the child relation being unique-ified, if any. 
*/ static Unique * -make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) +make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols, + Relids relids) { Unique *node = makeNode(Unique); Plan *plan = &node->plan; @@ -6997,7 +6744,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) foreach(j, plan->targetlist) { tle = (TargetEntry *) lfirst(j); - em = find_ec_member_matching_expr(ec, tle->expr, NULL); + em = find_ec_member_matching_expr(ec, tle->expr, relids); if (em) { /* found expr already in tlist */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0d5a692e5fdca..65f1710159194 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -268,6 +268,12 @@ static bool group_by_has_partkey(RelOptInfo *input_rel, static int common_prefix_cmp(const void *a, const void *b); static List *generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist); +static void create_final_unique_paths(PlannerInfo *root, RelOptInfo *input_rel, + List *sortPathkeys, List *groupClause, + SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel); +static void create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel, + List *sortPathkeys, List *groupClause, + SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel); /***************************************************************************** @@ -4939,10 +4945,10 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, else { add_partial_path(partial_distinct_rel, (Path *) - create_upper_unique_path(root, partial_distinct_rel, - sorted_path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + create_unique_path(root, partial_distinct_rel, + sorted_path, + list_length(root->distinct_pathkeys), + numDistinctRows)); } } } @@ -5133,10 +5139,10 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, else { add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - sorted_path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + create_unique_path(root, distinct_rel, + sorted_path, + list_length(root->distinct_pathkeys), + numDistinctRows)); } } } @@ -8270,3 +8276,499 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist) return grouplist; } + +/* + * create_unique_paths + * Build a new RelOptInfo containing Paths that represent elimination of + * distinct rows from the input data. Distinct-ness is defined according to + * the needs of the semijoin represented by sjinfo. If it is not possible + * to identify how to make the data unique, NULL is returned. + * + * If used at all, this is likely to be called repeatedly on the same rel; + * So we cache the result. + */ +RelOptInfo * +create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) +{ + RelOptInfo *unique_rel; + List *sortPathkeys = NIL; + List *groupClause = NIL; + MemoryContext oldcontext; + + /* Caller made a mistake if SpecialJoinInfo is the wrong one */ + Assert(sjinfo->jointype == JOIN_SEMI); + Assert(bms_equal(rel->relids, sjinfo->syn_righthand)); + + /* If result already cached, return it */ + if (rel->unique_rel) + return rel->unique_rel; + + /* If it's not possible to unique-ify, return NULL */ + if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash)) + return NULL; + + /* + * When called during GEQO join planning, we are in a short-lived memory + * context. 
We must make sure that the unique rel and any subsidiary data + * structures created for a baserel survive the GEQO cycle, else the + * baserel is trashed for future GEQO cycles. On the other hand, when we + * are creating those for a joinrel during GEQO, we don't want them to + * clutter the main planning context. Upshot is that the best solution is + * to explicitly allocate memory in the same context the given RelOptInfo + * is in. + */ + oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + + unique_rel = makeNode(RelOptInfo); + memcpy(unique_rel, rel, sizeof(RelOptInfo)); + + /* + * clear path info + */ + unique_rel->pathlist = NIL; + unique_rel->ppilist = NIL; + unique_rel->partial_pathlist = NIL; + unique_rel->cheapest_startup_path = NULL; + unique_rel->cheapest_total_path = NULL; + unique_rel->cheapest_parameterized_paths = NIL; + + /* + * Build the target list for the unique rel. We also build the pathkeys + * that represent the ordering requirements for the sort-based + * implementation, and the list of SortGroupClause nodes that represent + * the columns to be grouped on for the hash-based implementation. + * + * For a child rel, we can construct these fields from those of its + * parent. + */ + if (IS_OTHER_REL(rel)) + { + PathTarget *child_unique_target; + PathTarget *parent_unique_target; + + parent_unique_target = rel->top_parent->unique_rel->reltarget; + + child_unique_target = copy_pathtarget(parent_unique_target); + + /* Translate the target expressions */ + child_unique_target->exprs = (List *) + adjust_appendrel_attrs_multilevel(root, + (Node *) parent_unique_target->exprs, + rel, + rel->top_parent); + + unique_rel->reltarget = child_unique_target; + + sortPathkeys = rel->top_parent->unique_pathkeys; + groupClause = rel->top_parent->unique_groupclause; + } + else + { + List *newtlist; + int nextresno; + List *sortList = NIL; + ListCell *lc1; + ListCell *lc2; + + /* + * The values we are supposed to unique-ify may be expressions in the + * variables of the input rel's targetlist. We have to add any such + * expressions to the unique rel's targetlist. + * + * While in the loop, build the lists of SortGroupClause's that + * represent the ordering for the sort-based implementation and the + * grouping for the hash-based implementation. + */ + newtlist = make_tlist_from_pathtarget(rel->reltarget); + nextresno = list_length(newtlist) + 1; + + forboth(lc1, sjinfo->semi_rhs_exprs, lc2, sjinfo->semi_operators) + { + Expr *uniqexpr = lfirst(lc1); + Oid in_oper = lfirst_oid(lc2); + Oid sortop = InvalidOid; + TargetEntry *tle; + + tle = tlist_member(uniqexpr, newtlist); + if (!tle) + { + tle = makeTargetEntry((Expr *) uniqexpr, + nextresno, + NULL, + false); + newtlist = lappend(newtlist, tle); + nextresno++; + } + + if (sjinfo->semi_can_btree) + { + /* Create an ORDER BY list to sort the input compatibly */ + Oid eqop; + SortGroupClause *sortcl; + + sortop = get_ordering_op_for_equality_op(in_oper, false); + if (!OidIsValid(sortop)) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); + + /* + * The Unique node will need equality operators. Normally + * these are the same as the IN clause operators, but if those + * are cross-type operators then the equality operators are + * the ones for the IN clause operators' RHS datatype. 
+ */ + eqop = get_equality_op_for_ordering_op(sortop, NULL); + if (!OidIsValid(eqop)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + sortop); + + sortcl = makeNode(SortGroupClause); + sortcl->tleSortGroupRef = assignSortGroupRef(tle, newtlist); + sortcl->eqop = eqop; + sortcl->sortop = sortop; + sortcl->reverse_sort = false; + sortcl->nulls_first = false; + sortcl->hashable = false; /* no need to make this accurate */ + sortList = lappend(sortList, sortcl); + } + + if (sjinfo->semi_can_hash) + { + /* Create a GROUP BY list for the Agg node to use */ + Oid eq_oper; + SortGroupClause *groupcl; + + /* + * Get the hashable equality operators for the Agg node to + * use. Normally these are the same as the IN clause + * operators, but if those are cross-type operators then the + * equality operators are the ones for the IN clause + * operators' RHS datatype. + */ + if (!get_compatible_hash_operators(in_oper, NULL, &eq_oper)) + elog(ERROR, "could not find compatible hash operator for operator %u", + in_oper); + + groupcl = makeNode(SortGroupClause); + groupcl->tleSortGroupRef = assignSortGroupRef(tle, newtlist); + groupcl->eqop = eq_oper; + groupcl->sortop = sortop; + groupcl->reverse_sort = false; + groupcl->nulls_first = false; + groupcl->hashable = true; + groupClause = lappend(groupClause, groupcl); + } + } + + unique_rel->reltarget = create_pathtarget(root, newtlist); + sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, newtlist); + } + + /* build unique paths based on input rel's pathlist */ + create_final_unique_paths(root, rel, sortPathkeys, groupClause, + sjinfo, unique_rel); + + /* build unique paths based on input rel's partial_pathlist */ + create_partial_unique_paths(root, rel, sortPathkeys, groupClause, + sjinfo, unique_rel); + + /* Now choose the best path(s) */ + set_cheapest(unique_rel); + + /* + * There shouldn't be any partial paths for the unique relation; + * otherwise, we won't be able to properly guarantee uniqueness. + */ + Assert(unique_rel->partial_pathlist == NIL); + + /* Cache the result */ + rel->unique_rel = unique_rel; + rel->unique_pathkeys = sortPathkeys; + rel->unique_groupclause = groupClause; + + MemoryContextSwitchTo(oldcontext); + + return unique_rel; +} + +/* + * create_final_unique_paths + * Create unique paths in 'unique_rel' based on 'input_rel' pathlist + */ +static void +create_final_unique_paths(PlannerInfo *root, RelOptInfo *input_rel, + List *sortPathkeys, List *groupClause, + SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel) +{ + Path *cheapest_input_path = input_rel->cheapest_total_path; + + /* Estimate number of output rows */ + unique_rel->rows = estimate_num_groups(root, + sjinfo->semi_rhs_exprs, + cheapest_input_path->rows, + NULL, + NULL); + + /* Consider sort-based implementations, if possible. */ + if (sjinfo->semi_can_btree) + { + ListCell *lc; + + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest-total path and incremental sort on any paths + * with presorted keys. + * + * To save planning time, we ignore parameterized input paths unless + * they are the cheapest-total path. + */ + foreach(lc, input_rel->pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *path; + bool is_sorted; + int presorted_keys; + + /* + * Ignore parameterized paths that are not the cheapest-total + * path. 
+ */ + if (input_path->param_info && + input_path != cheapest_input_path) + continue; + + is_sorted = pathkeys_count_contained_in(sortPathkeys, + input_path->pathkeys, + &presorted_keys); + + /* + * Ignore paths that are not suitably or partially sorted, unless + * they are the cheapest total path (no need to deal with paths + * which have presorted keys when incremental sort is disabled). + */ + if (!is_sorted && input_path != cheapest_input_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * Make a separate ProjectionPath in case we need a Result node. + */ + path = (Path *) create_projection_path(root, + unique_rel, + input_path, + unique_rel->reltarget); + + if (!is_sorted) + { + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + unique_rel, + path, + sortPathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + unique_rel, + path, + sortPathkeys, + presorted_keys, + -1.0); + } + + path = (Path *) create_unique_path(root, unique_rel, path, + list_length(sortPathkeys), + unique_rel->rows); + + add_path(unique_rel, path); + } + } + + /* Consider hash-based implementation, if possible. */ + if (sjinfo->semi_can_hash) + { + Path *path; + + /* + * Make a separate ProjectionPath in case we need a Result node. + */ + path = (Path *) create_projection_path(root, + unique_rel, + cheapest_input_path, + unique_rel->reltarget); + + path = (Path *) create_agg_path(root, + unique_rel, + path, + cheapest_input_path->pathtarget, + AGG_HASHED, + AGGSPLIT_SIMPLE, + groupClause, + NIL, + NULL, + unique_rel->rows); + + add_path(unique_rel, path); + } +} + +/* + * create_partial_unique_paths + * Create unique paths in 'unique_rel' based on 'input_rel' partial_pathlist + */ +static void +create_partial_unique_paths(PlannerInfo *root, RelOptInfo *input_rel, + List *sortPathkeys, List *groupClause, + SpecialJoinInfo *sjinfo, RelOptInfo *unique_rel) +{ + RelOptInfo *partial_unique_rel; + Path *cheapest_partial_path; + + /* nothing to do when there are no partial paths in the input rel */ + if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL) + return; + + /* + * nothing to do if there's anything in the targetlist that's + * parallel-restricted. + */ + if (!is_parallel_safe(root, (Node *) unique_rel->reltarget->exprs)) + return; + + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + partial_unique_rel = makeNode(RelOptInfo); + memcpy(partial_unique_rel, input_rel, sizeof(RelOptInfo)); + + /* + * clear path info + */ + partial_unique_rel->pathlist = NIL; + partial_unique_rel->ppilist = NIL; + partial_unique_rel->partial_pathlist = NIL; + partial_unique_rel->cheapest_startup_path = NULL; + partial_unique_rel->cheapest_total_path = NULL; + partial_unique_rel->cheapest_parameterized_paths = NIL; + + /* Estimate number of output rows */ + partial_unique_rel->rows = estimate_num_groups(root, + sjinfo->semi_rhs_exprs, + cheapest_partial_path->rows, + NULL, + NULL); + partial_unique_rel->reltarget = unique_rel->reltarget; + + /* Consider sort-based implementations, if possible. 
*/ + if (sjinfo->semi_can_btree) + { + ListCell *lc; + + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest partial path and incremental sort on any paths + * with presorted keys. + */ + foreach(lc, input_rel->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *path; + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_count_contained_in(sortPathkeys, + input_path->pathkeys, + &presorted_keys); + + /* + * Ignore paths that are not suitably or partially sorted, unless + * they are the cheapest partial path (no need to deal with paths + * which have presorted keys when incremental sort is disabled). + */ + if (!is_sorted && input_path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * Make a separate ProjectionPath in case we need a Result node. + */ + path = (Path *) create_projection_path(root, + partial_unique_rel, + input_path, + partial_unique_rel->reltarget); + + if (!is_sorted) + { + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + partial_unique_rel, + path, + sortPathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + partial_unique_rel, + path, + sortPathkeys, + presorted_keys, + -1.0); + } + + path = (Path *) create_unique_path(root, partial_unique_rel, path, + list_length(sortPathkeys), + partial_unique_rel->rows); + + add_partial_path(partial_unique_rel, path); + } + } + + /* Consider hash-based implementation, if possible. */ + if (sjinfo->semi_can_hash) + { + Path *path; + + /* + * Make a separate ProjectionPath in case we need a Result node. + */ + path = (Path *) create_projection_path(root, + partial_unique_rel, + cheapest_partial_path, + partial_unique_rel->reltarget); + + path = (Path *) create_agg_path(root, + partial_unique_rel, + path, + cheapest_partial_path->pathtarget, + AGG_HASHED, + AGGSPLIT_SIMPLE, + groupClause, + NIL, + NULL, + partial_unique_rel->rows); + + add_partial_path(partial_unique_rel, path); + } + + if (partial_unique_rel->partial_pathlist != NIL) + { + generate_useful_gather_paths(root, partial_unique_rel, true); + set_cheapest(partial_unique_rel); + + /* + * Finally, create paths to unique-ify the final result. This step is + * needed to remove any duplicates due to combining rows from parallel + * workers. 
+ */ + create_final_unique_paths(root, partial_unique_rel, + sortPathkeys, groupClause, + sjinfo, unique_rel); + } +} diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index eab44da65b8f0..28a4ae6444068 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -929,11 +929,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, make_pathkeys_for_sortclauses(root, groupList, tlist), -1.0); - path = (Path *) create_upper_unique_path(root, - result_rel, - path, - list_length(path->pathkeys), - dNumGroups); + path = (Path *) create_unique_path(root, + result_rel, + path, + list_length(path->pathkeys), + dNumGroups); add_path(result_rel, path); @@ -946,11 +946,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, make_pathkeys_for_sortclauses(root, groupList, tlist), -1.0); - path = (Path *) create_upper_unique_path(root, - result_rel, - path, - list_length(path->pathkeys), - dNumGroups); + path = (Path *) create_unique_path(root, + result_rel, + path, + list_length(path->pathkeys), + dNumGroups); add_path(result_rel, path); } } @@ -970,11 +970,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, NULL); /* and make the MergeAppend unique */ - path = (Path *) create_upper_unique_path(root, - result_rel, - path, - list_length(tlist), - dNumGroups); + path = (Path *) create_unique_path(root, + result_rel, + path, + list_length(tlist), + dNumGroups); add_path(result_rel, path); } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index a4c5867cdcb84..b0da28150d32c 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -46,7 +46,6 @@ typedef enum */ #define STD_FUZZ_FACTOR 1.01 -static List *translate_sub_tlist(List *tlist, int relid); static int append_total_cost_compare(const ListCell *a, const ListCell *b); static int append_startup_cost_compare(const ListCell *a, const ListCell *b); static List *reparameterize_pathlist_by_child(PlannerInfo *root, @@ -381,7 +380,6 @@ set_cheapest(RelOptInfo *parent_rel) parent_rel->cheapest_startup_path = cheapest_startup_path; parent_rel->cheapest_total_path = cheapest_total_path; - parent_rel->cheapest_unique_path = NULL; /* computed only if needed */ parent_rel->cheapest_parameterized_paths = parameterized_paths; } @@ -1740,246 +1738,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, return pathnode; } -/* - * create_unique_path - * Creates a path representing elimination of distinct rows from the - * input data. Distinct-ness is defined according to the needs of the - * semijoin represented by sjinfo. If it is not possible to identify - * how to make the data unique, NULL is returned. - * - * If used at all, this is likely to be called repeatedly on the same rel; - * and the input subpath should always be the same (the cheapest_total path - * for the rel). So we cache the result. - */ -UniquePath * -create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, - SpecialJoinInfo *sjinfo) -{ - UniquePath *pathnode; - Path sort_path; /* dummy for result of cost_sort */ - Path agg_path; /* dummy for result of cost_agg */ - MemoryContext oldcontext; - int numCols; - - /* Caller made a mistake if subpath isn't cheapest_total ... */ - Assert(subpath == rel->cheapest_total_path); - Assert(subpath->parent == rel); - /* ... 
or if SpecialJoinInfo is the wrong one */ - Assert(sjinfo->jointype == JOIN_SEMI); - Assert(bms_equal(rel->relids, sjinfo->syn_righthand)); - - /* If result already cached, return it */ - if (rel->cheapest_unique_path) - return (UniquePath *) rel->cheapest_unique_path; - - /* If it's not possible to unique-ify, return NULL */ - if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash)) - return NULL; - - /* - * When called during GEQO join planning, we are in a short-lived memory - * context. We must make sure that the path and any subsidiary data - * structures created for a baserel survive the GEQO cycle, else the - * baserel is trashed for future GEQO cycles. On the other hand, when we - * are creating those for a joinrel during GEQO, we don't want them to - * clutter the main planning context. Upshot is that the best solution is - * to explicitly allocate memory in the same context the given RelOptInfo - * is in. - */ - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); - - pathnode = makeNode(UniquePath); - - pathnode->path.pathtype = T_Unique; - pathnode->path.parent = rel; - pathnode->path.pathtarget = rel->reltarget; - pathnode->path.param_info = subpath->param_info; - pathnode->path.parallel_aware = false; - pathnode->path.parallel_safe = rel->consider_parallel && - subpath->parallel_safe; - pathnode->path.parallel_workers = subpath->parallel_workers; - - /* - * Assume the output is unsorted, since we don't necessarily have pathkeys - * to represent it. (This might get overridden below.) - */ - pathnode->path.pathkeys = NIL; - - pathnode->subpath = subpath; - - /* - * Under GEQO and when planning child joins, the sjinfo might be - * short-lived, so we'd better make copies of data structures we extract - * from it. - */ - pathnode->in_operators = copyObject(sjinfo->semi_operators); - pathnode->uniq_exprs = copyObject(sjinfo->semi_rhs_exprs); - - /* - * If the input is a relation and it has a unique index that proves the - * semi_rhs_exprs are unique, then we don't need to do anything. Note - * that relation_has_unique_index_for automatically considers restriction - * clauses for the rel, as well. - */ - if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree && - relation_has_unique_index_for(root, rel, NIL, - sjinfo->semi_rhs_exprs, - sjinfo->semi_operators)) - { - pathnode->umethod = UNIQUE_PATH_NOOP; - pathnode->path.rows = rel->rows; - pathnode->path.disabled_nodes = subpath->disabled_nodes; - pathnode->path.startup_cost = subpath->startup_cost; - pathnode->path.total_cost = subpath->total_cost; - pathnode->path.pathkeys = subpath->pathkeys; - - rel->cheapest_unique_path = (Path *) pathnode; - - MemoryContextSwitchTo(oldcontext); - - return pathnode; - } - - /* - * If the input is a subquery whose output must be unique already, then we - * don't need to do anything. The test for uniqueness has to consider - * exactly which columns we are extracting; for example "SELECT DISTINCT - * x,y" doesn't guarantee that x alone is distinct. So we cannot check for - * this optimization unless semi_rhs_exprs consists only of simple Vars - * referencing subquery outputs. (Possibly we could do something with - * expressions in the subquery outputs, too, but for now keep it simple.) 
- */ - if (rel->rtekind == RTE_SUBQUERY) - { - RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); - - if (query_supports_distinctness(rte->subquery)) - { - List *sub_tlist_colnos; - - sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs, - rel->relid); - - if (sub_tlist_colnos && - query_is_distinct_for(rte->subquery, - sub_tlist_colnos, - sjinfo->semi_operators)) - { - pathnode->umethod = UNIQUE_PATH_NOOP; - pathnode->path.rows = rel->rows; - pathnode->path.disabled_nodes = subpath->disabled_nodes; - pathnode->path.startup_cost = subpath->startup_cost; - pathnode->path.total_cost = subpath->total_cost; - pathnode->path.pathkeys = subpath->pathkeys; - - rel->cheapest_unique_path = (Path *) pathnode; - - MemoryContextSwitchTo(oldcontext); - - return pathnode; - } - } - } - - /* Estimate number of output rows */ - pathnode->path.rows = estimate_num_groups(root, - sjinfo->semi_rhs_exprs, - rel->rows, - NULL, - NULL); - numCols = list_length(sjinfo->semi_rhs_exprs); - - if (sjinfo->semi_can_btree) - { - /* - * Estimate cost for sort+unique implementation - */ - cost_sort(&sort_path, root, NIL, - subpath->disabled_nodes, - subpath->total_cost, - rel->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - -1.0); - - /* - * Charge one cpu_operator_cost per comparison per input tuple. We - * assume all columns get compared at most of the tuples. (XXX - * probably this is an overestimate.) This should agree with - * create_upper_unique_path. - */ - sort_path.total_cost += cpu_operator_cost * rel->rows * numCols; - } - - if (sjinfo->semi_can_hash) - { - /* - * Estimate the overhead per hashtable entry at 64 bytes (same as in - * planner.c). - */ - int hashentrysize = subpath->pathtarget->width + 64; - - if (hashentrysize * pathnode->path.rows > get_hash_memory_limit()) - { - /* - * We should not try to hash. Hack the SpecialJoinInfo to - * remember this, in case we come through here again. 
- */ - sjinfo->semi_can_hash = false; - } - else - cost_agg(&agg_path, root, - AGG_HASHED, NULL, - numCols, pathnode->path.rows, - NIL, - subpath->disabled_nodes, - subpath->startup_cost, - subpath->total_cost, - rel->rows, - subpath->pathtarget->width); - } - - if (sjinfo->semi_can_btree && sjinfo->semi_can_hash) - { - if (agg_path.disabled_nodes < sort_path.disabled_nodes || - (agg_path.disabled_nodes == sort_path.disabled_nodes && - agg_path.total_cost < sort_path.total_cost)) - pathnode->umethod = UNIQUE_PATH_HASH; - else - pathnode->umethod = UNIQUE_PATH_SORT; - } - else if (sjinfo->semi_can_btree) - pathnode->umethod = UNIQUE_PATH_SORT; - else if (sjinfo->semi_can_hash) - pathnode->umethod = UNIQUE_PATH_HASH; - else - { - /* we can get here only if we abandoned hashing above */ - MemoryContextSwitchTo(oldcontext); - return NULL; - } - - if (pathnode->umethod == UNIQUE_PATH_HASH) - { - pathnode->path.disabled_nodes = agg_path.disabled_nodes; - pathnode->path.startup_cost = agg_path.startup_cost; - pathnode->path.total_cost = agg_path.total_cost; - } - else - { - pathnode->path.disabled_nodes = sort_path.disabled_nodes; - pathnode->path.startup_cost = sort_path.startup_cost; - pathnode->path.total_cost = sort_path.total_cost; - } - - rel->cheapest_unique_path = (Path *) pathnode; - - MemoryContextSwitchTo(oldcontext); - - return pathnode; -} - /* * create_gather_merge_path * @@ -2031,36 +1789,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, return pathnode; } -/* - * translate_sub_tlist - get subquery column numbers represented by tlist - * - * The given targetlist usually contains only Vars referencing the given relid. - * Extract their varattnos (ie, the column numbers of the subquery) and return - * as an integer List. - * - * If any of the tlist items is not a simple Var, we cannot determine whether - * the subquery's uniqueness condition (if any) matches ours, so punt and - * return NIL. 
- */ -static List * -translate_sub_tlist(List *tlist, int relid) -{ - List *result = NIL; - ListCell *l; - - foreach(l, tlist) - { - Var *var = (Var *) lfirst(l); - - if (!var || !IsA(var, Var) || - var->varno != relid) - return NIL; /* punt */ - - result = lappend_int(result, var->varattno); - } - return result; -} - /* * create_gather_path * Creates a path corresponding to a gather scan, returning the @@ -2818,8 +2546,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.pathtype = T_Result; pathnode->path.parent = rel; pathnode->path.pathtarget = target; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe && @@ -3074,8 +2801,7 @@ create_incremental_sort_path(PlannerInfo *root, pathnode->path.parent = rel; /* Sort doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3122,8 +2848,7 @@ create_sort_path(PlannerInfo *root, pathnode->path.parent = rel; /* Sort doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3199,13 +2924,10 @@ create_group_path(PlannerInfo *root, } /* - * create_upper_unique_path + * create_unique_path * Creates a pathnode that represents performing an explicit Unique step * on presorted input. * - * This produces a Unique plan node, but the use-case is so different from - * create_unique_path that it doesn't seem worth trying to merge the two. 
- * * 'rel' is the parent relation associated with the result * 'subpath' is the path representing the source of data * 'numCols' is the number of grouping columns @@ -3214,21 +2936,20 @@ create_group_path(PlannerInfo *root, * The input path must be sorted on the grouping columns, plus possibly * additional columns; so the first numCols pathkeys are the grouping columns */ -UpperUniquePath * -create_upper_unique_path(PlannerInfo *root, - RelOptInfo *rel, - Path *subpath, - int numCols, - double numGroups) +UniquePath * +create_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups) { - UpperUniquePath *pathnode = makeNode(UpperUniquePath); + UniquePath *pathnode = makeNode(UniquePath); pathnode->path.pathtype = T_Unique; pathnode->path.parent = rel; /* Unique doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3284,8 +3005,7 @@ create_agg_path(PlannerInfo *root, pathnode->path.pathtype = T_Agg; pathnode->path.parent = rel; pathnode->path.pathtarget = target; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index ff507331a061a..0e523d2eb5b44 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -217,7 +217,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->partial_pathlist = NIL; rel->cheapest_startup_path = NULL; rel->cheapest_total_path = NULL; - rel->cheapest_unique_path = NULL; rel->cheapest_parameterized_paths = NIL; rel->relid = relid; rel->rtekind = rte->rtekind; @@ -269,6 +268,9 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->fdw_private = NULL; rel->unique_for_rels = NIL; rel->non_unique_for_rels = NIL; + rel->unique_rel = NULL; + rel->unique_pathkeys = NIL; + rel->unique_groupclause = NIL; rel->baserestrictinfo = NIL; rel->baserestrictcost.startup = 0; rel->baserestrictcost.per_tuple = 0; @@ -713,7 +715,6 @@ build_join_rel(PlannerInfo *root, joinrel->partial_pathlist = NIL; joinrel->cheapest_startup_path = NULL; joinrel->cheapest_total_path = NULL; - joinrel->cheapest_unique_path = NULL; joinrel->cheapest_parameterized_paths = NIL; /* init direct_lateral_relids from children; we'll finish it up below */ joinrel->direct_lateral_relids = @@ -748,6 +749,9 @@ build_join_rel(PlannerInfo *root, joinrel->fdw_private = NULL; joinrel->unique_for_rels = NIL; joinrel->non_unique_for_rels = NIL; + joinrel->unique_rel = NULL; + joinrel->unique_pathkeys = NIL; + joinrel->unique_groupclause = NIL; joinrel->baserestrictinfo = NIL; joinrel->baserestrictcost.startup = 0; joinrel->baserestrictcost.per_tuple = 0; @@ -906,7 +910,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->partial_pathlist = NIL; joinrel->cheapest_startup_path = NULL; joinrel->cheapest_total_path = NULL; - joinrel->cheapest_unique_path = NULL; joinrel->cheapest_parameterized_paths = NIL; joinrel->direct_lateral_relids = NULL; 
joinrel->lateral_relids = NULL;
@@ -933,6 +936,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	joinrel->useridiscurrent = false;
 	joinrel->fdwroutine = NULL;
 	joinrel->fdw_private = NULL;
+	joinrel->unique_rel = NULL;
+	joinrel->unique_pathkeys = NIL;
+	joinrel->unique_groupclause = NIL;
 	joinrel->baserestrictinfo = NIL;
 	joinrel->baserestrictcost.startup = 0;
 	joinrel->baserestrictcost.per_tuple = 0;
@@ -1488,7 +1494,6 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
 	upperrel->pathlist = NIL;
 	upperrel->cheapest_startup_path = NULL;
 	upperrel->cheapest_total_path = NULL;
-	upperrel->cheapest_unique_path = NULL;
 	upperrel->cheapest_parameterized_paths = NIL;
 
 	root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index b2dc380b57b10..fb3957e75e5f1 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -323,8 +323,8 @@ typedef enum JoinType
 	 * These codes are used internally in the planner, but are not supported
 	 * by the executor (nor, indeed, by most of the planner).
 	 */
-	JOIN_UNIQUE_OUTER,			/* LHS path must be made unique */
-	JOIN_UNIQUE_INNER,			/* RHS path must be made unique */
+	JOIN_UNIQUE_OUTER,			/* LHS has been made unique */
+	JOIN_UNIQUE_INNER,			/* RHS has been made unique */
 
 	/*
 	 * We might need additional join types someday.
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index ad2726f026f7d..4a903d1ec1832 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -703,8 +703,6 @@ typedef struct PartitionSchemeData *PartitionScheme;
 *		(regardless of ordering) among the unparameterized paths;
 *		or if there is no unparameterized path, the path with lowest
 *		total cost among the paths with minimum parameterization
- * cheapest_unique_path - for caching cheapest path to produce unique
- *		(no duplicates) output from relation; NULL if not yet requested
 * cheapest_parameterized_paths - best paths for their parameterizations;
 *		always includes cheapest_total_path, even if that's unparameterized
 * direct_lateral_relids - rels this rel has direct LATERAL references to
@@ -770,6 +768,21 @@ typedef struct PartitionSchemeData *PartitionScheme;
 *		other rels for which we have tried and failed to prove
 *		this one unique
 *
+ * Three fields are used to cache information about unique-ification of this
This is used to support semijoins where the relation appears on + * the RHS: the relation is first unique-ified, and then a regular join is + * performed: + * + * unique_rel - the unique-ified version of the relation, containing paths + * that produce unique (no duplicates) output from relation; + * NULL if not yet requested + * unique_pathkeys - pathkeys that represent the ordering requirements for + * the relation's output in sort-based unique-ification + * implementations + * unique_groupclause - a list of SortGroupClause nodes that represent the + * columns to be grouped on in hash-based unique-ification + * implementations + * * The presence of the following fields depends on the restrictions * and joins that the relation participates in: * @@ -930,7 +943,6 @@ typedef struct RelOptInfo List *partial_pathlist; /* partial Paths */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; - struct Path *cheapest_unique_path; List *cheapest_parameterized_paths; /* @@ -1004,6 +1016,16 @@ typedef struct RelOptInfo /* known not unique for these set(s) */ List *non_unique_for_rels; + /* + * information about unique-ification of this relation + */ + /* the unique-ified version of the relation */ + struct RelOptInfo *unique_rel; + /* pathkeys for sort-based unique-ification implementations */ + List *unique_pathkeys; + /* SortGroupClause nodes for hash-based unique-ification implementations */ + List *unique_groupclause; + /* * used by various scans and joins: */ @@ -1097,6 +1119,17 @@ typedef struct RelOptInfo ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \ (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs) +/* + * Is given relation unique-ified? + * + * When the nominal jointype is JOIN_INNER, sjinfo->jointype is JOIN_SEMI, and + * the given rel is exactly the RHS of the semijoin, it indicates that the rel + * has been unique-ified. + */ +#define RELATION_WAS_MADE_UNIQUE(rel, sjinfo, nominal_jointype) \ + ((nominal_jointype) == JOIN_INNER && (sjinfo)->jointype == JOIN_SEMI && \ + bms_equal((sjinfo)->syn_righthand, (rel)->relids)) + /* * IndexOptInfo * Per-index information for planning/optimization @@ -1741,8 +1774,8 @@ typedef struct ParamPathInfo * and the specified outer rel(s). * * "rows" is the same as parent->rows in simple paths, but in parameterized - * paths and UniquePaths it can be less than parent->rows, reflecting the - * fact that we've filtered by extra join conditions or removed duplicates. + * paths it can be less than parent->rows, reflecting the fact that we've + * filtered by extra join conditions. * * "pathkeys" is a List of PathKey nodes (see above), describing the sort * ordering of the path's output rows. @@ -2141,34 +2174,6 @@ typedef struct MemoizePath double est_hit_ratio; /* estimated cache hit ratio, for EXPLAIN */ } MemoizePath; -/* - * UniquePath represents elimination of distinct rows from the output of - * its subpath. - * - * This can represent significantly different plans: either hash-based or - * sort-based implementation, or a no-op if the input path can be proven - * distinct already. The decision is sufficiently localized that it's not - * worth having separate Path node types. (Note: in the no-op case, we could - * eliminate the UniquePath node entirely and just return the subpath; but - * it's convenient to have a UniquePath in the path tree to signal upper-level - * routines that the input is known distinct.) 
- */ -typedef enum UniquePathMethod -{ - UNIQUE_PATH_NOOP, /* input is known unique already */ - UNIQUE_PATH_HASH, /* use hashing */ - UNIQUE_PATH_SORT, /* use sorting */ -} UniquePathMethod; - -typedef struct UniquePath -{ - Path path; - Path *subpath; - UniquePathMethod umethod; - List *in_operators; /* equality operators of the IN clause */ - List *uniq_exprs; /* expressions to be made unique */ -} UniquePath; - /* * GatherPath runs several copies of a plan in parallel and collects the * results. The parallel leader may also execute the plan, unless the @@ -2375,17 +2380,17 @@ typedef struct GroupPath } GroupPath; /* - * UpperUniquePath represents adjacent-duplicate removal (in presorted input) + * UniquePath represents adjacent-duplicate removal (in presorted input) * * The columns to be compared are the first numkeys columns of the path's * pathkeys. The input is presumed already sorted that way. */ -typedef struct UpperUniquePath +typedef struct UniquePath { Path path; Path *subpath; /* path representing input source */ int numkeys; /* number of pathkey columns to compare */ -} UpperUniquePath; +} UniquePath; /* * AggPath represents generic computation of aggregate functions diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 58936e963cb6b..763cd25bb3c9a 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -91,8 +91,6 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root, bool singlerow, bool binary_mode, Cardinality est_calls); -extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, - Path *subpath, SpecialJoinInfo *sjinfo); extern GatherPath *create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, Relids required_outer, double *rows); @@ -223,11 +221,11 @@ extern GroupPath *create_group_path(PlannerInfo *root, List *groupClause, List *qual, double numGroups); -extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root, - RelOptInfo *rel, - Path *subpath, - int numCols, - double numGroups); +extern UniquePath *create_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups); extern AggPath *create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 347c582a78927..f220e9a270d5c 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -59,4 +59,7 @@ extern Path *get_cheapest_fractional_path(RelOptInfo *rel, extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr); +extern RelOptInfo *create_unique_paths(PlannerInfo *root, RelOptInfo *rel, + SpecialJoinInfo *sjinfo); + #endif /* PLANNER_H */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4d5d35d07270d..98b05c94a1195 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -9468,23 +9468,20 @@ where exists (select 1 from tenk1 t3 --------------------------------------------------------------------------------- Nested Loop Output: t1.unique1, t2.hundred - -> Hash Join + -> Merge Join Output: t1.unique1, t3.tenthous - Hash Cond: (t3.thousand = t1.unique1) - -> HashAggregate + Merge Cond: (t3.thousand = t1.unique1) + -> Unique Output: t3.thousand, t3.tenthous - Group Key: t3.thousand, t3.tenthous -> Index Only Scan using tenk1_thous_tenthous on public.tenk1 t3 Output: t3.thousand, t3.tenthous - -> Hash + -> Index Only Scan using onek_unique1 on 
public.onek t1 Output: t1.unique1 - -> Index Only Scan using onek_unique1 on public.onek t1 - Output: t1.unique1 - Index Cond: (t1.unique1 < 1) + Index Cond: (t1.unique1 < 1) -> Index Only Scan using tenk1_hundred on public.tenk1 t2 Output: t2.hundred Index Cond: (t2.hundred = t3.tenthous) -(18 rows) +(15 rows) -- ... unless it actually is unique create table j3 as select unique1, tenthous from onek; diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index d5368186caa9f..24e06845f921e 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -1134,48 +1134,50 @@ EXPLAIN (COSTS OFF) SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a; QUERY PLAN --------------------------------------------------------------------------------- - Sort + Merge Append Sort Key: t1.a - -> Append - -> Nested Loop - Join Filter: (t1_2.a = t1_5.b) - -> HashAggregate - Group Key: t1_5.b + -> Nested Loop + Join Filter: (t1_2.a = t1_5.b) + -> Unique + -> Sort + Sort Key: t1_5.b -> Hash Join Hash Cond: (((t2_1.a + t2_1.b) / 2) = t1_5.b) -> Seq Scan on prt1_e_p1 t2_1 -> Hash -> Seq Scan on prt2_p1 t1_5 Filter: (a = 0) - -> Index Scan using iprt1_p1_a on prt1_p1 t1_2 - Index Cond: (a = ((t2_1.a + t2_1.b) / 2)) - Filter: (b = 0) - -> Nested Loop - Join Filter: (t1_3.a = t1_6.b) - -> HashAggregate - Group Key: t1_6.b + -> Index Scan using iprt1_p1_a on prt1_p1 t1_2 + Index Cond: (a = ((t2_1.a + t2_1.b) / 2)) + Filter: (b = 0) + -> Nested Loop + Join Filter: (t1_3.a = t1_6.b) + -> Unique + -> Sort + Sort Key: t1_6.b -> Hash Join Hash Cond: (((t2_2.a + t2_2.b) / 2) = t1_6.b) -> Seq Scan on prt1_e_p2 t2_2 -> Hash -> Seq Scan on prt2_p2 t1_6 Filter: (a = 0) - -> Index Scan using iprt1_p2_a on prt1_p2 t1_3 - Index Cond: (a = ((t2_2.a + t2_2.b) / 2)) - Filter: (b = 0) - -> Nested Loop - Join Filter: (t1_4.a = t1_7.b) - -> HashAggregate - Group Key: t1_7.b + -> Index Scan using iprt1_p2_a on prt1_p2 t1_3 + Index Cond: (a = ((t2_2.a + t2_2.b) / 2)) + Filter: (b = 0) + -> Nested Loop + Join Filter: (t1_4.a = t1_7.b) + -> Unique + -> Sort + Sort Key: t1_7.b -> Nested Loop -> Seq Scan on prt2_p3 t1_7 Filter: (a = 0) -> Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t2_3 Index Cond: (((a + b) / 2) = t1_7.b) - -> Index Scan using iprt1_p3_a on prt1_p3 t1_4 - Index Cond: (a = ((t2_3.a + t2_3.b) / 2)) - Filter: (b = 0) -(41 rows) + -> Index Scan using iprt1_p3_a on prt1_p3 t1_4 + Index Cond: (a = ((t2_3.a + t2_3.b) / 2)) + Filter: (b = 0) +(43 rows) SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a; a | b | c @@ -1190,46 +1192,48 @@ EXPLAIN (COSTS OFF) SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a; QUERY PLAN --------------------------------------------------------------------------- - Sort + Merge Append Sort Key: t1.a - -> Append - -> Nested Loop - -> HashAggregate - Group Key: t1_6.b + -> Nested Loop + -> Unique + -> Sort + Sort Key: t1_6.b -> Hash Semi Join Hash Cond: (t1_6.b = ((t1_9.a + t1_9.b) / 2)) -> Seq Scan on prt2_p1 t1_6 -> Hash -> Seq Scan on prt1_e_p1 t1_9 Filter: (c = 0) - -> Index Scan using iprt1_p1_a on prt1_p1 t1_3 - Index Cond: (a = t1_6.b) - Filter: (b = 0) - -> Nested Loop - -> HashAggregate - Group Key: t1_7.b 
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1_3 + Index Cond: (a = t1_6.b) + Filter: (b = 0) + -> Nested Loop + -> Unique + -> Sort + Sort Key: t1_7.b -> Hash Semi Join Hash Cond: (t1_7.b = ((t1_10.a + t1_10.b) / 2)) -> Seq Scan on prt2_p2 t1_7 -> Hash -> Seq Scan on prt1_e_p2 t1_10 Filter: (c = 0) - -> Index Scan using iprt1_p2_a on prt1_p2 t1_4 - Index Cond: (a = t1_7.b) - Filter: (b = 0) - -> Nested Loop - -> HashAggregate - Group Key: t1_8.b + -> Index Scan using iprt1_p2_a on prt1_p2 t1_4 + Index Cond: (a = t1_7.b) + Filter: (b = 0) + -> Nested Loop + -> Unique + -> Sort + Sort Key: t1_8.b -> Hash Semi Join Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2)) -> Seq Scan on prt2_p3 t1_8 -> Hash -> Seq Scan on prt1_e_p3 t1_11 Filter: (c = 0) - -> Index Scan using iprt1_p3_a on prt1_p3 t1_5 - Index Cond: (a = t1_8.b) - Filter: (b = 0) -(39 rows) + -> Index Scan using iprt1_p3_a on prt1_p3 t1_5 + Index Cond: (a = t1_8.b) + Filter: (b = 0) +(41 rows) SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a; a | b | c diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 18fed63e7381a..0563d0cd5a199 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -707,6 +707,212 @@ select * from numeric_table 3 (4 rows) +-- +-- Test that a semijoin implemented by unique-ifying the RHS can explore +-- different paths of the RHS rel. +-- +create table semijoin_unique_tbl (a int, b int); +insert into semijoin_unique_tbl select i%10, i%10 from generate_series(1,1000)i; +create index on semijoin_unique_tbl(a, b); +analyze semijoin_unique_tbl; +-- Ensure that we get a plan with Unique + IndexScan +explain (verbose, costs off) +select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2 +where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3) +order by t1.a, t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Nested Loop + Output: t1.a, t1.b, t2.a, t2.b + -> Merge Join + Output: t1.a, t1.b, t3.b + Merge Cond: (t3.a = t1.a) + -> Unique + Output: t3.a, t3.b + -> Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t3 + Output: t3.a, t3.b + -> Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t1 + Output: t1.a, t1.b + -> Memoize + Output: t2.a, t2.b + Cache Key: t3.b + Cache Mode: logical + -> Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t2 + Output: t2.a, t2.b + Index Cond: (t2.a = t3.b) +(18 rows) + +-- Ensure that we can unique-ify expressions more complex than plain Vars +explain (verbose, costs off) +select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2 +where (t1.a, t2.a) in (select a+1, b+1 from semijoin_unique_tbl t3) +order by t1.a, t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Incremental Sort + Output: t1.a, t1.b, t2.a, t2.b + Sort Key: t1.a, t2.a + Presorted Key: t1.a + -> Merge Join + Output: t1.a, t1.b, t2.a, t2.b + Merge Cond: (t1.a = ((t3.a + 1))) + -> Index Only Scan using semijoin_unique_tbl_a_b_idx on public.semijoin_unique_tbl t1 + Output: t1.a, t1.b + -> Sort + Output: t2.a, t2.b, t3.a, ((t3.a + 1)) + Sort Key: ((t3.a + 1)) + -> Hash Join + Output: t2.a, t2.b, t3.a, (t3.a + 1) + Hash Cond: (t2.a = (t3.b + 1)) + -> Seq Scan on 
public.semijoin_unique_tbl t2 + Output: t2.a, t2.b + -> Hash + Output: t3.a, t3.b + -> HashAggregate + Output: t3.a, t3.b + Group Key: (t3.a + 1), (t3.b + 1) + -> Seq Scan on public.semijoin_unique_tbl t3 + Output: t3.a, t3.b, (t3.a + 1), (t3.b + 1) +(24 rows) + +-- encourage use of parallel plans +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; +set enable_indexscan to off; +-- Ensure that we get a parallel plan for the unique-ification +explain (verbose, costs off) +select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2 +where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3) +order by t1.a, t2.a; + QUERY PLAN +---------------------------------------------------------------------------------------- + Nested Loop + Output: t1.a, t1.b, t2.a, t2.b + -> Merge Join + Output: t1.a, t1.b, t3.b + Merge Cond: (t3.a = t1.a) + -> Unique + Output: t3.a, t3.b + -> Gather Merge + Output: t3.a, t3.b + Workers Planned: 2 + -> Sort + Output: t3.a, t3.b + Sort Key: t3.a, t3.b + -> HashAggregate + Output: t3.a, t3.b + Group Key: t3.a, t3.b + -> Parallel Seq Scan on public.semijoin_unique_tbl t3 + Output: t3.a, t3.b + -> Materialize + Output: t1.a, t1.b + -> Gather Merge + Output: t1.a, t1.b + Workers Planned: 2 + -> Sort + Output: t1.a, t1.b + Sort Key: t1.a + -> Parallel Seq Scan on public.semijoin_unique_tbl t1 + Output: t1.a, t1.b + -> Memoize + Output: t2.a, t2.b + Cache Key: t3.b + Cache Mode: logical + -> Bitmap Heap Scan on public.semijoin_unique_tbl t2 + Output: t2.a, t2.b + Recheck Cond: (t2.a = t3.b) + -> Bitmap Index Scan on semijoin_unique_tbl_a_b_idx + Index Cond: (t2.a = t3.b) +(37 rows) + +reset enable_indexscan; +reset max_parallel_workers_per_gather; +reset min_parallel_table_scan_size; +reset parallel_tuple_cost; +reset parallel_setup_cost; +drop table semijoin_unique_tbl; +create table unique_tbl_p (a int, b int) partition by range(a); +create table unique_tbl_p1 partition of unique_tbl_p for values from (0) to (5); +create table unique_tbl_p2 partition of unique_tbl_p for values from (5) to (10); +create table unique_tbl_p3 partition of unique_tbl_p for values from (10) to (20); +insert into unique_tbl_p select i%12, i from generate_series(0, 1000)i; +create index on unique_tbl_p1(a); +create index on unique_tbl_p2(a); +create index on unique_tbl_p3(a); +analyze unique_tbl_p; +set enable_partitionwise_join to on; +-- Ensure that the unique-ification works for partition-wise join +explain (verbose, costs off) +select * from unique_tbl_p t1, unique_tbl_p t2 +where (t1.a, t2.a) in (select a, a from unique_tbl_p t3) +order by t1.a, t2.a; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Merge Append + Sort Key: t1.a + -> Nested Loop + Output: t1_1.a, t1_1.b, t2_1.a, t2_1.b + -> Nested Loop + Output: t1_1.a, t1_1.b, t3_1.a + -> Unique + Output: t3_1.a + -> Index Only Scan using unique_tbl_p1_a_idx on public.unique_tbl_p1 t3_1 + Output: t3_1.a + -> Index Scan using unique_tbl_p1_a_idx on public.unique_tbl_p1 t1_1 + Output: t1_1.a, t1_1.b + Index Cond: (t1_1.a = t3_1.a) + -> Memoize + Output: t2_1.a, t2_1.b + Cache Key: t1_1.a + Cache Mode: logical + -> Index Scan using unique_tbl_p1_a_idx on public.unique_tbl_p1 t2_1 + Output: t2_1.a, t2_1.b + Index Cond: (t2_1.a = t1_1.a) + -> Nested Loop + Output: t1_2.a, t1_2.b, t2_2.a, t2_2.b + -> Nested Loop + Output: t1_2.a, t1_2.b, t3_2.a + -> Unique + Output: t3_2.a + -> Index Only Scan using 
unique_tbl_p2_a_idx on public.unique_tbl_p2 t3_2 + Output: t3_2.a + -> Index Scan using unique_tbl_p2_a_idx on public.unique_tbl_p2 t1_2 + Output: t1_2.a, t1_2.b + Index Cond: (t1_2.a = t3_2.a) + -> Memoize + Output: t2_2.a, t2_2.b + Cache Key: t1_2.a + Cache Mode: logical + -> Index Scan using unique_tbl_p2_a_idx on public.unique_tbl_p2 t2_2 + Output: t2_2.a, t2_2.b + Index Cond: (t2_2.a = t1_2.a) + -> Nested Loop + Output: t1_3.a, t1_3.b, t2_3.a, t2_3.b + -> Nested Loop + Output: t1_3.a, t1_3.b, t3_3.a + -> Unique + Output: t3_3.a + -> Sort + Output: t3_3.a + Sort Key: t3_3.a + -> Seq Scan on public.unique_tbl_p3 t3_3 + Output: t3_3.a + -> Index Scan using unique_tbl_p3_a_idx on public.unique_tbl_p3 t1_3 + Output: t1_3.a, t1_3.b + Index Cond: (t1_3.a = t3_3.a) + -> Memoize + Output: t2_3.a, t2_3.b + Cache Key: t1_3.a + Cache Mode: logical + -> Index Scan using unique_tbl_p3_a_idx on public.unique_tbl_p3 t2_3 + Output: t2_3.a, t2_3.b + Index Cond: (t2_3.a = t1_3.a) +(59 rows) + +reset enable_partitionwise_join; +drop table unique_tbl_p; -- -- Test case for bug #4290: bogus calculation of subplan param sets -- @@ -2672,18 +2878,17 @@ EXPLAIN (COSTS OFF) SELECT * FROM onek WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99)) ORDER BY unique1; - QUERY PLAN ------------------------------------------------------------------ - Sort - Sort Key: onek.unique1 - -> Nested Loop - -> HashAggregate - Group Key: "*VALUES*".column1, "*VALUES*".column2 + QUERY PLAN +---------------------------------------------------------------- + Nested Loop + -> Unique + -> Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 -> Values Scan on "*VALUES*" - -> Index Scan using onek_unique1 on onek - Index Cond: (unique1 = "*VALUES*".column1) - Filter: ("*VALUES*".column2 = ten) -(9 rows) + -> Index Scan using onek_unique1 on onek + Index Cond: (unique1 = "*VALUES*".column1) + Filter: ("*VALUES*".column2 = ten) +(8 rows) EXPLAIN (COSTS OFF) SELECT * FROM onek @@ -2858,12 +3063,10 @@ SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1); -> Unique -> Sort Sort Key: "*VALUES*".column1 - -> Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" + -> Values Scan on "*VALUES*" -> Index Scan using onek_unique1 on onek Index Cond: (unique1 = "*VALUES*".column1) -(9 rows) +(7 rows) EXPLAIN (COSTS OFF) SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1); diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index d9a841fbc9ffd..a6d276a115b2e 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -361,6 +361,73 @@ select * from float_table select * from numeric_table where num_col in (select float_col from float_table); +-- +-- Test that a semijoin implemented by unique-ifying the RHS can explore +-- different paths of the RHS rel. 
+-- + +create table semijoin_unique_tbl (a int, b int); +insert into semijoin_unique_tbl select i%10, i%10 from generate_series(1,1000)i; +create index on semijoin_unique_tbl(a, b); +analyze semijoin_unique_tbl; + +-- Ensure that we get a plan with Unique + IndexScan +explain (verbose, costs off) +select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2 +where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3) +order by t1.a, t2.a; + +-- Ensure that we can unique-ify expressions more complex than plain Vars +explain (verbose, costs off) +select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2 +where (t1.a, t2.a) in (select a+1, b+1 from semijoin_unique_tbl t3) +order by t1.a, t2.a; + +-- encourage use of parallel plans +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; + +set enable_indexscan to off; + +-- Ensure that we get a parallel plan for the unique-ification +explain (verbose, costs off) +select * from semijoin_unique_tbl t1, semijoin_unique_tbl t2 +where (t1.a, t2.a) in (select a, b from semijoin_unique_tbl t3) +order by t1.a, t2.a; + +reset enable_indexscan; + +reset max_parallel_workers_per_gather; +reset min_parallel_table_scan_size; +reset parallel_tuple_cost; +reset parallel_setup_cost; + +drop table semijoin_unique_tbl; + +create table unique_tbl_p (a int, b int) partition by range(a); +create table unique_tbl_p1 partition of unique_tbl_p for values from (0) to (5); +create table unique_tbl_p2 partition of unique_tbl_p for values from (5) to (10); +create table unique_tbl_p3 partition of unique_tbl_p for values from (10) to (20); +insert into unique_tbl_p select i%12, i from generate_series(0, 1000)i; +create index on unique_tbl_p1(a); +create index on unique_tbl_p2(a); +create index on unique_tbl_p3(a); +analyze unique_tbl_p; + +set enable_partitionwise_join to on; + +-- Ensure that the unique-ification works for partition-wise join +explain (verbose, costs off) +select * from unique_tbl_p t1, unique_tbl_p t2 +where (t1.a, t2.a) in (select a, a from unique_tbl_p t3) +order by t1.a, t2.a; + +reset enable_partitionwise_join; + +drop table unique_tbl_p; + -- -- Test case for bug #4290: bogus calculation of subplan param sets -- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e6f2e93b2d6fa..e4a9ec65ab4da 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3159,7 +3159,6 @@ UnicodeNormalizationForm UnicodeNormalizationQC Unique UniquePath -UniquePathMethod UniqueRelInfo UniqueState UnlistenStmt @@ -3175,7 +3174,6 @@ UpgradeTaskSlotState UpgradeTaskStep UploadManifestCmd UpperRelationKind -UpperUniquePath UserAuth UserContext UserMapping From bf9ee294e567654231c5b2fef09b8a5367907366 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 19 Aug 2025 09:37:04 +0900 Subject: [PATCH 529/602] Simplify relation_has_unique_index_for() Now that the only call to relation_has_unique_index_for() that supplied an exprlist and oprlist has been removed, the loop handling those lists is effectively dead code. This patch removes that loop and simplifies the function accordingly. 
Author: Richard Guo Discussion: https://postgr.es/m/CAMbWs4-EBnaRvEs7frTLbsXiweSTUXifsteF-d3rvv01FKO86w@mail.gmail.com --- src/backend/optimizer/path/indxpath.c | 85 ++++------------------- src/backend/optimizer/plan/analyzejoins.c | 5 +- src/include/optimizer/paths.h | 5 +- 3 files changed, 17 insertions(+), 78 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 601354ea3e056..4f5c98f0091d1 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -4142,47 +4142,26 @@ ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, * a set of equality conditions, because the conditions constrain all * columns of some unique index. * - * The conditions can be represented in either or both of two ways: - * 1. A list of RestrictInfo nodes, where the caller has already determined - * that each condition is a mergejoinable equality with an expression in - * this relation on one side, and an expression not involving this relation - * on the other. The transient outer_is_left flag is used to identify which - * side we should look at: left side if outer_is_left is false, right side - * if it is true. - * 2. A list of expressions in this relation, and a corresponding list of - * equality operators. The caller must have already checked that the operators - * represent equality. (Note: the operators could be cross-type; the - * expressions should correspond to their RHS inputs.) + * The conditions are provided as a list of RestrictInfo nodes, where the + * caller has already determined that each condition is a mergejoinable + * equality with an expression in this relation on one side, and an + * expression not involving this relation on the other. The transient + * outer_is_left flag is used to identify which side we should look at: + * left side if outer_is_left is false, right side if it is true. * * The caller need only supply equality conditions arising from joins; * this routine automatically adds in any usable baserestrictinfo clauses. * (Note that the passed-in restrictlist will be destructively modified!) + * + * If extra_clauses isn't NULL, return baserestrictinfo clauses which were used + * to derive uniqueness. */ bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, - List *restrictlist, - List *exprlist, List *oprlist) -{ - return relation_has_unique_index_ext(root, rel, restrictlist, - exprlist, oprlist, NULL); -} - -/* - * relation_has_unique_index_ext - * Same as relation_has_unique_index_for(), but supports extra_clauses - * parameter. If extra_clauses isn't NULL, return baserestrictinfo clauses - * which were used to derive uniqueness. - */ -bool -relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, - List *restrictlist, - List *exprlist, List *oprlist, - List **extra_clauses) + List *restrictlist, List **extra_clauses) { ListCell *ic; - Assert(list_length(exprlist) == list_length(oprlist)); - /* Short-circuit if no indexes... */ if (rel->indexlist == NIL) return false; @@ -4225,7 +4204,7 @@ relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, } /* Short-circuit the easy case */ - if (restrictlist == NIL && exprlist == NIL) + if (restrictlist == NIL) return false; /* Examine each index of the relation ... */ @@ -4247,14 +4226,12 @@ relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, continue; /* - * Try to find each index column in the lists of conditions. This is + * Try to find each index column in the list of conditions. 
This is * O(N^2) or worse, but we expect all the lists to be short. */ for (c = 0; c < ind->nkeycolumns; c++) { - bool matched = false; ListCell *lc; - ListCell *lc2; foreach(lc, restrictlist) { @@ -4284,8 +4261,6 @@ relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, if (match_index_to_operand(rexpr, c, ind)) { - matched = true; /* column is unique */ - if (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) { MemoryContext oldMemCtx = @@ -4303,43 +4278,11 @@ relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, MemoryContextSwitchTo(oldMemCtx); } - break; + break; /* found a match; column is unique */ } } - if (matched) - continue; - - forboth(lc, exprlist, lc2, oprlist) - { - Node *expr = (Node *) lfirst(lc); - Oid opr = lfirst_oid(lc2); - - /* See if the expression matches the index key */ - if (!match_index_to_operand(expr, c, ind)) - continue; - - /* - * The equality operator must be a member of the index - * opfamily, else it is not asserting the right kind of - * equality behavior for this index. We assume the caller - * determined it is an equality operator, so we don't need to - * check any more tightly than this. - */ - if (!op_in_opfamily(opr, ind->opfamily[c])) - continue; - - /* - * XXX at some point we may need to check collations here too. - * For the moment we assume all collations reduce to the same - * notion of equality. - */ - - matched = true; /* column is unique */ - break; - } - - if (!matched) + if (lc == NULL) break; /* no match; this index doesn't help us */ } diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 4d55c2ea59162..da92d8ee41442 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -990,11 +990,10 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, { /* * Examine the indexes to see if we have a matching unique index. - * relation_has_unique_index_ext automatically adds any usable + * relation_has_unique_index_for automatically adds any usable * restriction clauses for the rel, so we needn't do that here. */ - if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL, - extra_clauses)) + if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses)) return true; } else if (rel->rtekind == RTE_SUBQUERY) diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 8410531f2d640..cbade77b717fb 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -71,10 +71,7 @@ extern void generate_partitionwise_join_paths(PlannerInfo *root, extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel); extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, - List *exprlist, List *oprlist); -extern bool relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, - List *restrictlist, List *exprlist, - List *oprlist, List **extra_clauses); + List **extra_clauses); extern bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol); From 9b7eb6f02e8d4affb225dd0aa239c8e7e0ff2cba Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 19 Aug 2025 09:54:18 +0900 Subject: [PATCH 530/602] Remove useless pointer update in StatsShmemInit() This pointer was not used after its last update. This variable assignment was most likely a vestige artifact of the earlier versions of the patch set that have led to 5891c7a8ed8f. This pointer update is useless, so let's remove it. 
It removes one call to pgstat_dsa_init_size(), making the code slightly easier to grasp. Author: Bertrand Drouvot Discussion: https://postgr.es/m/aKLsu2sdpnyeuSSc@ip-10-97-1-34.eu-west-3.compute.internal --- src/backend/utils/activity/pgstat_shmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index cca4277f2343a..62de347445365 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -180,7 +180,6 @@ StatsShmemInit(void) * provides a small efficiency win. */ ctl->raw_dsa_area = p; - p += MAXALIGN(pgstat_dsa_init_size()); dsa = dsa_create_in_place(ctl->raw_dsa_area, pgstat_dsa_init_size(), LWTRANCHE_PGSTATS_DSA, NULL); From a977e419ee6ee15cb7bd45d7c9b7540cf183d1e2 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 19 Aug 2025 14:04:09 +0900 Subject: [PATCH 531/602] Refactor ReadMultiXactCounts() into GetMultiXactInfo() This provides a single entry point to access some information about the state of MultiXacts, able to return some data about multixacts offsets and counts. Originally this function was only able to return some information about the number of multixacts and multixact members, extended here to provide some data about the oldest multixact ID in use and the oldest offset, if known. This change has been proposed in a patch that aims at providing more monitoring capabilities for multixacts, and it is useful on its own. GetMultiXactInfo() is added to multixact.h, becoming available for out-of-core code. Extracted from a larger patch by the same author. Author: Naga Appani Reviewed-by: Ashutosh Bapat Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/CA+QeY+AAsYK6WvBW4qYzHz4bahHycDAY_q5ECmHkEV_eB9ckzg@mail.gmail.com --- src/backend/access/transam/multixact.c | 36 ++++++++++++++++++-------- src/include/access/multixact.h | 3 +++ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 6a7963ccd03f8..e89ab0c49b382 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -2852,31 +2852,43 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result) } /* - * Determine how many multixacts, and how many multixact members, currently - * exist. Return false if unable to determine. + * GetMultiXactInfo + * + * Returns information about the current MultiXact state, as of: + * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId) + * members: Number of member entries (nextOffset - oldestOffset) + * oldestMultiXactId: Oldest MultiXact ID still in use + * oldestOffset: Oldest offset still in use + * + * Returns false if unable to determine, the oldest offset being unknown. 
*/ -static bool -ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) +bool +GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members, + MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset) { MultiXactOffset nextOffset; - MultiXactOffset oldestOffset; - MultiXactId oldestMultiXactId; MultiXactId nextMultiXactId; bool oldestOffsetKnown; LWLockAcquire(MultiXactGenLock, LW_SHARED); nextOffset = MultiXactState->nextOffset; - oldestMultiXactId = MultiXactState->oldestMultiXactId; + *oldestMultiXactId = MultiXactState->oldestMultiXactId; nextMultiXactId = MultiXactState->nextMXact; - oldestOffset = MultiXactState->oldestOffset; + *oldestOffset = MultiXactState->oldestOffset; oldestOffsetKnown = MultiXactState->oldestOffsetKnown; LWLockRelease(MultiXactGenLock); if (!oldestOffsetKnown) + { + *members = 0; + *multixacts = 0; + *oldestMultiXactId = InvalidMultiXactId; + *oldestOffset = 0; return false; + } - *members = nextOffset - oldestOffset; - *multixacts = nextMultiXactId - oldestMultiXactId; + *members = nextOffset - *oldestOffset; + *multixacts = nextMultiXactId - *oldestMultiXactId; return true; } @@ -2915,9 +2927,11 @@ MultiXactMemberFreezeThreshold(void) uint32 victim_multixacts; double fraction; int result; + MultiXactId oldestMultiXactId; + MultiXactOffset oldestOffset; /* If we can't determine member space utilization, assume the worst. */ - if (!ReadMultiXactCounts(&multixacts, &members)) + if (!GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset)) return 0; /* If member space utilization is low, no special action is required. */ diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 6607b645a1820..82e4bb90dd581 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -111,6 +111,9 @@ extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly); extern void MultiXactIdSetOldestMember(void); extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly); +extern bool GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members, + MultiXactId *oldestMultiXactId, + MultiXactOffset *oldestOffset); extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2); extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2); From aa21e49225a1b4f8465dee5a9410e52b5a889f90 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 19 Aug 2025 05:33:17 +0000 Subject: [PATCH 532/602] Fix self-deadlock during DROP SUBSCRIPTION. The DROP SUBSCRIPTION command performs several operations: it stops the subscription workers, removes subscription-related entries from system catalogs, and deletes the replication slot on the publisher server. Previously, this command acquired an AccessExclusiveLock on pg_subscription before initiating these steps. However, while holding this lock, the command attempts to connect to the publisher to remove the replication slot. In cases where the connection is made to a newly created database on the same server as subscriber, the cache-building process during connection tries to acquire an AccessShareLock on pg_subscription, resulting in a self-deadlock. To resolve this issue, we reduce the lock level on pg_subscription during DROP SUBSCRIPTION from AccessExclusiveLock to RowExclusiveLock. Earlier, the higher lock level was used to prevent the launcher from starting a new worker during the drop operation, as a restarted worker could become orphaned. 
Now, instead of relying on a strict lock, we acquire an AccessShareLock on the specific subscription being dropped and re-validate its existence after acquiring the lock. If the subscription is no longer valid, the worker exits gracefully. This approach avoids the deadlock while still ensuring that orphan workers are not created. Reported-by: Alexander Lakhin Author: Dilip Kumar Reviewed-by: vignesh C Reviewed-by: Hayato Kuroda Reviewed-by: Amit Kapila Backpatch-through: 13 Discussion: https://postgr.es/m/18988-7312c868be2d467f@postgresql.org --- src/backend/commands/subscriptioncmds.c | 8 ++++--- src/backend/replication/logical/worker.c | 7 ++++++ src/test/subscription/t/100_bugs.pl | 30 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index faa3650d287de..4c01d21b2f30c 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -1803,10 +1803,12 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) bool must_use_password; /* - * Lock pg_subscription with AccessExclusiveLock to ensure that the - * launcher doesn't restart new worker during dropping the subscription + * The launcher may concurrently start a new worker for this subscription. + * During initialization, the worker checks for subscription validity and + * exits if the subscription has already been dropped. See + * InitializeLogRepWorker. */ - rel = table_open(SubscriptionRelationId, AccessExclusiveLock); + rel = table_open(SubscriptionRelationId, RowExclusiveLock); tup = SearchSysCache2(SUBSCRIPTIONNAME, ObjectIdGetDatum(MyDatabaseId), CStringGetDatum(stmt->subname)); diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 8e34387345495..22ad9051db3fd 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -5415,6 +5415,13 @@ InitializeLogRepWorker(void) StartTransactionCommand(); oldctx = MemoryContextSwitchTo(ApplyContext); + /* + * Lock the subscription to prevent it from being concurrently dropped, + * then re-verify its existence. After the initialization, the worker will + * be terminated gracefully if the subscription is dropped. + */ + LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid, 0, + AccessShareLock); MySubscription = GetSubscription(MyLogicalRepWorker->subid, true); if (!MySubscription) { diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index 5e3577011833b..5022305491807 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -575,4 +575,34 @@ BEGIN $node_publisher->stop('fast'); $node_subscriber->stop('fast'); +# BUG #18988 +# The bug happened due to a self-deadlock between the DROP SUBSCRIPTION +# command and the walsender process for accessing pg_subscription. This +# occurred when DROP SUBSCRIPTION attempted to remove a replication slot by +# connecting to a newly created database whose caches are not yet +# initialized. +# +# The bug is fixed by reducing the lock-level during DROP SUBSCRIPTION. +$node_publisher->start(); + +$publisher_connstr = $node_publisher->connstr . 
' dbname=regress_db'; +$node_publisher->safe_psql( + 'postgres', qq( + CREATE DATABASE regress_db; + CREATE SUBSCRIPTION regress_sub1 CONNECTION '$publisher_connstr' PUBLICATION regress_pub WITH (connect=false); +)); + +my ($ret, $stdout, $stderr) = + $node_publisher->psql('postgres', q{DROP SUBSCRIPTION regress_sub1}); + +isnt($ret, 0, "replication slot does not exist: exit code not 0"); +like( + $stderr, + qr/ERROR: could not drop replication slot "regress_sub1" on publisher/, + "could not drop replication slot: error message"); + +$node_publisher->safe_psql('postgres', "DROP DATABASE regress_db"); + +$node_publisher->stop('fast'); + done_testing(); From 16d434d53d56c5dd1919ca07274193ee2600ed1b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 19 Aug 2025 08:03:53 +0200 Subject: [PATCH 533/602] Add src/include/catalog/README This just includes a link to the bki documentation, to help people get started. Before commit 372728b0d49, there was a README at src/backend/catalog/README, but then this was moved to the SGML documentation. So this effectively puts back a link to what was moved. But src/include/catalog/ is probably a better location, because that's where all the interesting files are. Co-authored-by: Florents Tselai Discussion: https://www.postgresql.org/message-id/flat/CA+v5N400GJFJ9RyXAX7hFKbtF7vVQGvWdFWEfcSQmvVhi9xfrA@mail.gmail.com --- src/include/catalog/README | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/include/catalog/README diff --git a/src/include/catalog/README b/src/include/catalog/README new file mode 100644 index 0000000000000..7e0e5d7e70c87 --- /dev/null +++ b/src/include/catalog/README @@ -0,0 +1,2 @@ +See about the +files in this directory. From 34a62c2c7fae00c08aa618c584ce4c52969dbf87 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Tue, 19 Aug 2025 18:53:56 +0900 Subject: [PATCH 534/602] doc: Improve documentation discoverability for pgoutput. Previously, the documentation for pgoutput was located in the section on the logical streaming replication protocol, and there was no index entry for it. As a result, users had difficulty finding information about pgoutput. This commit moves the pgoutput documentation under the logical decoding section and adds an index entry, making it easier for users to locate and access this information. Author: Fujii Masao Reviewed-by: Chao Li Reviewed-by: Hayato Kuroda Reviewed-by: Euler Taveira Discussion: https://postgr.es/m/CAHGQGwFJTbygdhhjR_iP4Oem=Lo1xsptWWOq825uoW+hG_Lfnw@mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 8 +- doc/src/sgml/logicaldecoding.sgml | 146 ++++++++++++++++++++++++++ doc/src/sgml/protocol.sgml | 131 ++--------------------- 3 files changed, 156 insertions(+), 129 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index a0761cfee3f6d..0ac29928f17ec 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -542,8 +542,8 @@ it manually before the subscription can be activated. The steps to create the slot and activate the subscription are shown in the following examples. These examples specify the standard logical decoding output plugin - (pgoutput), which is what the built-in logical - replication uses. + (), + which is what the built-in logical replication uses. First, create a publication for the examples to use. @@ -2173,8 +2173,8 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER implemented by walsender and apply processes. 
The walsender process starts logical decoding (described in ) of the WAL and loads the standard - logical decoding output plugin (pgoutput). The plugin - transforms the changes read + logical decoding output plugin (). + The plugin transforms the changes read from WAL to the logical replication protocol (see ) and filters the data according to the publication specification. The data is then continuously diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 77c720c422c50..1e3ccc84e65f7 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -574,6 +574,152 @@ DETAIL: Synchronization could lead to data loss, because the remote slot needs Logical Decoding Output Plugins + + + PostgreSQL provides two logical decoding + output plugins, and + . You can also develop custom output plugins + (see for details). + + + + pgoutput — Standard Logical Decoding Output Plugin + + + pgoutput + + + + pgoutput is the standard logical decoding output + plugin provided by PostgreSQL. + It's used for the built-in + logical replication. + + + + Options + + + + + proto_version + + + + Protocol version. Currently versions 1, 2, + 3, and 4 are supported. A valid + version is required. + + + Version 2 is supported only for server version 14 + and above, and it allows streaming of large in-progress transactions. + + + Version 3 is supported only for server version 15 + and above, and it allows streaming of two-phase commits. + + + Version 4 is supported only for server version 16 + and above, and it allows streams of large in-progress transactions to + be applied in parallel. + + + + + + + publication_names + + + + Comma-separated list of publication names for which to subscribe + (receive changes). The individual publication names are treated + as standard objects names and can be quoted the same as needed. + At least one publication name is required. + + + + + + + binary + + + + Boolean option to use binary transfer mode. Binary mode is faster + than the text mode but slightly less robust. + The default is off. + + + + + + + messages + + + + Boolean option to enable sending the messages that are written + by pg_logical_emit_message. + The default is off. + + + + + + + streaming + + + + Option to enable streaming of in-progress transactions. Valid values are + off (the default), on and + parallel. The setting parallel + enables sending extra information with some messages to be used for + parallelization. Minimum protocol version 2 is required to turn it + on. Minimum protocol version 4 is required for the + parallel value. + + + + + + + two_phase + + + + Boolean option to enable two-phase transactions. Minimum protocol + version 3 is required to turn it on. + The default is off. + + + + + + + origin + + + + Option to send changes by their origin. Possible values are + none to only send the changes that have no origin + associated, or any + to send the changes regardless of their origin. This can be used + to avoid loops (infinite replication of the same data) among + replication nodes. + The default is any. + + + + + + + + + + + Writing Logical Decoding Output Plugins An example output plugin can be found in the diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index cc5c8dc574ce5..e63647c093bad 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2955,7 +2955,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" The name of an option passed to the slot's logical decoding output - plugin. 
See for + plugin. See for options that are accepted by the standard (pgoutput) plugin. @@ -3523,134 +3523,15 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;" the physical streaming replication protocol. - - PostgreSQL logical decoding supports output - plugins. pgoutput is the standard one used for - the built-in logical replication. - - Logical Streaming Replication Parameters - Using the START_REPLICATION command, - pgoutput accepts the following options: - - - - - proto_version - - - - Protocol version. Currently versions 1, 2, - 3, and 4 are supported. A valid - version is required. - - - Version 2 is supported only for server version 14 - and above, and it allows streaming of large in-progress transactions. - - - Version 3 is supported only for server version 15 - and above, and it allows streaming of two-phase commits. - - - Version 4 is supported only for server version 16 - and above, and it allows streams of large in-progress transactions to - be applied in parallel. - - - - - - - publication_names - - - - Comma-separated list of publication names for which to subscribe - (receive changes). The individual publication names are treated - as standard objects names and can be quoted the same as needed. - At least one publication name is required. - - - - - - - binary - - - - Boolean option to use binary transfer mode. Binary mode is faster - than the text mode but slightly less robust. - The default is off. - - - - - - - messages - - - - Boolean option to enable sending the messages that are written - by pg_logical_emit_message. - The default is off. - - - - - - - streaming - - - - Option to enable streaming of in-progress transactions. Valid values are - off (the default), on and - parallel. The setting parallel - enables sending extra information with some messages to be used for - parallelization. Minimum protocol version 2 is required to turn it - on. Minimum protocol version 4 is required for the - parallel value. - - - - - - - two_phase - - - - Boolean option to enable two-phase transactions. Minimum protocol - version 3 is required to turn it on. - The default is off. - - - - - - - origin - - - - Option to send changes by their origin. Possible values are - none to only send the changes that have no origin - associated, or any - to send the changes regardless of their origin. This can be used - to avoid loops (infinite replication of the same data) among - replication nodes. - The default is any. - - - - - + The START_REPLICATION command can pass + options to the logical decoding output plugin associated + with the specified replication slot. + See for options + that are accepted by the standard (pgoutput) plugin. From 38c5fbd97ee6a8409c6c41d6af0c06169cd51c2b Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Tue, 19 Aug 2025 18:54:27 +0900 Subject: [PATCH 535/602] doc: Improve pgoutput documentation. This commit updates the pgoutput documentation with the following changes: - Specify the data type for each pgoutput option. - Clarify the relationship between proto_version and options such as streaming and two_phase. - Add a note on the use of pg_logical_slot_peek_changes and pg_logical_slot_get_changes with pgoutput. 
Author: Fujii Masao Reviewed-by: Chao Li Reviewed-by: Hayato Kuroda Discussion: https://postgr.es/m/CAHGQGwFJTbygdhhjR_iP4Oem=Lo1xsptWWOq825uoW+hG_Lfnw@mail.gmail.com --- doc/src/sgml/func/func-admin.sgml | 2 +- doc/src/sgml/logicaldecoding.sgml | 118 +++++++++++++++++------------- 2 files changed, 69 insertions(+), 51 deletions(-) diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index 446fdfe56f4f9..6347fe60b0c4e 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -1224,7 +1224,7 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset - + pg_logical_slot_peek_binary_changes diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 1e3ccc84e65f7..a1f2efb24208f 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -600,109 +600,112 @@ DETAIL: Synchronization could lead to data loss, because the remote slot needs Options - - - proto_version - + + proto_version (integer) - Protocol version. Currently versions 1, 2, + Specifies the protocol version. + Currently versions 1, 2, 3, and 4 are supported. A valid version is required. - Version 2 is supported only for server version 14 - and above, and it allows streaming of large in-progress transactions. + Version 2 is supported on server version 14 + and above, and is required when streaming + is set to on to stream large in-progress + transactions. - Version 3 is supported only for server version 15 - and above, and it allows streaming of two-phase commits. + Version 3 is supported on server version 15 + and above, and is required when two_phase + is enabled to stream two-phase commits. - Version 4 is supported only for server version 16 - and above, and it allows streams of large in-progress transactions to - be applied in parallel. + Version 4 is supported on server version 16 + and above, and is required when streaming + is set to parallel to stream large in-progress + transactions to be applied in parallel. - - - publication_names - + + publication_names (string) - Comma-separated list of publication names for which to subscribe - (receive changes). The individual publication names are treated + A comma-separated list of publication names to subscribe to. + The individual publication names are treated as standard objects names and can be quoted the same as needed. At least one publication name is required. - - - binary - + + binary (boolean) - Boolean option to use binary transfer mode. Binary mode is faster + Enables binary transfer mode. Binary mode is faster than the text mode but slightly less robust. The default is off. - - - messages - + + messages (boolean) - Boolean option to enable sending the messages that are written - by pg_logical_emit_message. + Enables sending the messages that are written by + pg_logical_emit_message. The default is off. - - - streaming - + + streaming (enum) - Option to enable streaming of in-progress transactions. Valid values are + Enables streaming of in-progress transactions. Valid values are off (the default), on and - parallel. The setting parallel - enables sending extra information with some messages to be used for - parallelization. Minimum protocol version 2 is required to turn it - on. Minimum protocol version 4 is required for the - parallel value. + parallel. + + + When set to off, pgoutput + fully decodes a transaction before sending it as a whole. + This mode works with any protocol version. 
+ + + When set to on, pgoutput + streams large in-progress transactions. + This requires protocol version 2 or higher. + + + When set to parallel, pgoutput + streams large in-progress transactions and also sends + extra information in some messages to support parallel processing. + This requires protocol version 4 or higher. - - - two_phase - + + two_phase (boolean) - Boolean option to enable two-phase transactions. Minimum protocol - version 3 is required to turn it on. + Enables sending two-phase transactions. + Minimum protocol version 3 is required to turn it on. The default is off. - - - origin - + + origin (enum) - Option to send changes by their origin. Possible values are + Specifies whether to send changes by their origin. Possible values are none to only send the changes that have no origin associated, or any to send the changes regardless of their origin. This can be used @@ -715,6 +718,21 @@ DETAIL: Synchronization could lead to data loss, because the remote slot needs + + + Notes + + + pgoutput produces binary output, + so functions expecting textual data ( + pg_logical_slot_peek_changes and + pg_logical_slot_get_changes) + cannot be used with it. Use + pg_logical_slot_peek_binary_changes or + pg_logical_slot_get_binary_changes + instead. + + From c6abf24ebfecf0ef4f7d21750e198959b8b61586 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 19 Aug 2025 10:43:15 -0500 Subject: [PATCH 536/602] Fix misspelling of "tranche" in dsa.h. Oversight in commit bb952c8c8b. Discussion: https://postgr.es/m/aKOWzsCPgrsoEG1Q%40nathan --- src/include/utils/dsa.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/utils/dsa.h b/src/include/utils/dsa.h index 0a6067be6288b..f2104dacbfcc5 100644 --- a/src/include/utils/dsa.h +++ b/src/include/utils/dsa.h @@ -114,13 +114,13 @@ typedef pg_atomic_uint64 dsa_pointer_atomic; dsa_allocate_extended(area, size, DSA_ALLOC_ZERO) /* Create dsa_area with default segment sizes */ -#define dsa_create(tranch_id) \ - dsa_create_ext(tranch_id, DSA_DEFAULT_INIT_SEGMENT_SIZE, \ +#define dsa_create(tranche_id) \ + dsa_create_ext(tranche_id, DSA_DEFAULT_INIT_SEGMENT_SIZE, \ DSA_MAX_SEGMENT_SIZE) /* Create dsa_area with default segment sizes in an existing share memory space */ -#define dsa_create_in_place(place, size, tranch_id, segment) \ - dsa_create_in_place_ext(place, size, tranch_id, segment, \ +#define dsa_create_in_place(place, size, tranche_id, segment) \ + dsa_create_in_place_ext(place, size, tranche_id, segment, \ DSA_DEFAULT_INIT_SEGMENT_SIZE, \ DSA_MAX_SEGMENT_SIZE) From eab9e4e27c0c433dbfe09914e69c875dda2af63f Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Tue, 19 Aug 2025 12:11:42 -0700 Subject: [PATCH 537/602] Add CHECK_FOR_INTERRUPTS in contrib/pg_buffercache functions. This commit adds CHECK_FOR_INTERRUPTS to loops iterating over shared buffers in several pg_buffercache functions, allowing them to be interrupted during long-running operations. Backpatch to all supported versions. Add CHECK_FOR_INTERRUPTS to the loop in pg_buffercache_pages() in all supported branches, and to pg_buffercache_summary() and pg_buffercache_usage_counts() in version 16 and newer. 
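For reference, the user-visible effect is that scans like the ones below,
which iterate over every shared buffer, can now be cancelled mid-run (for
example with Ctrl-C or pg_cancel_backend()); a minimal sketch:

    CREATE EXTENSION pg_buffercache;
    SELECT count(*) FROM pg_buffercache;          -- pg_buffercache_pages()
    SELECT * FROM pg_buffercache_summary();       -- v16 and newer
    SELECT * FROM pg_buffercache_usage_counts();  -- v16 and newer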
Author: SATYANARAYANA NARLAPURAM Discussion: https://postgr.es/m/CAHg+QDcejeLx7WunFT3DX6XKh1KshvGKa8F5au8xVhqVvvQPRw@mail.gmail.com Backpatch-through: 13 --- contrib/pg_buffercache/pg_buffercache_pages.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index ae0291e6e96df..3df04c98959e1 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -194,6 +194,8 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) BufferDesc *bufHdr; uint32 buf_state; + CHECK_FOR_INTERRUPTS(); + bufHdr = GetBufferDescriptor(i); /* Lock each buffer header before inspecting. */ buf_state = LockBufHdr(bufHdr); @@ -560,6 +562,8 @@ pg_buffercache_summary(PG_FUNCTION_ARGS) BufferDesc *bufHdr; uint32 buf_state; + CHECK_FOR_INTERRUPTS(); + /* * This function summarizes the state of all headers. Locking the * buffer headers wouldn't provide an improved result as the state of @@ -620,6 +624,8 @@ pg_buffercache_usage_counts(PG_FUNCTION_ARGS) uint32 buf_state = pg_atomic_read_u32(&bufHdr->state); int usage_count; + CHECK_FOR_INTERRUPTS(); + usage_count = BUF_STATE_GET_USAGECOUNT(buf_state); usage_counts[usage_count]++; From 3eec0e65331e4a38d0c8b3ac251ea3885452fd71 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 19 Aug 2025 16:48:22 -0500 Subject: [PATCH 538/602] Fix comment for MAX_SIMUL_LWLOCKS. This comment mentions that pg_buffercache locks all buffer partitions simultaneously, but it hasn't done so since v10. Oversight in commit 6e654546fb. Reviewed-by: Andres Freund Discussion: https://postgr.es/m/aKTuAHVEuYCUmmIy%40nathan --- src/backend/storage/lmgr/lwlock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index ec9c345ffdfb8..c80b43f1f55cf 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -162,8 +162,7 @@ LWLockPadded *MainLWLockArray = NULL; /* * We use this structure to keep track of locked LWLocks for release * during error recovery. Normally, only a few will be held at once, but - * occasionally the number can be much higher; for example, the pg_buffercache - * extension locks all buffer partitions simultaneously. + * occasionally the number can be much higher. */ #define MAX_SIMUL_LWLOCKS 200 From 6429e5b771dbb98aa56441677b79cdb58f6e1675 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Wed, 20 Aug 2025 13:16:06 +0900 Subject: [PATCH 539/602] vacuumdb: Make vacuumdb --analyze-only process partitioned tables. vacuumdb should follow the behavior of the underlying VACUUM and ANALYZE commands. When --analyze-only is used, it ought to analyze regular tables, materialized views, and partitioned tables, just as ANALYZE (with no explicit target tables) does. Otherwise, it should only process regular tables and materialized views, since VACUUM skips partitioned tables when no targets are given. Previously, vacuumdb --analyze-only skipped partitioned tables. This was inconsistent, and also inconvenient after pg_upgrade, where --analyze-only is typically used to gather missing statistics. This commit fixes the behavior so that vacuumdb --analyze-only also processes partitioned tables. As a result, both vacuumdb --analyze-only and ANALYZE (with no explicit targets) now analyze regular tables, partitioned tables, and materialized views, but not foreign tables. Because this is a nontrivial behavior change, it is applied only to master. 
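A minimal SQL sketch of the behavior vacuumdb --analyze-only is being aligned
with (plain ANALYZE with no targets has always included partitioned tables;
the pg_stat query at the end is only an illustrative way to verify this):

    CREATE TABLE parent_table (a int) PARTITION BY LIST (a);
    CREATE TABLE child_table PARTITION OF parent_table FOR VALUES IN (1);
    INSERT INTO parent_table VALUES (1);
    ANALYZE;  -- with no targets, includes the partitioned table
    SELECT relname, last_analyze IS NOT NULL AS analyzed
      FROM pg_stat_user_tables
     WHERE relname IN ('parent_table', 'child_table');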
Reported-by: Zechman, Derek S Author: Laurenz Albe Co-authored-by: Mircea Cadariu Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/CO1PR04MB8281387B9AD9DE30976966BBC045A%40CO1PR04MB8281.namprd04.prod.outlook.com --- doc/src/sgml/ref/vacuumdb.sgml | 9 +++++++++ src/bin/scripts/t/100_vacuumdb.pl | 11 +++++++++++ src/bin/scripts/vacuumdb.c | 24 ++++++++++++++++++++---- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index c7d9dca17b867..53147480515e6 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -397,6 +397,15 @@ PostgreSQL documentation Multiple tables can be vacuumed by writing multiple switches. + + If no tables are specified with the option, + vacuumdb will clean all regular tables + and materialized views in the connected database. + If or + is also specified, + it will analyze all regular tables, partitioned tables, + and materialized views (but not foreign tables). + If you specify columns, you probably have to escape the parentheses diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl index ff56a13b46bbb..240f0fdd3e5cb 100644 --- a/src/bin/scripts/t/100_vacuumdb.pl +++ b/src/bin/scripts/t/100_vacuumdb.pl @@ -340,4 +340,15 @@ qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing partition stats'); +$node->safe_psql('postgres', + "CREATE TABLE parent_table (a INT) PARTITION BY LIST (a);\n" + . "CREATE TABLE child_table PARTITION OF parent_table FOR VALUES IN (1);\n" + . "INSERT INTO parent_table VALUES (1);\n"); +$node->issues_sql_like( + [ + 'vacuumdb', '--analyze-only', 'postgres' + ], + qr/statement: ANALYZE public.parent_table/s, + '--analyze-only updates statistics for partitioned tables'); + done_testing(); diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index 79b1096eb08c4..22093e50aa5ed 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -911,10 +911,26 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, */ if ((objfilter & OBJFILTER_TABLE) == 0) { - appendPQExpBufferStr(&catalog_query, - " AND c.relkind OPERATOR(pg_catalog.=) ANY (array[" - CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) "])\n"); + /* + * vacuumdb should generally follow the behavior of the underlying + * VACUUM and ANALYZE commands. If analyze_only is true, process + * regular tables, materialized views, and partitioned tables, just + * like ANALYZE (with no specific target tables) does. Otherwise, + * process only regular tables and materialized views, since VACUUM + * skips partitioned tables when no target tables are specified. 
+ */ + if (vacopts->analyze_only) + appendPQExpBufferStr(&catalog_query, + " AND c.relkind OPERATOR(pg_catalog.=) ANY (array[" + CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_MATVIEW) ", " + CppAsString2(RELKIND_PARTITIONED_TABLE) "])\n"); + else + appendPQExpBufferStr(&catalog_query, + " AND c.relkind OPERATOR(pg_catalog.=) ANY (array[" + CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_MATVIEW) "])\n"); + } /* From 1f2e51e3c7c2042e4cb184b9a9030c1353de298e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 20 Aug 2025 15:00:04 +0900 Subject: [PATCH 540/602] Fix assertion failure with replication slot release in single-user mode Some replication slot manipulations (logical decoding via SQL, advancing) were failing an assertion when releasing a slot in single-user mode, because active_pid was not set in a ReplicationSlot when its slot is acquired. ReplicationSlotAcquire() has some logic to be able to work with the single-user mode. This commit sets ReplicationSlot->active_pid to MyProcPid, to let the slot-related logic fall-through, considering the single process as the one holding the slot. Some TAP tests are added for various replication slot functions with the single-user mode, while on it, for slot creation, drop, advancing, copy and logical decoding with multiple slot types (temporary, physical vs logical). These tests are skipped on Windows, as direct calls of postgres --single would fail on permission failures. There is no platform-specific behavior that needs to be checked, so living with this restriction should be fine. The CI is OK with that, now let's see what the buildfarm tells. Author: Hayato Kuroda Reviewed-by: Paul A. Jungwirth Reviewed-by: Mutaamba Maasha Discussion: https://postgr.es/m/OSCPR01MB14966ED588A0328DAEBE8CB25F5FA2@OSCPR01MB14966.jpnprd01.prod.outlook.com Backpatch-through: 13 --- src/backend/replication/slot.c | 2 +- src/test/modules/test_misc/Makefile | 3 +- src/test/modules/test_misc/meson.build | 1 + .../test_misc/t/008_replslot_single_user.pl | 95 +++++++++++++++++++ 4 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 src/test/modules/test_misc/t/008_replslot_single_user.pl diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 8605776ad8631..fd0fdb96d4246 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -653,7 +653,7 @@ ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid) } else { - active_pid = MyProcPid; + s->active_pid = active_pid = MyProcPid; ReplicationSlotSetInactiveSince(s, 0, true); } LWLockRelease(ReplicationSlotControlLock); diff --git a/src/test/modules/test_misc/Makefile b/src/test/modules/test_misc/Makefile index 919a25fc67fd3..399b9094a3880 100644 --- a/src/test/modules/test_misc/Makefile +++ b/src/test/modules/test_misc/Makefile @@ -2,7 +2,8 @@ TAP_TESTS = 1 -EXTRA_INSTALL=src/test/modules/injection_points +EXTRA_INSTALL=src/test/modules/injection_points \ + contrib/test_decoding export enable_injection_points diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build index 9c50de7efb0f7..6b1e730bf46d0 100644 --- a/src/test/modules/test_misc/meson.build +++ b/src/test/modules/test_misc/meson.build @@ -16,6 +16,7 @@ tests += { 't/005_timeouts.pl', 't/006_signal_autovacuum.pl', 't/007_catcache_inval.pl', + 't/008_replslot_single_user.pl', ], }, } diff --git a/src/test/modules/test_misc/t/008_replslot_single_user.pl b/src/test/modules/test_misc/t/008_replslot_single_user.pl new 
file mode 100644 index 0000000000000..796700d621f88 --- /dev/null +++ b/src/test/modules/test_misc/t/008_replslot_single_user.pl @@ -0,0 +1,95 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +# Test manipulations of replication slots with the single-user mode. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Skip the tests on Windows, as single-user mode would fail on permission +# failure with privileged accounts. +if ($windows_os) +{ + plan skip_all => 'this test is not supported by this platform'; +} + +# Run set of queries in single-user mode. +sub test_single_mode +{ + my ($node, $queries, $testname) = @_; + + my $result = run_log( + [ + 'postgres', '--single', '-F', + '-c' => 'exit_on_error=true', + '-D' => $node->data_dir, + 'postgres' + ], + '<' => \$queries); + + ok($result, $testname); +} + +my $slot_logical = 'slot_logical'; +my $slot_physical = 'slot_physical'; + +# Initialize a node +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init(allows_streaming => "logical"); +$node->start; + +# Define initial table +$node->safe_psql('postgres', "CREATE TABLE foo (id int)"); + +$node->stop; + +test_single_mode( + $node, + "SELECT pg_create_logical_replication_slot('$slot_logical', 'test_decoding')", + "logical slot creation"); +test_single_mode( + $node, + "SELECT pg_create_physical_replication_slot('$slot_physical', true)", + "physical slot creation"); +test_single_mode( + $node, + "SELECT pg_create_physical_replication_slot('slot_tmp', true, true)", + "temporary physical slot creation"); + +test_single_mode( + $node, qq( +INSERT INTO foo VALUES (1); +SELECT pg_logical_slot_get_changes('$slot_logical', NULL, NULL); +), + "logical decoding"); + +test_single_mode( + $node, + "SELECT pg_replication_slot_advance('$slot_logical', pg_current_wal_lsn())", + "logical slot advance"); +test_single_mode( + $node, + "SELECT pg_replication_slot_advance('$slot_physical', pg_current_wal_lsn())", + "physical slot advance"); + +test_single_mode( + $node, + "SELECT pg_copy_logical_replication_slot('$slot_logical', 'slot_log_copy')", + "logical slot copy"); +test_single_mode( + $node, + "SELECT pg_copy_physical_replication_slot('$slot_physical', 'slot_phy_copy')", + "physical slot copy"); + +test_single_mode( + $node, + "SELECT pg_drop_replication_slot('$slot_logical')", + "logical slot drop"); +test_single_mode( + $node, + "SELECT pg_drop_replication_slot('$slot_physical')", + "physical slot drop"); + +done_testing(); From e9c043a11ac402d376def531a12883d1dac15315 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 20 Aug 2025 18:12:08 +0200 Subject: [PATCH 541/602] Minor error message enhancement In refuseDupeIndexAttach(), change from errdetail("Another index is already attached for partition \"%s\"."...) to errdetail("Another index \"%s\" is already attached for partition \"%s\"."...) so we can easily understand which index is already attached for partition \"%s\". 
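A self-contained reproducer for the improved message, adapted from the
regression test updated below (CREATE INDEX ... ON ONLY leaves the parent
index without attached partitions, so they can be attached manually):

    CREATE TABLE idxpart (a int, b int) PARTITION BY RANGE (a);
    CREATE TABLE idxpart1 PARTITION OF idxpart FOR VALUES FROM (0) TO (10);
    CREATE INDEX idxpart_a_b_idx ON ONLY idxpart (a, b);
    CREATE INDEX idxpart1_a_b_idx ON idxpart1 (a, b);
    ALTER INDEX idxpart_a_b_idx ATTACH PARTITION idxpart1_a_b_idx;
    CREATE INDEX idxpart1_2_a_b ON idxpart1 (a, b);
    ALTER INDEX idxpart_a_b_idx ATTACH PARTITION idxpart1_2_a_b;
    -- ERROR:  cannot attach index "idxpart1_2_a_b" as a partition of index "idxpart_a_b_idx"
    -- DETAIL:  Another index "idxpart1_a_b_idx" is already attached for partition "idxpart1".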
Author: Jian He Reviewed-by: Daniel Gustafsson Reviewed-by: Peter Eisentraut Discussion: https://www.postgresql.org/message-id/flat/CACJufxGBfykJ_1ztk9T%2BL_gLmkOSOF%2BmL9Mn4ZPydz-rh%3DLccQ%40mail.gmail.com --- src/backend/commands/tablecmds.c | 3 ++- src/test/regress/expected/indexing.out | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index c6dd2e020da23..082a3575d621e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -21750,7 +21750,8 @@ refuseDupeIndexAttach(Relation parentIdx, Relation partIdx, Relation partitionTb errmsg("cannot attach index \"%s\" as a partition of index \"%s\"", RelationGetRelationName(partIdx), RelationGetRelationName(parentIdx)), - errdetail("Another index is already attached for partition \"%s\".", + errdetail("Another index \"%s\" is already attached for partition \"%s\".", + get_rel_name(existingIdx), RelationGetRelationName(partitionTbl)))); } diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index bcf1db11d731d..4d29fb85293e0 100644 --- a/src/test/regress/expected/indexing.out +++ b/src/test/regress/expected/indexing.out @@ -248,7 +248,7 @@ alter index idxpart_a_b_idx attach partition idxpart1_a_b_idx; -- quiet create index idxpart1_2_a_b on idxpart1 (a, b); alter index idxpart_a_b_idx attach partition idxpart1_2_a_b; ERROR: cannot attach index "idxpart1_2_a_b" as a partition of index "idxpart_a_b_idx" -DETAIL: Another index is already attached for partition "idxpart1". +DETAIL: Another index "idxpart1_a_b_idx" is already attached for partition "idxpart1". drop table idxpart; -- make sure everything's gone select indexrelid::regclass, indrelid::regclass From a67d4847a4319ddee8a8ee7d8945c07301ada66e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 20 Aug 2025 16:09:18 -0400 Subject: [PATCH 542/602] Fix re-execution of a failed SQLFunctionCache entry. If we error out during execution of a SQL-language function, we will often leave behind non-null pointers in its SQLFunctionCache's cplan and eslist fields. This is problematic if the SQLFunctionCache is re-used, because those pointers will point at resources that were released during error cleanup. This problem escaped detection so far because ordinarily we won't re-use an FmgrInfo+SQLFunctionCache struct after a query error. However, in the rather improbable case that someone implements an opclass support function in SQL language, there will be long-lived FmgrInfos for it in the relcache, and then the problem is reachable after the function throws an error. To fix, add a flag to SQLFunctionCache that tracks whether execution escapes out of fmgr_sql, and clear out the relevant fields during init_sql_fcache if so. (This is going to need more thought if we ever try to share FMgrInfos across threads; but it's very far from being the only problem such a project will encounter, since many functions regard fn_extra as being query-local state.) This broke at commit 0313c5dc6; before that we did not try to re-use SQLFunctionCache state across calls. Hence, back-patch to v18. 
Bug: #19026 Reported-by: Alexander Lakhin Author: Tom Lane Discussion: https://postgr.es/m/19026-90aed5e71d0c8af3@postgresql.org Backpatch-through: 18 --- src/backend/executor/functions.c | 29 +++++++++++++++++++ .../regress/expected/create_function_sql.out | 21 +++++++++++++- src/test/regress/sql/create_function_sql.sql | 17 +++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 359aafea681b9..97455b1ed4a5b 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -143,6 +143,7 @@ typedef struct SQLFunctionCache { SQLFunctionHashEntry *func; /* associated SQLFunctionHashEntry */ + bool active; /* are we executing this cache entry? */ bool lazyEvalOK; /* true if lazyEval is safe */ bool shutdown_reg; /* true if registered shutdown callback */ bool lazyEval; /* true if using lazyEval for result query */ @@ -556,6 +557,28 @@ init_sql_fcache(FunctionCallInfo fcinfo, bool lazyEvalOK) finfo->fn_extra = fcache; } + /* + * If the SQLFunctionCache is marked as active, we must have errored out + * of a prior execution. Reset state. (It might seem that we could also + * reach this during recursive invocation of a SQL function, but we won't + * because that case won't involve re-use of the same FmgrInfo.) + */ + if (fcache->active) + { + /* + * In general, this stanza should clear all the same fields that + * ShutdownSQLFunction would. Note we must clear fcache->cplan + * without doing ReleaseCachedPlan, because error cleanup from the + * prior execution would have taken care of releasing that plan. + * Likewise, if tstore is still set then it is pointing at garbage. + */ + fcache->cplan = NULL; + fcache->eslist = NULL; + fcache->tstore = NULL; + fcache->shutdown_reg = false; + fcache->active = false; + } + /* * If we are resuming execution of a set-returning function, just keep * using the same cache. We do not ask funccache.c to re-validate the @@ -1597,6 +1620,9 @@ fmgr_sql(PG_FUNCTION_ARGS) */ fcache = init_sql_fcache(fcinfo, lazyEvalOK); + /* Mark fcache as active */ + fcache->active = true; + /* Remember info that we might need later to construct tuplestore */ fcache->tscontext = tscontext; fcache->randomAccess = randomAccess; @@ -1853,6 +1879,9 @@ fmgr_sql(PG_FUNCTION_ARGS) if (es == NULL) fcache->eslist = NULL; + /* Mark fcache as inactive */ + fcache->active = false; + error_context_stack = sqlerrcontext.previous; return result; diff --git a/src/test/regress/expected/create_function_sql.out b/src/test/regress/expected/create_function_sql.out index 963b6f863ff95..da112608d6619 100644 --- a/src/test/regress/expected/create_function_sql.out +++ b/src/test/regress/expected/create_function_sql.out @@ -733,6 +733,22 @@ SELECT double_append(array_append(ARRAY[q1], q2), q3) {4,5,6,4,5,6} (2 rows) +-- Check that we can re-use a SQLFunctionCache after a run-time error. +-- This function will fail with zero-divide at run time (not plan time). +CREATE FUNCTION part_hashint4_error(value int4, seed int8) RETURNS int8 +LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE AS +$$ SELECT value + seed + random()::int/0 $$; +-- Put it into an operator class so that FmgrInfo will be cached in relcache. 
+CREATE OPERATOR CLASS part_test_int4_ops_bad FOR TYPE int4 USING hash AS + FUNCTION 2 part_hashint4_error(int4, int8); +CREATE TABLE pt(i int) PARTITION BY hash (i part_test_int4_ops_bad); +CREATE TABLE p1 PARTITION OF pt FOR VALUES WITH (modulus 4, remainder 0); +INSERT INTO pt VALUES (1); +ERROR: division by zero +CONTEXT: SQL function "part_hashint4_error" statement 1 +INSERT INTO pt VALUES (1); +ERROR: division by zero +CONTEXT: SQL function "part_hashint4_error" statement 1 -- Things that shouldn't work: CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL AS 'SELECT ''not an integer'';'; @@ -773,7 +789,7 @@ CONTEXT: SQL function "test1" during startup RESET check_function_bodies; -- Cleanup DROP SCHEMA temp_func_test CASCADE; -NOTICE: drop cascades to 35 other objects +NOTICE: drop cascades to 38 other objects DETAIL: drop cascades to function functest_a_1(text,date) drop cascades to function functest_a_2(text[]) drop cascades to function functest_a_3() @@ -808,6 +824,9 @@ drop cascades to function create_and_insert() drop cascades to table ddl_test drop cascades to function alter_and_insert() drop cascades to function double_append(anyarray,anyelement) +drop cascades to function part_hashint4_error(integer,bigint) +drop cascades to operator family part_test_int4_ops_bad for access method hash +drop cascades to table pt drop cascades to function test1(anyelement) DROP USER regress_unpriv_user; RESET search_path; diff --git a/src/test/regress/sql/create_function_sql.sql b/src/test/regress/sql/create_function_sql.sql index 6d1c102d78082..3d5f2a92093bf 100644 --- a/src/test/regress/sql/create_function_sql.sql +++ b/src/test/regress/sql/create_function_sql.sql @@ -432,6 +432,23 @@ $$ SELECT array_append($1, $2) || array_append($1, $2) $$; SELECT double_append(array_append(ARRAY[q1], q2), q3) FROM (VALUES(1,2,3), (4,5,6)) v(q1,q2,q3); +-- Check that we can re-use a SQLFunctionCache after a run-time error. + +-- This function will fail with zero-divide at run time (not plan time). +CREATE FUNCTION part_hashint4_error(value int4, seed int8) RETURNS int8 +LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE AS +$$ SELECT value + seed + random()::int/0 $$; + +-- Put it into an operator class so that FmgrInfo will be cached in relcache. +CREATE OPERATOR CLASS part_test_int4_ops_bad FOR TYPE int4 USING hash AS + FUNCTION 2 part_hashint4_error(int4, int8); + +CREATE TABLE pt(i int) PARTITION BY hash (i part_test_int4_ops_bad); +CREATE TABLE p1 PARTITION OF pt FOR VALUES WITH (modulus 4, remainder 0); + +INSERT INTO pt VALUES (1); +INSERT INTO pt VALUES (1); + -- Things that shouldn't work: CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL From b5d87a823f2f828f65005b31f0a547549f164e5f Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 21 Aug 2025 13:25:03 +0900 Subject: [PATCH 543/602] doc: Improve description of wal_compression The description of this GUC provides a list of the situations where full-page writes are generated. However, it is not completely exact, mentioning only the cases where full_page_writes=on or base backups. It is possible to generate full-page writes in more situations than these two, making the description confusing as it implies that no other cases exist. The description is slightly reworded to take into account that other cases are possible, without mentioning them directly to minimize the maintenance burden should FPWs be generated in more contexts in the future. 
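For context, a short sketch (assuming a server built with LZ4 support):
wal_compression is a superuser-settable parameter, so the compression method
can be changed without a server restart:

    ALTER SYSTEM SET wal_compression = 'lz4';
    SELECT pg_reload_conf();
    SHOW wal_compression;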
Author: Jingtang Zhang Reviewed-by: Andrey Borodin Reviewed-by: Xuneng Zhou Discussion: https://postgr.es/m/CAPsk3_CtAYa_fy4p6=x7qtoutrdKvg1kGk46D5fsE=sMt2546g@mail.gmail.com Backpatch-through: 13 --- doc/src/sgml/config.sgml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 20ccb2d6b5447..0a4b3e55ba5ed 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3399,8 +3399,9 @@ include_dir 'conf.d' This parameter enables compression of WAL using the specified compression method. When enabled, the PostgreSQL - server compresses full page images written to WAL when - is on or during a base backup. + server compresses full page images written to WAL (e.g. when + is on, during a base backup, + etc.). A compressed page image will be decompressed during WAL replay. The supported methods are pglz, lz4 (if PostgreSQL From e8eb98754b112cf00d328becbe33958229cb20bf Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 21 Aug 2025 14:17:26 +0900 Subject: [PATCH 544/602] Apply some fat commas to commands of TAP tests This is similar to 19c6e92b13b2, in order to keep the style used in the scripts consistent for the option names and values used in commands. The places updated in this commit have been added recently in 71ea0d679543. These changes are cosmetic; there is no need for a backpatch. --- src/bin/pg_combinebackup/t/002_compare_backups.pl | 4 ++-- src/bin/pg_upgrade/t/002_pg_upgrade.pl | 6 +++--- src/test/recovery/t/027_stream_regress.pl | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/bin/pg_combinebackup/t/002_compare_backups.pl b/src/bin/pg_combinebackup/t/002_compare_backups.pl index 9c7fe56cb7c72..a3e29c055091e 100644 --- a/src/bin/pg_combinebackup/t/002_compare_backups.pl +++ b/src/bin/pg_combinebackup/t/002_compare_backups.pl @@ -174,7 +174,7 @@ $pitr1->command_ok( [ 'pg_dumpall', - '--restrict-key=test', + '--restrict-key' => 'test', '--no-sync', '--no-unlogged-table-data', '--file' => $dump1, @@ -184,7 +184,7 @@ $pitr2->command_ok( [ 'pg_dumpall', - '--restrict-key=test', + '--restrict-key' => 'test', '--no-sync', '--no-unlogged-table-data', '--file' => $dump2, diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 4b5e895809b40..823f41e754ced 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -86,7 +86,7 @@ sub get_dump_for_comparison $node->run_log( [ 'pg_dump', '--no-sync', - '--restrict-key=test', + '--restrict-key' => 'test', '-d' => $node->connstr($db), '-f' => $dumpfile ]); @@ -428,7 +428,7 @@ sub get_dump_for_comparison # that we need to use pg_dumpall from the new node here. my @dump_command = ( 'pg_dumpall', '--no-sync', - '--restrict-key=test', + '--restrict-key' => 'test', '--dbname' => $oldnode->connstr('postgres'), '--file' => $dump1_file); # --extra-float-digits is needed when upgrading from a version older than 11. @@ -626,7 +626,7 @@ sub get_dump_for_comparison # Second dump from the upgraded instance. @dump_command = ( 'pg_dumpall', '--no-sync', - '--restrict-key=test', + '--restrict-key' => 'test', '--dbname' => $newnode->connstr('postgres'), '--file' => $dump2_file); # --extra-float-digits is needed when upgrading from a version older than 11. 
diff --git a/src/test/recovery/t/027_stream_regress.pl b/src/test/recovery/t/027_stream_regress.pl index 12d8852fb4b3d..589c79d97d3a9 100644 --- a/src/test/recovery/t/027_stream_regress.pl +++ b/src/test/recovery/t/027_stream_regress.pl @@ -117,7 +117,7 @@ 'pg_dumpall', '--file' => $outputdir . '/primary.dump', '--no-sync', '--no-statistics', - '--restrict-key=test', + '--restrict-key' => 'test', '--port' => $node_primary->port, '--no-unlogged-table-data', # if unlogged, standby has schema only ], @@ -127,7 +127,7 @@ 'pg_dumpall', '--file' => $outputdir . '/standby.dump', '--no-sync', '--no-statistics', - '--restrict-key=test', + '--restrict-key' => 'test', '--port' => $node_standby_1->port, ], 'dump standby server'); @@ -147,7 +147,7 @@ '--schema' => 'pg_catalog', '--file' => $outputdir . '/catalogs_primary.dump', '--no-sync', - '--restrict-key=test', + '--restrict-key' => 'test', '--port', $node_primary->port, '--no-unlogged-table-data', 'regression', @@ -159,7 +159,7 @@ '--schema' => 'pg_catalog', '--file' => $outputdir . '/catalogs_standby.dump', '--no-sync', - '--restrict-key=test', + '--restrict-key' => 'test', '--port' => $node_standby_1->port, 'regression', ], From 6e09c960ebb353c6cbcc05191c68aea4079df277 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 21 Aug 2025 09:15:49 +0200 Subject: [PATCH 545/602] PL/Python: Refactor for event trigger support Change is_trigger type from boolean to enum. That's a preparation for adding event trigger support. Author: Euler Taveira Co-authored-by: Dimitri Fontaine Reviewed-by: Pavel Stehule Discussion: https://www.postgresql.org/message-id/flat/03f03515-2068-4f5b-b357-8fb540883c38%40app.fastmail.com --- src/pl/plpython/plpy_exec.c | 2 +- src/pl/plpython/plpy_main.c | 24 ++++++++++++++++++------ src/pl/plpython/plpy_procedure.c | 15 ++++++++++----- src/pl/plpython/plpy_procedure.h | 13 +++++++++++-- src/tools/pgindent/typedefs.list | 1 + 5 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/pl/plpython/plpy_exec.c b/src/pl/plpython/plpy_exec.c index 28fbd443b98c9..22835174b6987 100644 --- a/src/pl/plpython/plpy_exec.c +++ b/src/pl/plpython/plpy_exec.c @@ -509,7 +509,7 @@ PLy_function_save_args(PLyProcedure *proc) Py_XINCREF(result->args); /* If it's a trigger, also save "TD" */ - if (proc->is_trigger) + if (proc->is_trigger == PLPY_TRIGGER) { result->td = PyDict_GetItemString(proc->globals, "TD"); Py_XINCREF(result->td); diff --git a/src/pl/plpython/plpy_main.c b/src/pl/plpython/plpy_main.c index f36eadbadc66d..900c65e8da066 100644 --- a/src/pl/plpython/plpy_main.c +++ b/src/pl/plpython/plpy_main.c @@ -38,7 +38,7 @@ PG_FUNCTION_INFO_V1(plpython3_call_handler); PG_FUNCTION_INFO_V1(plpython3_inline_handler); -static bool PLy_procedure_is_trigger(Form_pg_proc procStruct); +static PLyTrigType PLy_procedure_is_trigger(Form_pg_proc procStruct); static void plpython_error_callback(void *arg); static void plpython_inline_error_callback(void *arg); static void PLy_init_interp(void); @@ -163,7 +163,7 @@ plpython3_validator(PG_FUNCTION_ARGS) Oid funcoid = PG_GETARG_OID(0); HeapTuple tuple; Form_pg_proc procStruct; - bool is_trigger; + PLyTrigType is_trigger; if (!CheckFunctionValidatorAccess(fcinfo->flinfo->fn_oid, funcoid)) PG_RETURN_VOID(); @@ -235,14 +235,14 @@ plpython3_call_handler(PG_FUNCTION_ARGS) Relation tgrel = ((TriggerData *) fcinfo->context)->tg_relation; HeapTuple trv; - proc = PLy_procedure_get(funcoid, RelationGetRelid(tgrel), true); + proc = PLy_procedure_get(funcoid, RelationGetRelid(tgrel), PLPY_TRIGGER); 
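Together with the follow-up patch below, this enables event triggers written
in PL/Python, along the lines of this sketch (function and trigger names are
made up; TD["event"] and TD["tag"] are described in the documentation added
by the next commit):

    CREATE FUNCTION py_log_ddl() RETURNS event_trigger
    LANGUAGE plpython3u AS $$
    plpy.notice("event = %s, tag = %s" % (TD["event"], TD["tag"]))
    $$;

    CREATE EVENT TRIGGER py_log_ddl_trg ON ddl_command_start
        EXECUTE FUNCTION py_log_ddl();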
exec_ctx->curr_proc = proc; trv = PLy_exec_trigger(fcinfo, proc); retval = PointerGetDatum(trv); } else { - proc = PLy_procedure_get(funcoid, InvalidOid, false); + proc = PLy_procedure_get(funcoid, InvalidOid, PLPY_NOT_TRIGGER); exec_ctx->curr_proc = proc; retval = PLy_exec_function(fcinfo, proc); } @@ -336,10 +336,22 @@ plpython3_inline_handler(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -static bool +static PLyTrigType PLy_procedure_is_trigger(Form_pg_proc procStruct) { - return (procStruct->prorettype == TRIGGEROID); + PLyTrigType ret; + + switch (procStruct->prorettype) + { + case TRIGGEROID: + ret = PLPY_TRIGGER; + break; + default: + ret = PLPY_NOT_TRIGGER; + break; + } + + return ret; } static void diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c index c176d24e80118..22d9ef0fe0677 100644 --- a/src/pl/plpython/plpy_procedure.c +++ b/src/pl/plpython/plpy_procedure.c @@ -21,7 +21,7 @@ static HTAB *PLy_procedure_cache = NULL; -static PLyProcedure *PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger); +static PLyProcedure *PLy_procedure_create(HeapTuple procTup, Oid fn_oid, PLyTrigType is_trigger); static bool PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup); static char *PLy_procedure_munge_source(const char *name, const char *src); @@ -63,15 +63,20 @@ PLy_procedure_name(PLyProcedure *proc) * be used with, so no sensible fn_rel can be passed. */ PLyProcedure * -PLy_procedure_get(Oid fn_oid, Oid fn_rel, bool is_trigger) +PLy_procedure_get(Oid fn_oid, Oid fn_rel, PLyTrigType is_trigger) { - bool use_cache = !(is_trigger && fn_rel == InvalidOid); + bool use_cache; HeapTuple procTup; PLyProcedureKey key; PLyProcedureEntry *volatile entry = NULL; PLyProcedure *volatile proc = NULL; bool found = false; + if (is_trigger == PLPY_TRIGGER && fn_rel == InvalidOid) + use_cache = false; + else + use_cache = true; + procTup = SearchSysCache1(PROCOID, ObjectIdGetDatum(fn_oid)); if (!HeapTupleIsValid(procTup)) elog(ERROR, "cache lookup failed for function %u", fn_oid); @@ -127,7 +132,7 @@ PLy_procedure_get(Oid fn_oid, Oid fn_rel, bool is_trigger) * Create a new PLyProcedure structure */ static PLyProcedure * -PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger) +PLy_procedure_create(HeapTuple procTup, Oid fn_oid, PLyTrigType is_trigger) { char procName[NAMEDATALEN + 256]; Form_pg_proc procStruct; @@ -200,7 +205,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger) * get information required for output conversion of the return value, * but only if this isn't a trigger. */ - if (!is_trigger) + if (is_trigger == PLPY_NOT_TRIGGER) { Oid rettype = procStruct->prorettype; HeapTuple rvTypeTup; diff --git a/src/pl/plpython/plpy_procedure.h b/src/pl/plpython/plpy_procedure.h index 5db854fc8bd2d..601b91d5d94e9 100644 --- a/src/pl/plpython/plpy_procedure.h +++ b/src/pl/plpython/plpy_procedure.h @@ -11,6 +11,15 @@ extern void init_procedure_caches(void); +/* + * Trigger type + */ +typedef enum PLyTrigType +{ + PLPY_TRIGGER, + PLPY_NOT_TRIGGER, +} PLyTrigType; + /* saved arguments for outer recursion level or set-returning function */ typedef struct PLySavedArgs { @@ -33,7 +42,7 @@ typedef struct PLyProcedure bool fn_readonly; bool is_setof; /* true, if function returns result set */ bool is_procedure; - bool is_trigger; /* called as trigger? */ + PLyTrigType is_trigger; /* called as trigger? 
*/ PLyObToDatum result; /* Function result output conversion info */ PLyDatumToOb result_in; /* For converting input tuples in a trigger */ char *src; /* textual procedure code, after mangling */ @@ -65,7 +74,7 @@ typedef struct PLyProcedureEntry /* PLyProcedure manipulation */ extern char *PLy_procedure_name(PLyProcedure *proc); -extern PLyProcedure *PLy_procedure_get(Oid fn_oid, Oid fn_rel, bool is_trigger); +extern PLyProcedure *PLy_procedure_get(Oid fn_oid, Oid fn_rel, PLyTrigType is_trigger); extern void PLy_procedure_compile(PLyProcedure *proc, const char *src); extern void PLy_procedure_delete(PLyProcedure *proc); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e4a9ec65ab4da..a13e816289023 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1990,6 +1990,7 @@ PLyScalarToOb PLySubtransactionData PLySubtransactionObject PLyTransformToOb +PLyTrigType PLyTupleToOb PLyUnicode_FromStringAndSize_t PLy_elog_impl_t From 53eff471c69dc8b0c01f046d3fdcc460eb90d0e5 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 21 Aug 2025 09:15:55 +0200 Subject: [PATCH 546/602] PL/Python: Add event trigger support Allow event triggers to be written in PL/Python. It provides a TD dictionary with some information about the event trigger. Author: Euler Taveira Co-authored-by: Dimitri Fontaine Reviewed-by: Pavel Stehule Discussion: https://www.postgresql.org/message-id/flat/03f03515-2068-4f5b-b357-8fb540883c38%40app.fastmail.com --- doc/src/sgml/plpython.sgml | 76 +++++++++++++++++++ src/pl/plpython/expected/plpython_trigger.out | 27 +++++++ src/pl/plpython/plpy_exec.c | 42 ++++++++++ src/pl/plpython/plpy_exec.h | 1 + src/pl/plpython/plpy_main.c | 11 +++ src/pl/plpython/plpy_procedure.h | 1 + src/pl/plpython/sql/plpython_trigger.sql | 24 ++++++ 7 files changed, 182 insertions(+) diff --git a/doc/src/sgml/plpython.sgml b/doc/src/sgml/plpython.sgml index cb065bf5f88db..27c4467ba7da3 100644 --- a/doc/src/sgml/plpython.sgml +++ b/doc/src/sgml/plpython.sgml @@ -662,6 +662,14 @@ $$ LANGUAGE plpython3u; in PL/Python + + PL/Python can be used to define trigger + functions. + PostgreSQL requires that a function that is to + be called as a trigger must be declared as a function with no arguments and + a return type of trigger. + + When a function is used as a trigger, the dictionary TD contains trigger-related values: @@ -769,6 +777,74 @@ $$ LANGUAGE plpython3u; + + Event Trigger Functions + + + event trigger + in PL/Python + + + + PL/Python can be used to define event triggers + (see also ). + PostgreSQL requires that a function that is to + be called as an event trigger must be declared as a function with no + arguments and a return type of event_trigger. + + + + When a function is used as an event trigger, the dictionary + TD contains trigger-related values: + + + + TD["event"] + + + The event the trigger was fired for, as a string, for example + ddl_command_start. + + + + + + TD["tag"] + + + The command tag for which the trigger was fired, as a string, for + example DROP TABLE. + + + + + + + + shows an example of an + event trigger function in PL/Python. + + + + A <application>PL/Python</application> Event Trigger Function + + + This example trigger simply raises a NOTICE message + each time a supported command is executed. 
+ + + +CREATE OR REPLACE FUNCTION pysnitch() RETURNS event_trigger +LANGUAGE plpython3u +AS $$ + plpy.notice("TD[event] => " + TD["event"] + " ; TD[tag] => " + TD["tag"]); +$$; + +CREATE EVENT TRIGGER pysnitch ON ddl_command_start EXECUTE FUNCTION pysnitch(); + + + + Database Access diff --git a/src/pl/plpython/expected/plpython_trigger.out b/src/pl/plpython/expected/plpython_trigger.out index 64eab2fa3f4b5..bd35b220c5eda 100644 --- a/src/pl/plpython/expected/plpython_trigger.out +++ b/src/pl/plpython/expected/plpython_trigger.out @@ -646,3 +646,30 @@ SELECT * FROM recursive_trigger_test; 1 | 2 (2 rows) +-- event triggers +CREATE OR REPLACE FUNCTION pysnitch() RETURNS event_trigger +LANGUAGE plpython3u +AS $$ + plpy.notice("TD[event] => " + TD["event"] + " ; TD[tag] => " + TD["tag"]); +$$; +CREATE EVENT TRIGGER python_a_snitch ON ddl_command_start + EXECUTE PROCEDURE pysnitch(); +CREATE EVENT TRIGGER python_b_snitch ON ddl_command_end + EXECUTE PROCEDURE pysnitch(); +CREATE OR REPLACE FUNCTION foobar() RETURNS int LANGUAGE sql AS $$SELECT 1;$$; +NOTICE: TD[event] => ddl_command_start ; TD[tag] => CREATE FUNCTION +NOTICE: TD[event] => ddl_command_end ; TD[tag] => CREATE FUNCTION +ALTER FUNCTION foobar() COST 77; +NOTICE: TD[event] => ddl_command_start ; TD[tag] => ALTER FUNCTION +NOTICE: TD[event] => ddl_command_end ; TD[tag] => ALTER FUNCTION +DROP FUNCTION foobar(); +NOTICE: TD[event] => ddl_command_start ; TD[tag] => DROP FUNCTION +NOTICE: TD[event] => ddl_command_end ; TD[tag] => DROP FUNCTION +CREATE TABLE foo(); +NOTICE: TD[event] => ddl_command_start ; TD[tag] => CREATE TABLE +NOTICE: TD[event] => ddl_command_end ; TD[tag] => CREATE TABLE +DROP TABLE foo; +NOTICE: TD[event] => ddl_command_start ; TD[tag] => DROP TABLE +NOTICE: TD[event] => ddl_command_end ; TD[tag] => DROP TABLE +DROP EVENT TRIGGER python_a_snitch; +DROP EVENT TRIGGER python_b_snitch; diff --git a/src/pl/plpython/plpy_exec.c b/src/pl/plpython/plpy_exec.c index 22835174b6987..fd06b9e0e4e98 100644 --- a/src/pl/plpython/plpy_exec.c +++ b/src/pl/plpython/plpy_exec.c @@ -9,6 +9,7 @@ #include "access/htup_details.h" #include "access/xact.h" #include "catalog/pg_type.h" +#include "commands/event_trigger.h" #include "commands/trigger.h" #include "executor/spi.h" #include "funcapi.h" @@ -427,6 +428,47 @@ PLy_exec_trigger(FunctionCallInfo fcinfo, PLyProcedure *proc) return rv; } +/* + * event trigger subhandler + */ +void +PLy_exec_event_trigger(FunctionCallInfo fcinfo, PLyProcedure *proc) +{ + EventTriggerData *tdata; + PyObject *volatile pltdata = NULL; + + Assert(CALLED_AS_EVENT_TRIGGER(fcinfo)); + tdata = (EventTriggerData *) fcinfo->context; + + PG_TRY(); + { + PyObject *pltevent, + *plttag; + + pltdata = PyDict_New(); + if (!pltdata) + PLy_elog(ERROR, NULL); + + pltevent = PLyUnicode_FromString(tdata->event); + PyDict_SetItemString(pltdata, "event", pltevent); + Py_DECREF(pltevent); + + plttag = PLyUnicode_FromString(GetCommandTagName(tdata->tag)); + PyDict_SetItemString(pltdata, "tag", plttag); + Py_DECREF(plttag); + + PLy_procedure_call(proc, "TD", pltdata); + + if (SPI_finish() != SPI_OK_FINISH) + elog(ERROR, "SPI_finish() failed"); + } + PG_FINALLY(); + { + Py_XDECREF(pltdata); + } + PG_END_TRY(); +} + /* helper functions for Python code execution */ static PyObject * diff --git a/src/pl/plpython/plpy_exec.h b/src/pl/plpython/plpy_exec.h index 68da1ffcb2ef1..f35eabbd8ee8e 100644 --- a/src/pl/plpython/plpy_exec.h +++ b/src/pl/plpython/plpy_exec.h @@ -9,5 +9,6 @@ extern Datum PLy_exec_function(FunctionCallInfo fcinfo, 
PLyProcedure *proc); extern HeapTuple PLy_exec_trigger(FunctionCallInfo fcinfo, PLyProcedure *proc); +extern void PLy_exec_event_trigger(FunctionCallInfo fcinfo, PLyProcedure *proc); #endif /* PLPY_EXEC_H */ diff --git a/src/pl/plpython/plpy_main.c b/src/pl/plpython/plpy_main.c index 900c65e8da066..70fc2c9257a89 100644 --- a/src/pl/plpython/plpy_main.c +++ b/src/pl/plpython/plpy_main.c @@ -9,6 +9,7 @@ #include "access/htup_details.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" +#include "commands/event_trigger.h" #include "commands/trigger.h" #include "executor/spi.h" #include "miscadmin.h" @@ -240,6 +241,13 @@ plpython3_call_handler(PG_FUNCTION_ARGS) trv = PLy_exec_trigger(fcinfo, proc); retval = PointerGetDatum(trv); } + else if (CALLED_AS_EVENT_TRIGGER(fcinfo)) + { + proc = PLy_procedure_get(funcoid, InvalidOid, PLPY_EVENT_TRIGGER); + exec_ctx->curr_proc = proc; + PLy_exec_event_trigger(fcinfo, proc); + retval = (Datum) 0; + } else { proc = PLy_procedure_get(funcoid, InvalidOid, PLPY_NOT_TRIGGER); @@ -346,6 +354,9 @@ PLy_procedure_is_trigger(Form_pg_proc procStruct) case TRIGGEROID: ret = PLPY_TRIGGER; break; + case EVENT_TRIGGEROID: + ret = PLPY_EVENT_TRIGGER; + break; default: ret = PLPY_NOT_TRIGGER; break; diff --git a/src/pl/plpython/plpy_procedure.h b/src/pl/plpython/plpy_procedure.h index 601b91d5d94e9..3ef22844a9b71 100644 --- a/src/pl/plpython/plpy_procedure.h +++ b/src/pl/plpython/plpy_procedure.h @@ -17,6 +17,7 @@ extern void init_procedure_caches(void); typedef enum PLyTrigType { PLPY_TRIGGER, + PLPY_EVENT_TRIGGER, PLPY_NOT_TRIGGER, } PLyTrigType; diff --git a/src/pl/plpython/sql/plpython_trigger.sql b/src/pl/plpython/sql/plpython_trigger.sql index 440549c0785da..e1a552e079fe8 100644 --- a/src/pl/plpython/sql/plpython_trigger.sql +++ b/src/pl/plpython/sql/plpython_trigger.sql @@ -492,3 +492,27 @@ CREATE TRIGGER recursive_trigger_trig INSERT INTO recursive_trigger_test VALUES (0, 0); UPDATE recursive_trigger_test SET a = 11 WHERE b = 0; SELECT * FROM recursive_trigger_test; + + +-- event triggers + +CREATE OR REPLACE FUNCTION pysnitch() RETURNS event_trigger +LANGUAGE plpython3u +AS $$ + plpy.notice("TD[event] => " + TD["event"] + " ; TD[tag] => " + TD["tag"]); +$$; + +CREATE EVENT TRIGGER python_a_snitch ON ddl_command_start + EXECUTE PROCEDURE pysnitch(); +CREATE EVENT TRIGGER python_b_snitch ON ddl_command_end + EXECUTE PROCEDURE pysnitch(); + +CREATE OR REPLACE FUNCTION foobar() RETURNS int LANGUAGE sql AS $$SELECT 1;$$; +ALTER FUNCTION foobar() COST 77; +DROP FUNCTION foobar(); + +CREATE TABLE foo(); +DROP TABLE foo; + +DROP EVENT TRIGGER python_a_snitch; +DROP EVENT TRIGGER python_b_snitch; From 12da45742cfd15d9fab151b25400d96a1febcbde Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 21 Aug 2025 22:18:11 +0900 Subject: [PATCH 547/602] Disallow server start with sync_replication_slots = on and wal_level < logical. Replication slot synchronization (sync_replication_slots = on) requires wal_level to be logical. This commit prevents the server from starting if sync_replication_slots is enabled but wal_level is set to minimal or replica. Failing early during startup helps users catch invalid configurations immediately, which is important because changing wal_level requires a server restart. 
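For example, with a hypothetical configuration like this in
postgresql.conf (a minimal sketch, not taken from the patch):

    wal_level = replica
    sync_replication_slots = on

the server now refuses to start and reports the new error message,
"replication slot synchronization ("sync_replication_slots" = on)
requires "wal_level" >= "logical"".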
Author: Fujii Masao Reviewed-by: Shveta Malik Discussion: https://postgr.es/m/CAH0PTU_pc3oHi__XESF9ZigCyzai1Mo3LsOdFyQA4aUDkm01RA@mail.gmail.com --- src/backend/postmaster/postmaster.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index e01d9f0cfe81e..e1d643b013d77 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -854,6 +854,9 @@ PostmasterMain(int argc, char *argv[]) if (summarize_wal && wal_level == WAL_LEVEL_MINIMAL) ereport(ERROR, (errmsg("WAL cannot be summarized when \"wal_level\" is \"minimal\""))); + if (sync_replication_slots && wal_level < WAL_LEVEL_LOGICAL) + ereport(ERROR, + (errmsg("replication slot synchronization (\"sync_replication_slots\" = on) requires \"wal_level\" >= \"logical\""))); /* * Other one-time internal sanity checks can go here, if they are fast. From 47932f3cdc4c221dbd9207cb21e63c862fedd189 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 21 Aug 2025 19:40:12 +0200 Subject: [PATCH 548/602] Use consistent type for pgaio_io_get_id() result The result of pgaio_io_get_id() was being assigned to a mix of int and uint32 variables. This fixes it to use int consistently, which seems the most correct. Also change the queue empty special value in method_worker.c to -1 from UINT32_MAX. Reviewed-by: Andres Freund Discussion: https://www.postgresql.org/message-id/70c784b3-f60b-4652-b8a6-75e5f051243e%40eisentraut.org --- src/backend/storage/aio/aio_funcs.c | 4 ++-- src/backend/storage/aio/method_worker.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/backend/storage/aio/aio_funcs.c b/src/backend/storage/aio/aio_funcs.c index 905ea129c813d..34f8f6327338f 100644 --- a/src/backend/storage/aio/aio_funcs.c +++ b/src/backend/storage/aio/aio_funcs.c @@ -56,7 +56,7 @@ pg_get_aios(PG_FUNCTION_ARGS) for (uint64 i = 0; i < pgaio_ctl->io_handle_count; i++) { PgAioHandle *live_ioh = &pgaio_ctl->io_handles[i]; - uint32 ioh_id = pgaio_io_get_id(live_ioh); + int ioh_id = pgaio_io_get_id(live_ioh); Datum values[PG_GET_AIOS_COLS] = {0}; bool nulls[PG_GET_AIOS_COLS] = {0}; ProcNumber owner; @@ -152,7 +152,7 @@ pg_get_aios(PG_FUNCTION_ARGS) nulls[0] = false; /* column: IO's id */ - values[1] = UInt32GetDatum(ioh_id); + values[1] = Int32GetDatum(ioh_id); /* column: IO's generation */ values[2] = Int64GetDatum(start_generation); diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c index bf8f77e6ff606..b5ac073a910df 100644 --- a/src/backend/storage/aio/method_worker.c +++ b/src/backend/storage/aio/method_worker.c @@ -58,7 +58,7 @@ typedef struct PgAioWorkerSubmissionQueue uint32 mask; uint32 head; uint32 tail; - uint32 sqes[FLEXIBLE_ARRAY_MEMBER]; + int sqes[FLEXIBLE_ARRAY_MEMBER]; } PgAioWorkerSubmissionQueue; typedef struct PgAioWorkerSlot @@ -107,7 +107,7 @@ pgaio_worker_queue_shmem_size(int *queue_size) *queue_size = pg_nextpower2_32(io_worker_queue_size); return offsetof(PgAioWorkerSubmissionQueue, sqes) + - sizeof(uint32) * *queue_size; + sizeof(int) * *queue_size; } static size_t @@ -198,15 +198,15 @@ pgaio_worker_submission_queue_insert(PgAioHandle *ioh) return true; } -static uint32 +static int pgaio_worker_submission_queue_consume(void) { PgAioWorkerSubmissionQueue *queue; - uint32 result; + int result; queue = io_worker_submission_queue; if (queue->tail == queue->head) - return UINT32_MAX; /* empty */ + return -1; /* empty */ result = queue->sqes[queue->tail]; queue->tail 
= (queue->tail + 1) & (queue->size - 1);
@@ -470,7 +470,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		 * to ensure that we don't see an outdated data in the handle.
 		 */
 		LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
-		if ((io_index = pgaio_worker_submission_queue_consume()) == UINT32_MAX)
+		if ((io_index = pgaio_worker_submission_queue_consume()) == -1)
 		{
 			/*
 			 * Nothing to do. Mark self idle.
@@ -500,7 +500,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		for (int i = 0; i < nlatches; ++i)
 			SetLatch(latches[i]);

-		if (io_index != UINT32_MAX)
+		if (io_index != -1)
 		{
 			PgAioHandle *ioh = NULL;

From 53eff471c69dc8b0c01f046d3fdcc460eb90d0e5 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 22 Aug 2025 09:03:59 +0900
Subject: [PATCH 549/602] Ignore temporary relations in RelidByRelfilenumber()

Temporary relations may share the same RelFileNumber with a permanent
relation or with temporary relations associated with other sessions.
Being able to uniquely identify a temporary relation would require
RelidByRelfilenumber() to know about the proc number of the temporary
relation it wants to identify, something it is not designed for since
its introduction in f01d1ae3a104.

There are currently three callers of RelidByRelfilenumber():
- autoprewarm.
- Logical decoding, reorder buffer.
- pg_filenode_relation(), which attempts to find a relation OID based
  on a tablespace OID and a RelFileNumber.

This makes the situation particularly problematic for the first two
cases, leading to the possibility of random ERRORs due to
inconsistencies that temporary relations can create in the cache
maintained by RelidByRelfilenumber(). The third case should be less of
an issue, as I suspect that there are few direct callers of
pg_filenode_relation().

The window where these ERRORs can happen is very narrow, requiring an
OID wraparound to create a lookup conflict in RelidByRelfilenumber()
with a temporary table reusing the same OID as another relation already
cached. The problem is easier to reach in workloads with a high OID
consumption rate, especially with a higher number of temporary
relations created.

We could get pg_filenode_relation() and RelidByRelfilenumber() to work
with temporary relations if they were provided the means to identify
them, such as an optional proc number given in input, but the years
have shown that we do not have a use case for it yet. Note that this
could not be backpatched if pg_filenode_relation() needed changes. It
is simpler to ignore temporary relations.
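For example, a minimal sketch of the resulting behavior (the table
name here is arbitrary, for illustration only; same idea as the
regression test added below):

    CREATE TEMP TABLE tmp_filenode_check (a int);
    SELECT pg_filenode_relation(reltablespace, pg_relation_filenode(oid))
      FROM pg_class WHERE relname = 'tmp_filenode_check';
    -- returns NULL: the lookup now ignores temporary relations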
Reported-by: Shenhao Wang Author: Vignesh C Reviewed-By: Ashutosh Bapat Reviewed-By: Robert Haas Reviewed-By: Kyotaro Horiguchi Reviewed-By: Takamichi Osumi Reviewed-By: Michael Paquier Reviewed-By: Masahiko Sawada Reported-By: Shenhao Wang Discussion: https://postgr.es/m/bbaaf9f9-ebb2-645f-54bb-34d6efc7ac42@fujitsu.com Backpatch-through: 13 --- doc/src/sgml/func/func-admin.sgml | 3 ++- src/backend/utils/adt/dbsize.c | 3 +++ src/backend/utils/cache/relfilenumbermap.c | 8 ++++++++ src/test/regress/expected/alter_table.out | 5 ++++- src/test/regress/expected/create_table.out | 12 ++++++++++++ src/test/regress/sql/alter_table.sql | 6 ++++-- src/test/regress/sql/create_table.sql | 8 ++++++++ 7 files changed, 41 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index 6347fe60b0c4e..57ff333159f00 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -1834,7 +1834,8 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset pg_relation_filepath. For a relation in the database's default tablespace, the tablespace can be specified as zero. Returns NULL if no relation in the current database - is associated with the given values. + is associated with the given values, or if dealing with a temporary + relation. diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 25865b660ef83..3a059f4ace043 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -938,6 +938,9 @@ pg_relation_filenode(PG_FUNCTION_ARGS) * * We don't fail but return NULL if we cannot find a mapping. * + * Temporary relations are not detected, returning NULL (see + * RelidByRelfilenumber() for the reasons). + * * InvalidOid can be passed instead of the current database's default * tablespace. */ diff --git a/src/backend/utils/cache/relfilenumbermap.c b/src/backend/utils/cache/relfilenumbermap.c index 8a2f6f8c69318..0b6f9cf3fa191 100644 --- a/src/backend/utils/cache/relfilenumbermap.c +++ b/src/backend/utils/cache/relfilenumbermap.c @@ -130,6 +130,11 @@ InitializeRelfilenumberMap(void) * Map a relation's (tablespace, relfilenumber) to a relation's oid and cache * the result. * + * A temporary relation may share its relfilenumber with a permanent relation + * or temporary relations created in other backends. Being able to uniquely + * identify a temporary relation would require a backend's proc number, which + * we do not know about. Hence, this function ignores this case. + * * Returns InvalidOid if no relation matching the criteria could be found. */ Oid @@ -208,6 +213,9 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber) { Form_pg_class classform = (Form_pg_class) GETSTRUCT(ntp); + if (classform->relpersistence == RELPERSISTENCE_TEMP) + continue; + if (found) elog(ERROR, "unexpected duplicate for tablespace %u, relfilenumber %u", diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 08984dd98f168..b33e06a0d3d51 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -3567,12 +3567,15 @@ SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment F -- filenode function call can return NULL for a relation dropped concurrently -- with the call's surrounding query, so ignore a NULL mapped_oid for -- relations that no longer exist after all calls finish. 
+-- Temporary relations are ignored, as not supported by pg_filenode_relation(). CREATE TEMP TABLE filenode_mapping AS SELECT oid, mapped_oid, reltablespace, relfilenode, relname FROM pg_class, pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid -WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid; +WHERE relkind IN ('r', 'i', 'S', 't', 'm') + AND relpersistence != 't' + AND mapped_oid IS DISTINCT FROM oid; SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL; oid | mapped_oid | reltablespace | relfilenode | relname diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out index 76604705a93cc..029beb20aaee3 100644 --- a/src/test/regress/expected/create_table.out +++ b/src/test/regress/expected/create_table.out @@ -102,6 +102,18 @@ ERROR: tables declared WITH OIDS are not supported -- but explicitly not adding oids is still supported CREATE TEMP TABLE withoutoid() WITHOUT OIDS; DROP TABLE withoutoid; CREATE TEMP TABLE withoutoid() WITH (oids = false); DROP TABLE withoutoid; +-- temporary tables are ignored by pg_filenode_relation(). +CREATE TEMP TABLE relation_filenode_check(c1 int); +SELECT relpersistence, + pg_filenode_relation (reltablespace, pg_relation_filenode(oid)) + FROM pg_class + WHERE relname = 'relation_filenode_check'; + relpersistence | pg_filenode_relation +----------------+---------------------- + t | +(1 row) + +DROP TABLE relation_filenode_check; -- check restriction with default expressions -- invalid use of column reference in default expressions CREATE TABLE default_expr_column (id int DEFAULT (id)); diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index fc6e36d0e7882..90bf5c1768238 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -2202,13 +2202,15 @@ SELECT conname as constraint, obj_description(oid, 'pg_constraint') as comment F -- filenode function call can return NULL for a relation dropped concurrently -- with the call's surrounding query, so ignore a NULL mapped_oid for -- relations that no longer exist after all calls finish. +-- Temporary relations are ignored, as not supported by pg_filenode_relation(). CREATE TEMP TABLE filenode_mapping AS SELECT oid, mapped_oid, reltablespace, relfilenode, relname FROM pg_class, pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) AS mapped_oid -WHERE relkind IN ('r', 'i', 'S', 't', 'm') AND mapped_oid IS DISTINCT FROM oid; - +WHERE relkind IN ('r', 'i', 'S', 't', 'm') + AND relpersistence != 't' + AND mapped_oid IS DISTINCT FROM oid; SELECT m.* FROM filenode_mapping m LEFT JOIN pg_class c ON c.oid = m.oid WHERE c.oid IS NOT NULL OR m.mapped_oid IS NOT NULL; diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql index 37a227148e9c2..9b3e93b416426 100644 --- a/src/test/regress/sql/create_table.sql +++ b/src/test/regress/sql/create_table.sql @@ -68,6 +68,14 @@ CREATE TABLE withoid() WITH (oids = true); CREATE TEMP TABLE withoutoid() WITHOUT OIDS; DROP TABLE withoutoid; CREATE TEMP TABLE withoutoid() WITH (oids = false); DROP TABLE withoutoid; +-- temporary tables are ignored by pg_filenode_relation(). 
+CREATE TEMP TABLE relation_filenode_check(c1 int);
+SELECT relpersistence,
+       pg_filenode_relation (reltablespace, pg_relation_filenode(oid))
+  FROM pg_class
+  WHERE relname = 'relation_filenode_check';
+DROP TABLE relation_filenode_check;
+
 -- check restriction with default expressions
 -- invalid use of column reference in default expressions
 CREATE TABLE default_expr_column (id int DEFAULT (id));

From 13b935cd5217457f62e9936fce668d15515a3072 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Fri, 22 Aug 2025 11:59:02 +0900
Subject: [PATCH 550/602] Change dynahash.c and hsearch.h to use int64 instead of long

This code was relying on "long", which is signed and 8 bytes wide
everywhere except on Windows, where it is 4 bytes. That could
potentially expose it to overflows, even if the current uses in the
code are fine as far as I know. This code is now able to rely on a
type with the same sizeof() everywhere, int64.

long was used for sizes, partition counts and entry counts. Some
callers of the dynahash.c routines used long declarations, which can
be cleaned up to use int64 instead. There was one shortcut based on
SIZEOF_LONG, which can be removed. long is entirely removed from
dynahash.c and hsearch.h.

Similar work was done in b1e5c9fa9ac4.

Reviewed-by: Peter Eisentraut
Reviewed-by: Chao Li
Discussion: https://postgr.es/m/aKQYp-bKTRtRauZ6@paquier.xyz
---
 .../pg_stat_statements/pg_stat_statements.c |  4 +-
 src/backend/catalog/storage.c               |  2 +-
 src/backend/storage/ipc/shmem.c             |  4 +-
 src/backend/storage/lmgr/lock.c             |  2 +-
 src/backend/storage/lmgr/predicate.c        |  2 +-
 src/backend/utils/hash/dynahash.c           | 92 +++++++++----------
 src/include/storage/shmem.h                 |  2 +-
 src/include/utils/dynahash.h                |  2 +-
 src/include/utils/hsearch.h                 | 16 ++--
 9 files changed, 61 insertions(+), 65 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 9fc9635d3300d..1cb368c8590ba 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2713,8 +2713,8 @@ entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
 	HASH_SEQ_STATUS hash_seq;
 	pgssEntry  *entry;
 	FILE	   *qfile;
-	long		num_entries;
-	long		num_remove = 0;
+	int64		num_entries;
+	int64		num_remove = 0;
 	pgssHashKey key;
 	TimestampTz stats_reset;

diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 227df90f89c97..fb784acf4af24 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -586,7 +586,7 @@ RelFileLocatorSkippingWAL(RelFileLocator rlocator)
 Size
 EstimatePendingSyncsSpace(void)
 {
-	long		entries;
+	int64		entries;

 	entries = pendingSyncHash ?
hash_get_num_entries(pendingSyncHash) : 0; return mul_size(1 + entries, sizeof(RelFileLocator)); diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index d12a3ca06842f..a0770e867968a 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -330,8 +330,8 @@ InitShmemIndex(void) */ HTAB * ShmemInitHash(const char *name, /* table string name for shmem index */ - long init_size, /* initial table size */ - long max_size, /* max size of the table */ + int64 init_size, /* initial table size */ + int64 max_size, /* max size of the table */ HASHCTL *infoP, /* info about key and bucket size */ int hash_flags) /* info about infoP */ { diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index f8c88147160e9..233b85b623d5d 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -443,7 +443,7 @@ void LockManagerShmemInit(void) { HASHCTL info; - long init_table_size, + int64 init_table_size, max_table_size; bool found; diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index c07fb58835557..c1d8511ad17a9 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -1145,7 +1145,7 @@ void PredicateLockShmemInit(void) { HASHCTL info; - long max_table_size; + int64 max_table_size; Size requestSize; bool found; diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index a7094917c20ca..1aeee5be42acd 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -154,7 +154,7 @@ typedef HASHBUCKET *HASHSEGMENT; typedef struct { slock_t mutex; /* spinlock for this freelist */ - long nentries; /* number of entries in associated buckets */ + int64 nentries; /* number of entries in associated buckets */ HASHELEMENT *freeList; /* chain of free elements */ } FreeListData; @@ -182,8 +182,8 @@ struct HASHHDR /* These fields can change, but not in a partitioned table */ /* Also, dsize can't change in a shared table, even if unpartitioned */ - long dsize; /* directory size */ - long nsegs; /* number of allocated segments (<= dsize) */ + int64 dsize; /* directory size */ + int64 nsegs; /* number of allocated segments (<= dsize) */ uint32 max_bucket; /* ID of maximum bucket in use */ uint32 high_mask; /* mask to modulo into entire table */ uint32 low_mask; /* mask to modulo into lower half of table */ @@ -191,9 +191,9 @@ struct HASHHDR /* These fields are fixed at hashtable creation */ Size keysize; /* hash key length in bytes */ Size entrysize; /* total user element size in bytes */ - long num_partitions; /* # partitions (must be power of 2), or 0 */ - long max_dsize; /* 'dsize' limit if directory is fixed size */ - long ssize; /* segment size --- must be power of 2 */ + int64 num_partitions; /* # partitions (must be power of 2), or 0 */ + int64 max_dsize; /* 'dsize' limit if directory is fixed size */ + int64 ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ int nelem_alloc; /* number of entries to allocate at once */ bool isfixed; /* if true, don't enlarge */ @@ -236,7 +236,7 @@ struct HTAB /* We keep local copies of these fixed values to reduce contention */ Size keysize; /* hash key length in bytes */ - long ssize; /* segment size --- must be power of 2 */ + int64 ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ /* @@ -277,12 +277,12 @@ static bool expand_table(HTAB *hashp); static 
HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx); static void hdefault(HTAB *hashp); static int choose_nelem_alloc(Size entrysize); -static bool init_htab(HTAB *hashp, long nelem); +static bool init_htab(HTAB *hashp, int64 nelem); pg_noreturn static void hash_corrupted(HTAB *hashp); static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr); -static long next_pow2_long(long num); -static int next_pow2_int(long num); +static int64 next_pow2_int64(int64 num); +static int next_pow2_int(int64 num); static void register_seq_scan(HTAB *hashp); static void deregister_seq_scan(HTAB *hashp); static bool has_seq_scans(HTAB *hashp); @@ -355,7 +355,7 @@ string_compare(const char *key1, const char *key2, Size keysize) * large nelem will penalize hash_seq_search speed without buying much. */ HTAB * -hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) +hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags) { HTAB *hashp; HASHHDR *hctl; @@ -697,7 +697,7 @@ choose_nelem_alloc(Size entrysize) * arrays */ static bool -init_htab(HTAB *hashp, long nelem) +init_htab(HTAB *hashp, int64 nelem) { HASHHDR *hctl = hashp->hctl; HASHSEGMENT *segp; @@ -780,10 +780,10 @@ init_htab(HTAB *hashp, long nelem) * NB: assumes that all hash structure parameters have default values! */ Size -hash_estimate_size(long num_entries, Size entrysize) +hash_estimate_size(int64 num_entries, Size entrysize) { Size size; - long nBuckets, + int64 nBuckets, nSegments, nDirEntries, nElementAllocs, @@ -791,9 +791,9 @@ hash_estimate_size(long num_entries, Size entrysize) elementAllocCnt; /* estimate number of buckets wanted */ - nBuckets = next_pow2_long(num_entries); + nBuckets = next_pow2_int64(num_entries); /* # of segments needed for nBuckets */ - nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1); + nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1); /* directory entries */ nDirEntries = DEF_DIRSIZE; while (nDirEntries < nSegments) @@ -826,17 +826,17 @@ hash_estimate_size(long num_entries, Size entrysize) * * XXX this had better agree with the behavior of init_htab()... */ -long -hash_select_dirsize(long num_entries) +int64 +hash_select_dirsize(int64 num_entries) { - long nBuckets, + int64 nBuckets, nSegments, nDirEntries; /* estimate number of buckets wanted */ - nBuckets = next_pow2_long(num_entries); + nBuckets = next_pow2_int64(num_entries); /* # of segments needed for nBuckets */ - nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1); + nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1); /* directory entries */ nDirEntries = DEF_DIRSIZE; while (nDirEntries < nSegments) @@ -887,7 +887,7 @@ hash_stats(const char *caller, HTAB *hashp) HASHHDR *hctl = hashp->hctl; elog(DEBUG4, - "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: %ld Key Size: %zu Max Bucket: %u Segment Count: %ld", + "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: " INT64_FORMAT " Key Size: %zu Max Bucket: %u Segment Count: " INT64_FORMAT, caller != NULL ? 
caller : "(unknown)", hashp->tabname, hctl->accesses, hctl->collisions, hctl->expansions, hash_get_num_entries(hashp), hctl->keysize, hctl->max_bucket, hctl->nsegs); @@ -993,7 +993,7 @@ hash_search_with_hash_value(HTAB *hashp, * Can't split if running in partitioned mode, nor if frozen, nor if * table is the subject of any active hash_seq_search scans. */ - if (hctl->freeList[0].nentries > (long) hctl->max_bucket && + if (hctl->freeList[0].nentries > (int64) hctl->max_bucket && !IS_PARTITIONED(hctl) && !hashp->frozen && !has_seq_scans(hashp)) (void) expand_table(hashp); @@ -1332,11 +1332,11 @@ get_hash_entry(HTAB *hashp, int freelist_idx) /* * hash_get_num_entries -- get the number of entries in a hashtable */ -long +int64 hash_get_num_entries(HTAB *hashp) { int i; - long sum = hashp->hctl->freeList[0].nentries; + int64 sum = hashp->hctl->freeList[0].nentries; /* * We currently don't bother with acquiring the mutexes; it's only @@ -1417,9 +1417,9 @@ hash_seq_search(HASH_SEQ_STATUS *status) HTAB *hashp; HASHHDR *hctl; uint32 max_bucket; - long ssize; - long segment_num; - long segment_ndx; + int64 ssize; + int64 segment_num; + int64 segment_ndx; HASHSEGMENT segp; uint32 curBucket; HASHELEMENT *curElem; @@ -1548,11 +1548,11 @@ expand_table(HTAB *hashp) HASHHDR *hctl = hashp->hctl; HASHSEGMENT old_seg, new_seg; - long old_bucket, + int64 old_bucket, new_bucket; - long new_segnum, + int64 new_segnum, new_segndx; - long old_segnum, + int64 old_segnum, old_segndx; HASHBUCKET *oldlink, *newlink; @@ -1620,7 +1620,7 @@ expand_table(HTAB *hashp) currElement = nextElement) { nextElement = currElement->link; - if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket) + if ((int64) calc_bucket(hctl, currElement->hashvalue) == old_bucket) { *oldlink = currElement; oldlink = &currElement->link; @@ -1644,9 +1644,9 @@ dir_realloc(HTAB *hashp) { HASHSEGMENT *p; HASHSEGMENT *old_p; - long new_dsize; - long old_dirsize; - long new_dirsize; + int64 new_dsize; + int64 old_dirsize; + int64 new_dirsize; if (hashp->hctl->max_dsize != NO_MAX_DSIZE) return false; @@ -1780,8 +1780,8 @@ hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr) { HASHHDR *hctl = hashp->hctl; HASHSEGMENT segp; - long segment_num; - long segment_ndx; + int64 segment_num; + int64 segment_ndx; uint32 bucket; bucket = calc_bucket(hctl, hashvalue); @@ -1814,25 +1814,21 @@ hash_corrupted(HTAB *hashp) /* calculate ceil(log base 2) of num */ int -my_log2(long num) +my_log2(int64 num) { /* * guard against too-large input, which would be invalid for * pg_ceil_log2_*() */ - if (num > LONG_MAX / 2) - num = LONG_MAX / 2; + if (num > PG_INT64_MAX / 2) + num = PG_INT64_MAX / 2; -#if SIZEOF_LONG < 8 - return pg_ceil_log2_32(num); -#else return pg_ceil_log2_64(num); -#endif } -/* calculate first power of 2 >= num, bounded to what will fit in a long */ -static long -next_pow2_long(long num) +/* calculate first power of 2 >= num, bounded to what will fit in a int64 */ +static int64 +next_pow2_int64(int64 num) { /* my_log2's internal range check is sufficient */ return 1L << my_log2(num); @@ -1840,7 +1836,7 @@ next_pow2_long(long num) /* calculate first power of 2 >= num, bounded to what will fit in an int */ static int -next_pow2_int(long num) +next_pow2_int(int64 num) { if (num > INT_MAX / 2) num = INT_MAX / 2; diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h index c1f668ded9523..8604feca93ba0 100644 --- a/src/include/storage/shmem.h +++ b/src/include/storage/shmem.h @@ -35,7 +35,7 @@ extern void 
*ShmemAllocNoError(Size size); extern void *ShmemAllocUnlocked(Size size); extern bool ShmemAddrIsValid(const void *addr); extern void InitShmemIndex(void); -extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size, +extern HTAB *ShmemInitHash(const char *name, int64 init_size, int64 max_size, HASHCTL *infoP, int hash_flags); extern void *ShmemInitStruct(const char *name, Size size, bool *foundPtr); extern Size add_size(Size s1, Size s2); diff --git a/src/include/utils/dynahash.h b/src/include/utils/dynahash.h index 8a31d9524e2a4..a4362d3f65e59 100644 --- a/src/include/utils/dynahash.h +++ b/src/include/utils/dynahash.h @@ -15,6 +15,6 @@ #ifndef DYNAHASH_H #define DYNAHASH_H -extern int my_log2(long num); +extern int my_log2(int64 num); #endif /* DYNAHASH_H */ diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index 80deb8e543e6f..cb09a4cbe8cbd 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -65,12 +65,12 @@ typedef struct HTAB HTAB; typedef struct HASHCTL { /* Used if HASH_PARTITION flag is set: */ - long num_partitions; /* # partitions (must be power of 2) */ + int64 num_partitions; /* # partitions (must be power of 2) */ /* Used if HASH_SEGMENT flag is set: */ - long ssize; /* segment size */ + int64 ssize; /* segment size */ /* Used if HASH_DIRSIZE flag is set: */ - long dsize; /* (initial) directory size */ - long max_dsize; /* limit to dsize if dir size is limited */ + int64 dsize; /* (initial) directory size */ + int64 max_dsize; /* limit to dsize if dir size is limited */ /* Used if HASH_ELEM flag is set (which is now required): */ Size keysize; /* hash key length in bytes */ Size entrysize; /* total user element size in bytes */ @@ -129,7 +129,7 @@ typedef struct /* * prototypes for functions in dynahash.c */ -extern HTAB *hash_create(const char *tabname, long nelem, +extern HTAB *hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags); extern void hash_destroy(HTAB *hashp); extern void hash_stats(const char *caller, HTAB *hashp); @@ -141,7 +141,7 @@ extern void *hash_search_with_hash_value(HTAB *hashp, const void *keyPtr, bool *foundPtr); extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry, const void *newKeyPtr); -extern long hash_get_num_entries(HTAB *hashp); +extern int64 hash_get_num_entries(HTAB *hashp); extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp); extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp, @@ -149,8 +149,8 @@ extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, extern void *hash_seq_search(HASH_SEQ_STATUS *status); extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp); -extern Size hash_estimate_size(long num_entries, Size entrysize); -extern long hash_select_dirsize(long num_entries); +extern Size hash_estimate_size(int64 num_entries, Size entrysize); +extern int64 hash_select_dirsize(int64 num_entries); extern Size hash_get_shared_size(HASHCTL *info, int flags); extern void AtEOXact_HashTables(bool isCommit); extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth); From 123e65fdb7fe51920ad29119484148ea1b0afcd2 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 22 Aug 2025 05:29:36 +0000 Subject: [PATCH 551/602] Doc: Fix typo in logicaldecoding.sgml. 
Author: Hayato Kuroda
Backpatch-through: 17, where it was introduced
Discussion: https://postgr.es/m/OSCPR01MB149662EC5467B4135398E3731F532A@OSCPR01MB14966.jpnprd01.prod.outlook.com
---
 doc/src/sgml/logicaldecoding.sgml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml
index a1f2efb24208f..b803a819cf1f2 100644
--- a/doc/src/sgml/logicaldecoding.sgml
+++ b/doc/src/sgml/logicaldecoding.sgml
@@ -420,10 +420,10 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU
      When slot synchronization is configured as recommended,
      and the initial synchronization is performed either automatically or
-     manually via pg_sync_replication_slot, the standby can persist the
-     synchronized slot only if the following condition is met: The logical
-     replication slot on the primary must retain WALs and system catalog
-     rows that are still available on the standby. This ensures data
+     manually via pg_sync_replication_slots, the standby
+     can persist the synchronized slot only if the following condition is met:
+     The logical replication slot on the primary must retain WALs and system
+     catalog rows that are still available on the standby. This ensures data
      integrity and allows logical replication to continue smoothly after
      promotion.
      If the required WALs or catalog rows have already been purged from the

From 16a0039dc0d1d0cdfadaf38cd3a30f3c8f590c48 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 22 Aug 2025 08:51:29 +0200
Subject: [PATCH 552/602] Reduce lock level for ALTER DOMAIN ... VALIDATE CONSTRAINT

Reduce from ShareLock to ShareUpdateExclusiveLock. Validation during
ALTER DOMAIN ... ADD CONSTRAINT keeps using ShareLock.

Example:

create domain d1 as int;
create table t (a d1);
alter domain d1 add constraint cc10 check (value > 10) not valid;
begin;
alter domain d1 validate constraint cc10;

-- another session
insert into t values (8);

Now we should still be able to perform DML operations on table t while
the domain constraint is being validated. The equivalent already works
for table constraints.

Author: jian he
Reviewed-by: Dilip Kumar
Reviewed-by: wenhui qiu
Discussion: https://www.postgresql.org/message-id/flat/CACJufxHz92A88NLRTA2msgE2dpXpE-EoZ2QO61od76-6bfqurA%40mail.gmail.com
---
 src/backend/commands/typecmds.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 26d985193aea4..c6de04819f174 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -126,7 +126,7 @@ static Oid	findTypeSubscriptingFunction(List *procname, Oid typeOid);
 static Oid	findRangeSubOpclass(List *opcname, Oid subtype);
 static Oid	findRangeCanonicalFunction(List *procname, Oid typeOid);
 static Oid	findRangeSubtypeDiffFunction(List *procname, Oid subtype);
-static void validateDomainCheckConstraint(Oid domainoid, const char *ccbin);
+static void validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmode);
 static void validateDomainNotNullConstraint(Oid domainoid);
 static List *get_rels_with_domain(Oid domainOid, LOCKMODE lockmode);
 static void checkEnumOwner(HeapTuple tup);
@@ -2986,7 +2986,7 @@ AlterDomainAddConstraint(List *names, Node *newConstraint,
 	 * to.
*/ if (!constr->skip_validation) - validateDomainCheckConstraint(domainoid, ccbin); + validateDomainCheckConstraint(domainoid, ccbin, ShareLock); /* * We must send out an sinval message for the domain, to ensure that @@ -3098,7 +3098,12 @@ AlterDomainValidateConstraint(List *names, const char *constrName) val = SysCacheGetAttrNotNull(CONSTROID, tuple, Anum_pg_constraint_conbin); conbin = TextDatumGetCString(val); - validateDomainCheckConstraint(domainoid, conbin); + /* + * Locking related relations with ShareUpdateExclusiveLock is ok because + * not-yet-valid constraints are still enforced against concurrent inserts + * or updates. + */ + validateDomainCheckConstraint(domainoid, conbin, ShareUpdateExclusiveLock); /* * Now update the catalog, while we have the door open. @@ -3191,9 +3196,16 @@ validateDomainNotNullConstraint(Oid domainoid) /* * Verify that all columns currently using the domain satisfy the given check * constraint expression. + * + * It is used to validate existing constraints and to add newly created check + * constraints to a domain. + * + * The lockmode is used for relations using the domain. It should be + * ShareLock when adding a new constraint to domain. It can be + * ShareUpdateExclusiveLock when validating an existing constraint. */ static void -validateDomainCheckConstraint(Oid domainoid, const char *ccbin) +validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmode) { Expr *expr = (Expr *) stringToNode(ccbin); List *rels; @@ -3210,9 +3222,7 @@ validateDomainCheckConstraint(Oid domainoid, const char *ccbin) exprstate = ExecPrepareExpr(expr, estate); /* Fetch relation list with attributes based on this domain */ - /* ShareLock is sufficient to prevent concurrent data changes */ - - rels = get_rels_with_domain(domainoid, ShareLock); + rels = get_rels_with_domain(domainoid, lockmode); foreach(rt, rels) { From 50f770c3d92cfb2d62724d8c810d37c1f6249a11 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 22 Aug 2025 13:07:46 +0300 Subject: [PATCH 553/602] Revert GetTransactionSnapshot() to return historic snapshot during LR Commit 1585ff7387 changed GetTransactionSnapshot() to throw an error if it's called during logical decoding, instead of returning the historic snapshot. I made that change for extra protection, because a historic snapshot can only be used to access catalog tables while GetTransactionSnapshot() is usually called when you're executing arbitrary queries. You might get very subtle visibility problems if you tried to use the historic snapshot for arbitrary queries. There's no built-in code in PostgreSQL that calls GetTransactionSnapshot() during logical decoding, but it turns out that the pglogical extension does just that, to evaluate row filter expressions. You would get weird results if the row filter runs arbitrary queries, but it is sane as long as you don't access any non-catalog tables. Even though there are no checks to enforce that in pglogical, a typical row filter expression does not access any tables and works fine. Accessing tables marked with the user_catalog_table = true option is also OK. To fix pglogical with row filters, and any other extensions that might do similar things, revert GetTransactionSnapshot() to return a historic snapshot during logical decoding. To try to still catch the unsafe usage of historic snapshots, add checks in heap_beginscan() and index_beginscan() to complain if you try to use a historic snapshot to scan a non-catalog table. 
We're very close to the version 18 release however, so add those new checks only in master. Backpatch-through: 18 Reported-by: Noah Misch Reviewed-by: Noah Misch Discussion: https://www.postgresql.org/message-id/20250809222338.cc.nmisch@google.com --- src/backend/access/heap/heapam.c | 9 +++++++++ src/backend/access/index/indexam.c | 8 ++++++++ src/backend/utils/time/snapmgr.c | 19 +++++++++++++++---- src/include/utils/snapmgr.h | 3 +++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0dcd6ee817e04..ee692c03c3c81 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1143,6 +1143,15 @@ heap_beginscan(Relation relation, Snapshot snapshot, if (!(snapshot && IsMVCCSnapshot(snapshot))) scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE; + /* Check that a historic snapshot is not used for non-catalog tables */ + if (snapshot && + IsHistoricMVCCSnapshot(snapshot) && + !RelationIsAccessibleInLogicalDecoding(relation)) + { + elog(ERROR, "cannot query non-catalog table \"%s\" during logical decoding", + RelationGetRelationName(relation)); + } + /* * For seqscan and sample scans in a serializable transaction, acquire a * predicate lock on the entire relation. This is required not only to diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 1a4f36fe0a970..31b22d9f39780 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -263,6 +263,14 @@ index_beginscan(Relation heapRelation, Assert(snapshot != InvalidSnapshot); + /* Check that a historic snapshot is not used for non-catalog tables */ + if (IsHistoricMVCCSnapshot(snapshot) && + !RelationIsAccessibleInLogicalDecoding(heapRelation)) + { + elog(ERROR, "cannot query non-catalog table \"%s\" during logical decoding", + RelationGetRelationName(heapRelation)); + } + scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false); /* diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index ea35f30f49457..65561cc6bc337 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -271,12 +271,23 @@ Snapshot GetTransactionSnapshot(void) { /* - * This should not be called while doing logical decoding. Historic - * snapshots are only usable for catalog access, not for general-purpose - * queries. + * Return historic snapshot if doing logical decoding. + * + * Historic snapshots are only usable for catalog access, not for + * general-purpose queries. The caller is responsible for ensuring that + * the snapshot is used correctly! (PostgreSQL code never calls this + * during logical decoding, but extensions can do it.) */ if (HistoricSnapshotActive()) - elog(ERROR, "cannot take query snapshot during logical decoding"); + { + /* + * We'll never need a non-historic transaction snapshot in this + * (sub-)transaction, so there's no need to be careful to set one up + * for later calls to GetTransactionSnapshot(). + */ + Assert(!FirstSnapshotSet); + return HistoricSnapshot; + } /* First call in transaction? 
*/ if (!FirstSnapshotSet) diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index d346be716423e..604c1f902169d 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -56,6 +56,9 @@ extern PGDLLIMPORT SnapshotData SnapshotToastData; ((snapshot)->snapshot_type == SNAPSHOT_MVCC || \ (snapshot)->snapshot_type == SNAPSHOT_HISTORIC_MVCC) +#define IsHistoricMVCCSnapshot(snapshot) \ + ((snapshot)->snapshot_type == SNAPSHOT_HISTORIC_MVCC) + extern Snapshot GetTransactionSnapshot(void); extern Snapshot GetLatestSnapshot(void); extern void SnapshotSetCommandId(CommandId curcid); From 661f821ef0c3078c70096b09dc44fb8fed56f2b4 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 22 Aug 2025 13:35:05 +0300 Subject: [PATCH 554/602] Use ereport() rather than elog() Noah pointed this out before I committed 50f770c3d9, but I accidentally pushed the old version with elog() anyway. Oops. Reported-by: Noah Misch Discussion: https://www.postgresql.org/message-id/20250820003756.31.nmisch@google.com --- src/backend/access/heap/heapam.c | 6 ++++-- src/backend/access/index/indexam.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index ee692c03c3c81..7491cc3cb93bc 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1148,8 +1148,10 @@ heap_beginscan(Relation relation, Snapshot snapshot, IsHistoricMVCCSnapshot(snapshot) && !RelationIsAccessibleInLogicalDecoding(relation)) { - elog(ERROR, "cannot query non-catalog table \"%s\" during logical decoding", - RelationGetRelationName(relation)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot query non-catalog table \"%s\" during logical decoding", + RelationGetRelationName(relation)))); } /* diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 31b22d9f39780..86d11f4ec791d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -267,8 +267,10 @@ index_beginscan(Relation heapRelation, if (IsHistoricMVCCSnapshot(snapshot) && !RelationIsAccessibleInLogicalDecoding(heapRelation)) { - elog(ERROR, "cannot query non-catalog table \"%s\" during logical decoding", - RelationGetRelationName(heapRelation)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), + errmsg("cannot query non-catalog table \"%s\" during logical decoding", + RelationGetRelationName(heapRelation)))); } scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false); From f6f0542266f0a04e94ac2a67ae6890d6095101ab Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 22 Aug 2025 14:39:25 +0300 Subject: [PATCH 555/602] libpq: Handle OOM by disconnecting instead of hanging or skipping msgs In most cases, if an out-of-memory situation happens, we attach the error message to the connection and report it at the next PQgetResult() call. However, there are a few cases, while processing messages that are not associated with any particular query, where we handled failed allocations differently and not very nicely: - If we ran out of memory while processing an async notification, getNotify() either returned EOF, which stopped processing any further data until more data was received from the server, or silently dropped the notification. Returning EOF is problematic because if more data never arrives, e.g. 
because the connection was used just to wait for the notification, or
because the next ReadyForQuery was already received and buffered, it
would get stuck forever. Silently dropping a notification is not nice
either.

- (New in v18) If we ran out of memory while receiving a BackendKeyData
message, getBackendKeyData() returned EOF, which has the same issues as
in getNotify().

- If we ran out of memory while saving a received ParameterStatus
message, we just skipped it. A later call to PQparameterStatus() would
return NULL, even though the server did send the status.

Change all those cases to terminate the connection instead. Our options
for reporting those errors are limited, but it seems better to
terminate than try to soldier on. Applications should handle connection
loss gracefully, whereas silently missing a notification, parameter
status, or cancellation key could cause much weirder problems.

This also changes the error message on OOM while expanding the input
buffer. It used to report "cannot allocate memory for input buffer",
followed by "lost synchronization with server: got message type ...".
The "lost synchronization" message seems unnecessary, so remove that
and report only "cannot allocate memory for input buffer". (The comment
speculated that the out of memory could indeed be caused by loss of
sync, but that seems highly unlikely.)

This evolved from a more narrow patch by Jelte Fennema-Nio, which was
reviewed by Jacob Champion.

Somewhat arbitrarily, backpatch to v18 but no further. These are
long-standing issues, but we haven't received any complaints from the
field. We can backpatch more later, if needed.

Co-authored-by: Jelte Fennema-Nio
Reviewed-by: Jelte Fennema-Nio
Reviewed-by: Jacob Champion
Discussion: https://www.postgresql.org/message-id/df892f9f-5923-4046-9d6f-8c48d8980b50@iki.fi
Backpatch-through: 18
---
 src/interfaces/libpq/fe-exec.c      |  13 +++-
 src/interfaces/libpq/fe-protocol3.c | 114 ++++++++++++++++++----------
 src/interfaces/libpq/libpq-int.h    |   2 +-
 3 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 4256ae5c0cc5f..0b1e37ec30bbc 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -1076,8 +1076,12 @@ pqSaveMessageField(PGresult *res, char code, const char *value)

 /*
  * pqSaveParameterStatus - remember parameter status sent by backend
+ *
+ * Returns 1 on success, 0 on out-of-memory. (Note that on out-of-memory, we
+ * have already released the old value of the parameter, if any. The only
+ * really safe way to recover is to terminate the connection.)
*/ -void +int pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { pgParameterStatus *pstatus; @@ -1119,6 +1123,11 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) pstatus->next = conn->pstatus; conn->pstatus = pstatus; } + else + { + /* out of memory */ + return 0; + } /* * Save values of settings that are of interest to libpq in fields of the @@ -1190,6 +1199,8 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { conn->scram_sha_256_iterations = atoi(value); } + + return 1; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 1599de757d130..0cca832c06ac8 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -43,6 +43,7 @@ (id) == PqMsg_RowDescription) +static void handleFatalError(PGconn *conn); static void handleSyncLoss(PGconn *conn, char id, int msgLength); static int getRowDescriptions(PGconn *conn, int msgLength); static int getParamDescriptions(PGconn *conn, int msgLength); @@ -120,12 +121,12 @@ pqParseInput3(PGconn *conn) conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); } return; } @@ -456,6 +457,11 @@ pqParseInput3(PGconn *conn) /* Normal case: parsing agrees with specified length */ pqParseDone(conn, conn->inCursor); } + else if (conn->error_result && conn->status == CONNECTION_BAD) + { + /* The connection was abandoned and we already reported it */ + return; + } else { /* Trouble --- report it */ @@ -470,15 +476,14 @@ pqParseInput3(PGconn *conn) } /* - * handleSyncLoss: clean up after loss of message-boundary sync + * handleFatalError: clean up after a nonrecoverable error * - * There isn't really a lot we can do here except abandon the connection. + * This is for errors where we need to abandon the connection. The caller has + * already saved the error message in conn->errorMessage. */ static void -handleSyncLoss(PGconn *conn, char id, int msgLength) +handleFatalError(PGconn *conn) { - libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d", - id, msgLength); /* build an error result holding the error message */ pqSaveErrorResult(conn); conn->asyncStatus = PGASYNC_READY; /* drop out of PQgetResult wait loop */ @@ -487,6 +492,19 @@ handleSyncLoss(PGconn *conn, char id, int msgLength) conn->status = CONNECTION_BAD; /* No more connection to backend */ } +/* + * handleSyncLoss: clean up after loss of message-boundary sync + * + * There isn't really a lot we can do here except abandon the connection. + */ +static void +handleSyncLoss(PGconn *conn, char id, int msgLength) +{ + libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d", + id, msgLength); + handleFatalError(conn); +} + /* * parseInput subroutine to read a 'T' (row descriptions) message. 
* We'll build a new PGresult structure (unless called for a Describe @@ -1519,7 +1537,11 @@ getParameterStatus(PGconn *conn) return EOF; } /* And save it */ - pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data); + if (!pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data)) + { + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + } termPQExpBuffer(&valueBuf); return 0; } @@ -1551,8 +1573,8 @@ getBackendKeyData(PGconn *conn, int msgLength) if (conn->be_cancel_key == NULL) { libpq_append_conn_error(conn, "out of memory"); - /* discard the message */ - return EOF; + handleFatalError(conn); + return 0; } if (pqGetnchar(conn->be_cancel_key, cancel_key_len, conn)) { @@ -1589,10 +1611,21 @@ getNotify(PGconn *conn) /* must save name while getting extra string */ svname = strdup(conn->workBuffer.data); if (!svname) - return EOF; + { + /* + * Notify messages can arrive at any state, so we cannot associate the + * error with any particular query. There's no way to return back an + * "async error", so the best we can do is drop the connection. That + * seems better than silently ignoring the notification. + */ + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + return 0; + } if (pqGets(&conn->workBuffer, conn)) { - free(svname); + if (svname) + free(svname); return EOF; } @@ -1604,21 +1637,26 @@ getNotify(PGconn *conn) nmlen = strlen(svname); extralen = strlen(conn->workBuffer.data); newNotify = (PGnotify *) malloc(sizeof(PGnotify) + nmlen + extralen + 2); - if (newNotify) - { - newNotify->relname = (char *) newNotify + sizeof(PGnotify); - strcpy(newNotify->relname, svname); - newNotify->extra = newNotify->relname + nmlen + 1; - strcpy(newNotify->extra, conn->workBuffer.data); - newNotify->be_pid = be_pid; - newNotify->next = NULL; - if (conn->notifyTail) - conn->notifyTail->next = newNotify; - else - conn->notifyHead = newNotify; - conn->notifyTail = newNotify; + if (!newNotify) + { + free(svname); + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + return 0; } + newNotify->relname = (char *) newNotify + sizeof(PGnotify); + strcpy(newNotify->relname, svname); + newNotify->extra = newNotify->relname + nmlen + 1; + strcpy(newNotify->extra, conn->workBuffer.data); + newNotify->be_pid = be_pid; + newNotify->next = NULL; + if (conn->notifyTail) + conn->notifyTail->next = newNotify; + else + conn->notifyHead = newNotify; + conn->notifyTail = newNotify; + free(svname); return 0; } @@ -1752,12 +1790,12 @@ getCopyDataMessage(PGconn *conn) conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); return -2; } return 0; @@ -2186,12 +2224,12 @@ pqFunctionCall3(PGconn *conn, Oid fnid, conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. 
+			 * pqCheckInBufferSpace() already reported the error.
 			 */
-			handleSyncLoss(conn, id, msgLength);
+			handleFatalError(conn);
 			break;
 		}
 		continue;
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index a701c25038a75..02c114f140520 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -746,7 +746,7 @@ extern PGresult *pqPrepareAsyncResult(PGconn *conn);
 extern void pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...) pg_attribute_printf(2, 3);
 extern void pqSaveMessageField(PGresult *res, char code, const char *value);
-extern void pqSaveParameterStatus(PGconn *conn, const char *name,
+extern int	pqSaveParameterStatus(PGconn *conn, const char *name,
 								  const char *value);
 extern int	pqRowProcessor(PGconn *conn, const char **errmsgp);
 extern void pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery,

From e411a8d25a4b39e5a896f765ca91636057e261fc Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Fri, 22 Aug 2025 14:39:29 +0300
Subject: [PATCH 556/602] libpq: Be strict about cancel key lengths

The protocol documentation states that the maximum length of a cancel
key is 256 bytes. This commit starts checking for that limit in libpq.
Otherwise, third-party backend implementations will probably start
using more bytes anyway.

We also start requiring that a protocol 3.0 connection does not send a
longer cancel key, to make sure that servers don't start breaking old
3.0-only clients by accident. Finally, this also restricts the minimum
key length to 4 bytes (both in the protocol spec and in the libpq
implementation).

Author: Jelte Fennema-Nio
Reviewed-by: Jacob Champion
Discussion: https://www.postgresql.org/message-id/df892f9f-5923-4046-9d6f-8c48d8980b50@iki.fi
Backpatch-through: 18
---
 doc/src/sgml/protocol.sgml          |  2 +-
 src/interfaces/libpq/fe-protocol3.c | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index e63647c093bad..b5395604fb8b7 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -4136,7 +4136,7 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
        message, indicated by the length field.
       
-       The maximum key length is 256 bytes. The
+       The minimum and maximum key length are 4 and 256 bytes, respectively.
The PostgreSQL server only sends keys up to 32 bytes, but the larger maximum size allows for future server versions, as well as connection poolers and other middleware, to use diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 0cca832c06ac8..8c2d03d8b9f1d 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1569,6 +1569,27 @@ getBackendKeyData(PGconn *conn, int msgLength) cancel_key_len = 5 + msgLength - (conn->inCursor - conn->inStart); + if (cancel_key_len != 4 && conn->pversion == PG_PROTOCOL(3, 0)) + { + libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key with length %d not allowed in protocol version 3.0 (must be 4 bytes)", cancel_key_len); + handleFatalError(conn); + return 0; + } + + if (cancel_key_len < 4) + { + libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key with length %d is too short (minimum 4 bytes)", cancel_key_len); + handleFatalError(conn); + return 0; + } + + if (cancel_key_len > 256) + { + libpq_append_conn_error(conn, "received invalid BackendKeyData message: cancel key with length %d is too long (maximum 256 bytes)", cancel_key_len); + handleFatalError(conn); + return 0; + } + conn->be_cancel_key = malloc(cancel_key_len); if (conn->be_cancel_key == NULL) { From 807ee417e562c355360e891f415e6e8e6e4c40ed Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 22 Aug 2025 14:47:19 +0300 Subject: [PATCH 557/602] Revert unnecessary check for NULL Jelte pointed out that this was unnecessary, but I failed to remove it before pushing f6f0542266. Oops. Reviewed-by: Jelte Fennema-Nio Discussion: https://www.postgresql.org/message-id/CAGECzQT%3DxNV-V%2BvFC7YQwYQMj0wGN61b3p%3DJ1_rL6M0vbjTtrA@mail.gmail.com Backpatch-through: 18 --- src/interfaces/libpq/fe-protocol3.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 8c2d03d8b9f1d..43ad672abce95 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1645,8 +1645,7 @@ getNotify(PGconn *conn) } if (pqGets(&conn->workBuffer, conn)) { - if (svname) - free(svname); + free(svname); return EOF; } From b63952a7811319a6525a9ea23afa294a900fa895 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 22 Aug 2025 11:11:28 -0500 Subject: [PATCH 558/602] vacuumdb: Fix --missing-stats-only with virtual generated columns. Statistics aren't created for virtual generated columns, so "vacuumdb --missing-stats-only" always chooses to analyze tables that have them. To fix, modify vacuumdb's query for retrieving relations that are missing statistics to exclude those columns. Oversight in commit edba754f05. 
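For illustration only (not part of the committed patch), here is a
minimal C sketch of the filter this fix adds, assuming the PG18
definitions in pg_attribute_d.h. CppAsString2() stringifies the
expansion of its argument, so ATTRIBUTE_GENERATED_VIRTUAL (the
character literal 'v') is pasted into the query text as the SQL
literal 'v':

    #include "c.h"
    #include "catalog/pg_attribute_d.h"

    /*
     * Sketch of the fragment appended to vacuumdb's missing-stats query.
     * After preprocessing, this is a single string ending in
     * "... OPERATOR(pg_catalog.<>) 'v'\n".
     */
    static const char *attgenerated_filter =
        "  AND a.attgenerated OPERATOR(pg_catalog.<>) "
        CppAsString2(ATTRIBUTE_GENERATED_VIRTUAL) "\n";

The OPERATOR(pg_catalog.<>) spelling follows the surrounding query's
convention of schema-qualifying operators, so the comparison cannot be
hijacked through search_path.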
Author: Yugo Nagata
Reviewed-by: Fujii Masao
Reviewed-by: Corey Huinker
Discussion: https://postgr.es/m/20250820104226.8ba51e43164cd590b863ce41%40sraoss.co.jp
Backpatch-through: 18
---
 src/bin/scripts/t/100_vacuumdb.pl | 7 ++++---
 src/bin/scripts/vacuumdb.c        | 5 +++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl
index 240f0fdd3e5cb..945c30df15600 100644
--- a/src/bin/scripts/t/100_vacuumdb.pl
+++ b/src/bin/scripts/t/100_vacuumdb.pl
@@ -237,9 +237,10 @@
 	qr/cannot vacuum all databases and a specific one at the same time/,
 	'cannot use option --all and a dbname as argument at the same time');
 
-$node->safe_psql('postgres',
-	'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'
-);
+$node->safe_psql('postgres', q|
+	CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;
+	ALTER TABLE regression_vacuumdb_test ADD COLUMN c INT GENERATED ALWAYS AS (a + b);
+|);
 $node->issues_sql_like(
 	[ 'vacuumdb', '--analyze-only',
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index 22093e50aa5ed..fd236087e90ae 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -14,6 +14,7 @@
 
 #include
 
+#include "catalog/pg_attribute_d.h"
 #include "catalog/pg_class_d.h"
 #include "common.h"
 #include "common/connect.h"
@@ -973,6 +974,8 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
 			 "  AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
 			 "  AND NOT a.attisdropped\n"
 			 "  AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+			 "  AND a.attgenerated OPERATOR(pg_catalog.<>) "
+			 CppAsString2(ATTRIBUTE_GENERATED_VIRTUAL) "\n"
 			 "  AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
 			 "   WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
 			 "   AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
@@ -1010,6 +1013,8 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
 			 "  AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
 			 "  AND NOT a.attisdropped\n"
 			 "  AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
+			 "  AND a.attgenerated OPERATOR(pg_catalog.<>) "
+			 CppAsString2(ATTRIBUTE_GENERATED_VIRTUAL) "\n"
 			 "  AND c.relhassubclass\n"
 			 "  AND NOT p.inherited\n"
 			 "  AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"

From c13070a27b63d9ce4850d88a63bf889a6fde26f0 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov
Date: Fri, 22 Aug 2025 18:44:39 +0300
Subject: [PATCH 559/602] Revert "Get rid of WALBufMappingLock"

This reverts commit bc22dc0e0ddc2dcb6043a732415019cc6b6bf683.
It appears that condition variables are not suitable for use inside
critical sections. If WaitLatch()/WaitEventSetWaitBlock() face
postmaster death, they exit, releasing all locks, instead of raising
a PANIC. In certain situations, this leads to data corruption.
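To make the hazard concrete, here is a hedged, hypothetical sketch (not
from the PostgreSQL tree; demo_cv and demo_unsafe_wait() are invented,
and the wait event is an arbitrary stand-in):

    #include "postgres.h"

    #include "miscadmin.h"
    #include "storage/condition_variable.h"
    #include "utils/wait_event.h"

    static ConditionVariable demo_cv;	/* assume initialized in shared memory */

    static void
    demo_unsafe_wait(void)
    {
    	START_CRIT_SECTION();

    	/*
    	 * Unsafe: the latch wait inside ConditionVariableSleep() reacts to
    	 * postmaster death by exiting the process, and process exit releases
    	 * all LWLocks on the way out.  A critical section instead requires
    	 * any failure to escalate to PANIC, so this quiet exit can leave
    	 * shared memory only partially updated.
    	 */
    	ConditionVariableSleep(&demo_cv, WAIT_EVENT_MQ_INTERNAL);

    	END_CRIT_SECTION();
    }

Hence the revert below goes back to a plain WALBufMappingLock LWLock
for WAL buffer initialization instead of condition-variable waits.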
Reported-by: Andrey Borodin Discussion: https://postgr.es/m/B3C69B86-7F82-4111-B97F-0005497BB745%40yandex-team.ru Reviewed-by: Andrey Borodin Reviewed-by: Aleksander Alekseev Reviewed-by: Kirill Reshke Reviewed-by: Tom Lane Reviewed-by: Thomas Munro Reviewed-by: Tomas Vondra Reviewed-by: Andres Freund Reviewed-by: Yura Sokolov Reviewed-by: Michael Paquier Backpatch-through: 18 --- src/backend/access/transam/xlog.c | 234 ++++-------------- .../utils/activity/wait_event_names.txt | 2 +- src/include/storage/lwlocklist.h | 2 +- 3 files changed, 49 insertions(+), 189 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e8909406686d0..7ffb217915190 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -303,6 +303,11 @@ static bool doPageWrites; * so it's a plain spinlock. The other locks are held longer (potentially * over I/O operations), so we use LWLocks for them. These locks are: * + * WALBufMappingLock: must be held to replace a page in the WAL buffer cache. + * It is only held while initializing and changing the mapping. If the + * contents of the buffer being replaced haven't been written yet, the mapping + * lock is released while the write is done, and reacquired afterwards. + * * WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or * XLogFlush). * @@ -468,37 +473,21 @@ typedef struct XLogCtlData pg_atomic_uint64 logFlushResult; /* last byte + 1 flushed */ /* - * First initialized page in the cache (first byte position). - */ - XLogRecPtr InitializedFrom; - - /* - * Latest reserved for initialization page in the cache (last byte - * position + 1). + * Latest initialized page in the cache (last byte position + 1). * - * To change the identity of a buffer, you need to advance - * InitializeReserved first. To change the identity of a buffer that's + * To change the identity of a buffer (and InitializedUpTo), you need to + * hold WALBufMappingLock. To change the identity of a buffer that's * still dirty, the old page needs to be written out first, and for that * you need WALWriteLock, and you need to ensure that there are no * in-progress insertions to the page by calling * WaitXLogInsertionsToFinish(). */ - pg_atomic_uint64 InitializeReserved; - - /* - * Latest initialized page in the cache (last byte position + 1). - * - * InitializedUpTo is updated after the buffer initialization. After - * update, waiters got notification using InitializedUpToCondVar. - */ - pg_atomic_uint64 InitializedUpTo; - ConditionVariable InitializedUpToCondVar; + XLogRecPtr InitializedUpTo; /* * These values do not change after startup, although the pointed-to pages - * and xlblocks values certainly do. xlblocks values are changed - * lock-free according to the check for the xlog write position and are - * accompanied by changes of InitializeReserved and InitializedUpTo. + * and xlblocks values certainly do. xlblocks values are protected by + * WALBufMappingLock. */ char *pages; /* buffers for unwritten XLOG pages */ pg_atomic_uint64 *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */ @@ -821,9 +810,9 @@ XLogInsertRecord(XLogRecData *rdata, * fullPageWrites from changing until the insertion is finished. * * Step 2 can usually be done completely in parallel. If the required WAL - * page is not initialized yet, you have to go through AdvanceXLInsertBuffer, - * which will ensure it is initialized. But the WAL writer tries to do that - * ahead of insertions to avoid that from happening in the critical path. 
+ * page is not initialized yet, you have to grab WALBufMappingLock to + * initialize it, but the WAL writer tries to do that ahead of insertions + * to avoid that from happening in the critical path. * *---------- */ @@ -2005,79 +1994,32 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr; XLogRecPtr NewPageBeginPtr; XLogPageHeader NewPage; - XLogRecPtr ReservedPtr; int npages pg_attribute_unused() = 0; - /* - * We must run the loop below inside the critical section as we expect - * XLogCtl->InitializedUpTo to eventually keep up. The most of callers - * already run inside the critical section. Except for WAL writer, which - * passed 'opportunistic == true', and therefore we don't perform - * operations that could error out. - * - * Start an explicit critical section anyway though. - */ - Assert(CritSectionCount > 0 || opportunistic); - START_CRIT_SECTION(); + LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE); - /*-- - * Loop till we get all the pages in WAL buffer before 'upto' reserved for - * initialization. Multiple process can initialize different buffers with - * this loop in parallel as following. - * - * 1. Reserve page for initialization using XLogCtl->InitializeReserved. - * 2. Initialize the reserved page. - * 3. Attempt to advance XLogCtl->InitializedUpTo, + /* + * Now that we have the lock, check if someone initialized the page + * already. */ - ReservedPtr = pg_atomic_read_u64(&XLogCtl->InitializeReserved); - while (upto >= ReservedPtr || opportunistic) + while (upto >= XLogCtl->InitializedUpTo || opportunistic) { - Assert(ReservedPtr % XLOG_BLCKSZ == 0); + nextidx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo); /* - * Get ending-offset of the buffer page we need to replace. - * - * We don't lookup into xlblocks, but rather calculate position we - * must wait to be written. If it was written, xlblocks will have this - * position (or uninitialized) + * Get ending-offset of the buffer page we need to replace (this may + * be zero if the buffer hasn't been used yet). Fall through if it's + * already written out. */ - if (ReservedPtr + XLOG_BLCKSZ > XLogCtl->InitializedFrom + XLOG_BLCKSZ * XLOGbuffers) - OldPageRqstPtr = ReservedPtr + XLOG_BLCKSZ - (XLogRecPtr) XLOG_BLCKSZ * XLOGbuffers; - else - OldPageRqstPtr = InvalidXLogRecPtr; - - if (LogwrtResult.Write < OldPageRqstPtr && opportunistic) + OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]); + if (LogwrtResult.Write < OldPageRqstPtr) { /* - * If we just want to pre-initialize as much as we can without - * flushing, give up now. + * Nope, got work to do. If we just want to pre-initialize as much + * as we can without flushing, give up now. */ - upto = ReservedPtr - 1; - break; - } - - /* - * Attempt to reserve the page for initialization. Failure means that - * this page got reserved by another process. - */ - if (!pg_atomic_compare_exchange_u64(&XLogCtl->InitializeReserved, - &ReservedPtr, - ReservedPtr + XLOG_BLCKSZ)) - continue; - - /* - * Wait till page gets correctly initialized up to OldPageRqstPtr. - */ - nextidx = XLogRecPtrToBufIdx(ReservedPtr); - while (pg_atomic_read_u64(&XLogCtl->InitializedUpTo) < OldPageRqstPtr) - ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT); - ConditionVariableCancelSleep(); - Assert(pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) == OldPageRqstPtr); - - /* Fall through if it's already written out. */ - if (LogwrtResult.Write < OldPageRqstPtr) - { - /* Nope, got work to do. 
*/ + if (opportunistic) + break; /* Advance shared memory write request position */ SpinLockAcquire(&XLogCtl->info_lck); @@ -2092,6 +2034,14 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) RefreshXLogWriteResult(LogwrtResult); if (LogwrtResult.Write < OldPageRqstPtr) { + /* + * Must acquire write lock. Release WALBufMappingLock first, + * to make sure that all insertions that we need to wait for + * can finish (up to this same position). Otherwise we risk + * deadlock. + */ + LWLockRelease(WALBufMappingLock); + WaitXLogInsertionsToFinish(OldPageRqstPtr); LWLockAcquire(WALWriteLock, LW_EXCLUSIVE); @@ -2119,6 +2069,9 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) */ pgstat_report_fixed = true; } + /* Re-acquire WALBufMappingLock and retry */ + LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE); + continue; } } @@ -2126,9 +2079,11 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) * Now the next buffer slot is free and we can set it up to be the * next output page. */ - NewPageBeginPtr = ReservedPtr; + NewPageBeginPtr = XLogCtl->InitializedUpTo; NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ; + Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx); + NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ); /* @@ -2192,100 +2147,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) */ pg_write_barrier(); - /*----- - * Update the value of XLogCtl->xlblocks[nextidx] and try to advance - * XLogCtl->InitializedUpTo in a lock-less manner. - * - * First, let's provide a formal proof of the algorithm. Let it be 'n' - * process with the following variables in shared memory: - * f - an array of 'n' boolean flags, - * v - atomic integer variable. - * - * Also, let - * i - a number of a process, - * j - local integer variable, - * CAS(var, oldval, newval) - compare-and-swap atomic operation - * returning true on success, - * write_barrier()/read_barrier() - memory barriers. - * - * The pseudocode for each process is the following. - * - * j := i - * f[i] := true - * write_barrier() - * while CAS(v, j, j + 1): - * j := j + 1 - * read_barrier() - * if not f[j]: - * break - * - * Let's prove that v eventually reaches the value of n. - * 1. Prove by contradiction. Assume v doesn't reach n and stucks - * on k, where k < n. - * 2. Process k attempts CAS(v, k, k + 1). 1). If, as we assumed, v - * gets stuck at k, then this CAS operation must fail. Therefore, - * v < k when process k attempts CAS(v, k, k + 1). - * 3. If, as we assumed, v gets stuck at k, then the value k of v - * must be achieved by some process m, where m < k. The process - * m must observe f[k] == false. Otherwise, it will later attempt - * CAS(v, k, k + 1) with success. - * 4. Therefore, corresponding read_barrier() (while j == k) on - * process m reached before write_barrier() of process k. But then - * process k attempts CAS(v, k, k + 1) after process m successfully - * incremented v to k, and that CAS operation must succeed. - * That leads to a contradiction. So, there is no such k (k < n) - * where v gets stuck. Q.E.D. - * - * To apply this proof to the code below, we assume - * XLogCtl->InitializedUpTo will play the role of v with XLOG_BLCKSZ - * granularity. We also assume setting XLogCtl->xlblocks[nextidx] to - * NewPageEndPtr to play the role of setting f[i] to true. 
Also, note - * that processes can't concurrently map different xlog locations to - * the same nextidx because we previously requested that - * XLogCtl->InitializedUpTo >= OldPageRqstPtr. So, a xlog buffer can - * be taken for initialization only once the previous initialization - * takes effect on XLogCtl->InitializedUpTo. - */ - pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr); - - pg_write_barrier(); - - while (pg_atomic_compare_exchange_u64(&XLogCtl->InitializedUpTo, &NewPageBeginPtr, NewPageEndPtr)) - { - NewPageBeginPtr = NewPageEndPtr; - NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ; - nextidx = XLogRecPtrToBufIdx(NewPageBeginPtr); - - pg_read_barrier(); - - if (pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]) != NewPageEndPtr) - { - /* - * Page at nextidx wasn't initialized yet, so we can't move - * InitializedUpto further. It will be moved by backend which - * will initialize nextidx. - */ - ConditionVariableBroadcast(&XLogCtl->InitializedUpToCondVar); - break; - } - } + XLogCtl->InitializedUpTo = NewPageEndPtr; npages++; } - - END_CRIT_SECTION(); - - /* - * All the pages in WAL buffer before 'upto' were reserved for - * initialization. However, some pages might be reserved by concurrent - * processes. Wait till they finish initialization. - */ - while (upto >= pg_atomic_read_u64(&XLogCtl->InitializedUpTo)) - ConditionVariableSleep(&XLogCtl->InitializedUpToCondVar, WAIT_EVENT_WAL_BUFFER_INIT); - ConditionVariableCancelSleep(); - - pg_read_barrier(); + LWLockRelease(WALBufMappingLock); #ifdef WAL_DEBUG if (XLOG_DEBUG && npages > 0) @@ -5178,10 +5045,6 @@ XLOGShmemInit(void) pg_atomic_init_u64(&XLogCtl->logWriteResult, InvalidXLogRecPtr); pg_atomic_init_u64(&XLogCtl->logFlushResult, InvalidXLogRecPtr); pg_atomic_init_u64(&XLogCtl->unloggedLSN, InvalidXLogRecPtr); - - pg_atomic_init_u64(&XLogCtl->InitializeReserved, InvalidXLogRecPtr); - pg_atomic_init_u64(&XLogCtl->InitializedUpTo, InvalidXLogRecPtr); - ConditionVariableInit(&XLogCtl->InitializedUpToCondVar); } /* @@ -6205,8 +6068,7 @@ StartupXLOG(void) memset(page + len, 0, XLOG_BLCKSZ - len); pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ); - pg_atomic_write_u64(&XLogCtl->InitializedUpTo, endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ); - XLogCtl->InitializedFrom = endOfRecoveryInfo->lastPageBeginPtr; + XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ; } else { @@ -6215,10 +6077,8 @@ StartupXLOG(void) * let the first attempt to insert a log record to initialize the next * buffer. */ - pg_atomic_write_u64(&XLogCtl->InitializedUpTo, EndOfLog); - XLogCtl->InitializedFrom = EndOfLog; + XLogCtl->InitializedUpTo = EndOfLog; } - pg_atomic_write_u64(&XLogCtl->InitializeReserved, pg_atomic_read_u64(&XLogCtl->InitializedUpTo)); /* * Update local and shared status. This is OK to do without any locks diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 0be307d2ca04b..5427da5bc1b19 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -156,7 +156,6 @@ REPLICATION_SLOT_DROP "Waiting for a replication slot to become inactive so it c RESTORE_COMMAND "Waiting for to complete." SAFE_SNAPSHOT "Waiting to obtain a valid snapshot for a READ ONLY DEFERRABLE transaction." SYNC_REP "Waiting for confirmation from a remote server during synchronous replication." -WAL_BUFFER_INIT "Waiting on WAL buffer to be initialized." 
WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit." WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication." WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated." @@ -316,6 +315,7 @@ XidGen "Waiting to allocate a new transaction ID." ProcArray "Waiting to access the shared per-process data structures (typically, to get a snapshot or report a session's transaction ID)." SInvalRead "Waiting to retrieve messages from the shared catalog invalidation queue." SInvalWrite "Waiting to add a message to the shared catalog invalidation queue." +WALBufMapping "Waiting to replace a page in WAL buffers." WALWrite "Waiting for WAL buffers to be written to disk." ControlFile "Waiting to read or update the pg_control file or create a new WAL file." MultiXactGen "Waiting to read or update shared multixact state." diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index 208d2e3a8ed9e..06a1ffd4b08b0 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -38,7 +38,7 @@ PG_LWLOCK(3, XidGen) PG_LWLOCK(4, ProcArray) PG_LWLOCK(5, SInvalRead) PG_LWLOCK(6, SInvalWrite) -/* 7 was WALBufMapping */ +PG_LWLOCK(7, WALBufMapping) PG_LWLOCK(8, WALWrite) PG_LWLOCK(9, ControlFile) /* 10 was CheckpointLock */ From b61a5c4bed7df87120df587731840e51ea7c7525 Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Fri, 22 Aug 2025 20:50:28 -0700 Subject: [PATCH 560/602] Sort DO_DEFAULT_ACL dump objects independent of OIDs. Commit 0decd5e89db9f5edb9b27351082f0d74aae7a9b6 missed DO_DEFAULT_ACL, leading to assertion failures, potential dump order instability, and spurious schema diffs. Back-patch to v13, like that commit. Reported-by: Alexander Lakhin Author: Kirill Reshke Discussion: https://postgr.es/m/d32aaa8d-df7c-4f94-bcb3-4c85f02bea21@gmail.com Backpatch-through: 13 --- src/bin/pg_dump/pg_dump_sort.c | 13 +++++++++++++ src/test/regress/expected/privileges.out | 5 +++++ src/test/regress/sql/privileges.sql | 7 +++++++ 3 files changed, 25 insertions(+) diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index a02da3e9652c1..2d02456664b59 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -418,6 +418,19 @@ DOTypeNameCompare(const void *p1, const void *p2) return cmpval; } } + else if (obj1->objType == DO_DEFAULT_ACL) + { + DefaultACLInfo *daclobj1 = *(DefaultACLInfo *const *) p1; + DefaultACLInfo *daclobj2 = *(DefaultACLInfo *const *) p2; + + /* + * Sort by defaclrole, per pg_default_acl_role_nsp_obj_index. The + * (namespace, name) match (defaclnamespace, defaclobjtype). + */ + cmpval = strcmp(daclobj1->defaclrole, daclobj2->defaclrole); + if (cmpval != 0) + return cmpval; + } else if (obj1->objType == DO_PUBLICATION_REL) { PublicationRelInfo *probj1 = *(PublicationRelInfo *const *) p1; diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 845e477da044d..8149666725b43 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -3153,6 +3153,11 @@ DROP USER regress_priv_user6; DROP USER regress_priv_user7; DROP USER regress_priv_user8; -- does not exist ERROR: role "regress_priv_user8" does not exist +-- leave some default ACLs for pg_upgrade's dump-restore test input. 
+ALTER DEFAULT PRIVILEGES FOR ROLE pg_signal_backend
+  REVOKE INSERT ON TABLES FROM pg_signal_backend;
+ALTER DEFAULT PRIVILEGES FOR ROLE pg_read_all_settings
+  REVOKE INSERT ON TABLES FROM pg_read_all_settings;
 -- permissions with LOCK TABLE
 CREATE USER regress_locktable_user;
 CREATE TABLE lock_table (a int);
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
index 661cf186cfd59..7b1e9393577ec 100644
--- a/src/test/regress/sql/privileges.sql
+++ b/src/test/regress/sql/privileges.sql
@@ -1863,6 +1863,13 @@ DROP USER regress_priv_user7;
 
 DROP USER regress_priv_user8;		-- does not exist
 
+-- leave some default ACLs for pg_upgrade's dump-restore test input.
+ALTER DEFAULT PRIVILEGES FOR ROLE pg_signal_backend
+  REVOKE INSERT ON TABLES FROM pg_signal_backend;
+ALTER DEFAULT PRIVILEGES FOR ROLE pg_read_all_settings
+  REVOKE INSERT ON TABLES FROM pg_read_all_settings;
+
+
 -- permissions with LOCK TABLE
 CREATE USER regress_locktable_user;
 CREATE TABLE lock_table (a int);

From ad4412480d3ff475a6dfa4a7f449702eff78312d Mon Sep 17 00:00:00 2001
From: Noah Misch
Date: Sat, 23 Aug 2025 16:46:20 -0700
Subject: [PATCH 561/602] Rewrite previous commit's test for
 TestUpgradeXversion compatibility.

v17 introduced the MAINTAIN ON TABLES privilege. That changed the
applicable "baseacls" reaching buildACLCommands(). That yielded
spurious TestUpgradeXversion diffs. Change to use a TYPES privilege.
Types have had the same single privilege in all supported versions, so
they avoid the problem. Per buildfarm.

Back-patch to v13, like that commit.

Discussion: https://postgr.es/m/20250823144505.88.nmisch@google.com
Backpatch-through: 13
---
 src/test/regress/expected/privileges.out | 4 ++--
 src/test/regress/sql/privileges.sql      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
index 8149666725b43..6dcc95ede502c 100644
--- a/src/test/regress/expected/privileges.out
+++ b/src/test/regress/expected/privileges.out
@@ -3155,9 +3155,9 @@ DROP USER regress_priv_user8;  -- does not exist
 ERROR:  role "regress_priv_user8" does not exist
 -- leave some default ACLs for pg_upgrade's dump-restore test input.
 ALTER DEFAULT PRIVILEGES FOR ROLE pg_signal_backend
-  REVOKE INSERT ON TABLES FROM pg_signal_backend;
+  REVOKE USAGE ON TYPES FROM pg_signal_backend;
 ALTER DEFAULT PRIVILEGES FOR ROLE pg_read_all_settings
-  REVOKE INSERT ON TABLES FROM pg_read_all_settings;
+  REVOKE USAGE ON TYPES FROM pg_read_all_settings;
 -- permissions with LOCK TABLE
 CREATE USER regress_locktable_user;
 CREATE TABLE lock_table (a int);
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
index 7b1e9393577ec..fe409654c0e98 100644
--- a/src/test/regress/sql/privileges.sql
+++ b/src/test/regress/sql/privileges.sql
@@ -1865,9 +1865,9 @@ DROP USER regress_priv_user8;  -- does not exist
 
 -- leave some default ACLs for pg_upgrade's dump-restore test input.
ALTER DEFAULT PRIVILEGES FOR ROLE pg_signal_backend - REVOKE INSERT ON TABLES FROM pg_signal_backend; + REVOKE USAGE ON TYPES FROM pg_signal_backend; ALTER DEFAULT PRIVILEGES FOR ROLE pg_read_all_settings - REVOKE INSERT ON TABLES FROM pg_read_all_settings; + REVOKE USAGE ON TYPES FROM pg_read_all_settings; -- permissions with LOCK TABLE From 878656dbde0d2fd4b85a8c63566e2a9f0d0f4952 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 25 Aug 2025 08:28:17 +0200 Subject: [PATCH 562/602] Formatting cleanup of guc_tables.c This cleans up a few minor formatting inconsistencies. Reviewed-by: John Naylor Discussion: https://www.postgresql.org/message-id/flat/dae6fe89-1e0c-4c3f-8d92-19d23374fb10%40eisentraut.org --- src/backend/utils/misc/guc_tables.c | 238 ++++++++++++++++++---------- 1 file changed, 155 insertions(+), 83 deletions(-) diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index d14b1678e7fec..f137129209f65 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -799,6 +799,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_indexscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of index-scan plans."), @@ -809,6 +810,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_indexonlyscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of index-only-scan plans."), @@ -819,6 +821,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_bitmapscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of bitmap-scan plans."), @@ -829,6 +832,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_tidscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of TID scan plans."), @@ -839,6 +843,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_sort", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of explicit sort steps."), @@ -849,6 +854,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_incremental_sort", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of incremental sort steps."), @@ -859,6 +865,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_hashagg", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of hashed aggregation plans."), @@ -869,6 +876,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_material", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of materialization."), @@ -879,6 +887,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_memoize", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of memoization."), @@ -889,6 +898,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_nestloop", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of nested-loop join plans."), @@ -899,6 +909,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_mergejoin", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of merge join plans."), @@ -909,6 +920,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_hashjoin", PGC_USERSET, QUERY_TUNING_METHOD, 
gettext_noop("Enables the planner's use of hash join plans."), @@ -919,6 +931,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_gathermerge", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of gather merge plans."), @@ -929,6 +942,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_partitionwise_join", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables partitionwise join."), @@ -939,6 +953,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"enable_partitionwise_aggregate", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables partitionwise aggregation and grouping."), @@ -949,6 +964,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"enable_parallel_append", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of parallel append plans."), @@ -959,6 +975,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_parallel_hash", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of parallel hash plans."), @@ -969,6 +986,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_partition_pruning", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables plan-time and execution-time partition pruning."), @@ -981,6 +999,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_presorted_aggregate", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's ability to produce plans that " @@ -996,6 +1015,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_async_append", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of async append plans."), @@ -1006,6 +1026,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_self_join_elimination", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables removal of unique self-joins."), @@ -1016,6 +1037,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables reordering of GROUP BY keys."), @@ -1026,6 +1048,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables reordering of DISTINCT keys."), @@ -1036,6 +1059,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), @@ -1047,6 +1071,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { /* * Not for general use --- used by SET SESSION AUTHORIZATION and SET @@ -1061,6 +1086,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { /* * This setting itself cannot be set by ALTER SYSTEM to avoid an @@ -1077,6 +1103,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"bonjour", PGC_POSTMASTER, CONN_AUTH_SETTINGS, gettext_noop("Enables advertising the server via Bonjour."), @@ -1086,6 +1113,7 @@ struct config_bool ConfigureNamesBool[] = false, check_bonjour, NULL, NULL }, + { {"track_commit_timestamp", PGC_POSTMASTER, REPLICATION_SENDING, gettext_noop("Collects transaction commit time."), @@ -1095,6 +1123,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"ssl", PGC_SIGHUP, CONN_AUTH_SSL, gettext_noop("Enables SSL 
connections."), @@ -1104,6 +1133,7 @@ struct config_bool ConfigureNamesBool[] = false, check_ssl, NULL, NULL }, + { {"ssl_passphrase_command_supports_reload", PGC_SIGHUP, CONN_AUTH_SSL, gettext_noop("Controls whether \"ssl_passphrase_command\" is called during server reload."), @@ -1113,6 +1143,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"ssl_prefer_server_ciphers", PGC_SIGHUP, CONN_AUTH_SSL, gettext_noop("Give priority to server ciphersuite order."), @@ -1122,6 +1153,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"fsync", PGC_SIGHUP, WAL_SETTINGS, gettext_noop("Forces synchronization of updates to disk."), @@ -1134,6 +1166,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"ignore_checksum_failure", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Continues processing after a checksum failure."), @@ -1149,6 +1182,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"zero_damaged_pages", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Continues processing past damaged page headers."), @@ -1163,6 +1197,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"ignore_invalid_pages", PGC_POSTMASTER, DEVELOPER_OPTIONS, gettext_noop("Continues recovery after an invalid pages failure."), @@ -1182,6 +1217,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"full_page_writes", PGC_SIGHUP, WAL_SETTINGS, gettext_noop("Writes full pages to WAL when first modified after a checkpoint."), @@ -1235,6 +1271,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"trace_connection_negotiation", PGC_POSTMASTER, DEVELOPER_OPTIONS, gettext_noop("Logs details of pre-authentication connection handshake."), @@ -1245,6 +1282,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"log_disconnections", PGC_SU_BACKEND, LOGGING_WHAT, gettext_noop("Logs end of a session, including duration."), @@ -1254,6 +1292,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"log_replication_commands", PGC_SUSET, LOGGING_WHAT, gettext_noop("Logs each replication command."), @@ -1263,6 +1302,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"debug_assertions", PGC_INTERNAL, PRESET_OPTIONS, gettext_noop("Shows whether the running server has assertion checks enabled."), @@ -1283,6 +1323,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"restart_after_crash", PGC_SIGHUP, ERROR_HANDLING_OPTIONS, gettext_noop("Reinitialize server after backend crash."), @@ -1292,6 +1333,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"remove_temp_files_after_crash", PGC_SIGHUP, DEVELOPER_OPTIONS, gettext_noop("Remove temporary files after backend crash."), @@ -1302,6 +1344,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"send_abort_for_crash", PGC_SIGHUP, DEVELOPER_OPTIONS, gettext_noop("Send SIGABRT not SIGQUIT to child processes after backend crash."), @@ -1312,6 +1355,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"send_abort_for_kill", PGC_SIGHUP, DEVELOPER_OPTIONS, gettext_noop("Send SIGABRT not SIGKILL to stuck child processes."), @@ -1332,6 +1376,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + #ifdef DEBUG_NODE_TESTS_ENABLED { {"debug_copy_parse_plan_trees", PGC_SUSET, DEVELOPER_OPTIONS, @@ -1350,6 +1395,7 @@ struct config_bool 
ConfigureNamesBool[] = #endif NULL, NULL, NULL }, + { {"debug_write_read_parse_plan_trees", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Set this to force all parse and plan trees to be passed through " @@ -1367,6 +1413,7 @@ struct config_bool ConfigureNamesBool[] = #endif NULL, NULL, NULL }, + { {"debug_raw_expression_coverage_test", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Set this to force all raw parse trees for DML statements to be scanned " @@ -1385,6 +1432,7 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, #endif /* DEBUG_NODE_TESTS_ENABLED */ + { {"debug_print_parse", PGC_USERSET, LOGGING_WHAT, gettext_noop("Logs each query's parse tree."), @@ -1394,6 +1442,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"debug_print_rewritten", PGC_USERSET, LOGGING_WHAT, gettext_noop("Logs each query's rewritten parse tree."), @@ -1403,6 +1452,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"debug_print_plan", PGC_USERSET, LOGGING_WHAT, gettext_noop("Logs each query's execution plan."), @@ -1412,6 +1462,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"debug_pretty_print", PGC_USERSET, LOGGING_WHAT, gettext_noop("Indents parse and plan tree displays."), @@ -1421,6 +1472,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"log_parser_stats", PGC_SUSET, STATS_MONITORING, gettext_noop("Writes parser performance statistics to the server log."), @@ -1430,6 +1482,7 @@ struct config_bool ConfigureNamesBool[] = false, check_stage_log_stats, NULL, NULL }, + { {"log_planner_stats", PGC_SUSET, STATS_MONITORING, gettext_noop("Writes planner performance statistics to the server log."), @@ -1439,6 +1492,7 @@ struct config_bool ConfigureNamesBool[] = false, check_stage_log_stats, NULL, NULL }, + { {"log_executor_stats", PGC_SUSET, STATS_MONITORING, gettext_noop("Writes executor performance statistics to the server log."), @@ -1448,6 +1502,7 @@ struct config_bool ConfigureNamesBool[] = false, check_stage_log_stats, NULL, NULL }, + { {"log_statement_stats", PGC_SUSET, STATS_MONITORING, gettext_noop("Writes cumulative performance statistics to the server log."), @@ -1457,6 +1512,7 @@ struct config_bool ConfigureNamesBool[] = false, check_log_stats, NULL, NULL }, + #ifdef BTREE_BUILD_STATS { {"log_btree_build_stats", PGC_SUSET, DEVELOPER_OPTIONS, @@ -1481,6 +1537,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"track_counts", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects statistics on database activity."), @@ -1490,6 +1547,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"track_cost_delay_timing", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects timing statistics for cost-based vacuum delay."), @@ -1499,6 +1557,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"track_io_timing", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects timing statistics for database I/O activity."), @@ -1508,6 +1567,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"track_wal_io_timing", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects timing statistics for WAL I/O activity."), @@ -1560,6 +1620,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"trace_userlocks", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Emits information about user lock usage."), @@ -1570,6 +1631,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { 
{"trace_lwlocks", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Emits information about lightweight lock usage."), @@ -1580,6 +1642,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"debug_deadlocks", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Dumps information about all current locks when a deadlock timeout occurs."), @@ -1601,6 +1664,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"log_lock_failures", PGC_SUSET, LOGGING_WHAT, gettext_noop("Logs lock failures."), @@ -1610,6 +1674,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs standby recovery conflict waits."), @@ -1619,6 +1684,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"log_hostname", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs the host name in the connection logs."), @@ -1631,6 +1697,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"transform_null_equals", PGC_USERSET, COMPAT_OPTIONS_OTHER, gettext_noop("Treats \"expr=NULL\" as \"expr IS NULL\"."), @@ -1644,6 +1711,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"default_transaction_read_only", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default read-only status of new transactions."), @@ -1654,6 +1722,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"transaction_read_only", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the current transaction's read-only status."), @@ -1664,6 +1733,7 @@ struct config_bool ConfigureNamesBool[] = false, check_transaction_read_only, NULL, NULL }, + { {"default_transaction_deferrable", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default deferrable status of new transactions."), @@ -1673,6 +1743,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"transaction_deferrable", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Whether to defer a read-only serializable transaction until it can be executed with no possible serialization failures."), @@ -1683,6 +1754,7 @@ struct config_bool ConfigureNamesBool[] = false, check_transaction_deferrable, NULL, NULL }, + { {"row_security", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Enables row security."), @@ -1692,6 +1764,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"check_function_bodies", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Check routine bodies during CREATE FUNCTION and CREATE PROCEDURE."), @@ -1701,6 +1774,7 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { {"array_nulls", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, gettext_noop("Enables input of NULL elements in arrays."), @@ -1728,6 +1802,7 @@ struct config_bool ConfigureNamesBool[] = false, check_default_with_oids, NULL, NULL }, + { {"logging_collector", PGC_POSTMASTER, LOGGING_WHERE, gettext_noop("Start a subprocess to capture stderr, csvlog and/or jsonlog into log files."), @@ -1737,6 +1812,7 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { {"log_truncate_on_rotation", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Truncate existing log files of same name during log rotation."), @@ -1775,8 +1851,7 @@ struct config_bool ConfigureNamesBool[] = #ifdef DEBUG_BOUNDED_SORT /* this is undocumented because not exposed in a standard build */ { - { - "optimize_bounded_sort", PGC_USERSET, QUERY_TUNING_METHOD, + 
{"optimize_bounded_sort", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables bounded sorting using heap sort."), NULL, GUC_NOT_IN_SAMPLE | GUC_EXPLAIN @@ -1961,7 +2036,7 @@ struct config_bool ConfigureNamesBool[] = { {"quote_all_identifiers", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, gettext_noop("When generating SQL fragments, quote all identifiers."), - NULL, + NULL }, "e_all_identifiers, false, @@ -2091,6 +2166,7 @@ struct config_bool ConfigureNamesBool[] = { {"data_sync_retry", PGC_POSTMASTER, ERROR_HANDLING_OPTIONS, gettext_noop("Whether to continue running after a failure to sync data files."), + NULL }, &data_sync_retry, false, @@ -2100,6 +2176,7 @@ struct config_bool ConfigureNamesBool[] = { {"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY, gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."), + NULL }, &wal_receiver_create_temp_slot, false, @@ -2109,7 +2186,7 @@ struct config_bool ConfigureNamesBool[] = { {"event_triggers", PGC_SUSET, CLIENT_CONN_STATEMENT, gettext_noop("Enables event triggers."), - gettext_noop("When enabled, event triggers will fire for all applicable statements."), + gettext_noop("When enabled, event triggers will fire for all applicable statements.") }, &event_triggers, true, @@ -2119,6 +2196,7 @@ struct config_bool ConfigureNamesBool[] = { {"sync_replication_slots", PGC_SIGHUP, REPLICATION_STANDBY, gettext_noop("Enables a physical standby to synchronize logical failover replication slots from the primary server."), + NULL }, &sync_replication_slots, false, @@ -2128,6 +2206,7 @@ struct config_bool ConfigureNamesBool[] = { {"md5_password_warnings", PGC_USERSET, CONN_AUTH_AUTH, gettext_noop("Enables deprecation warnings for MD5 passwords."), + NULL }, &md5_password_warnings, true, @@ -2137,6 +2216,7 @@ struct config_bool ConfigureNamesBool[] = { {"vacuum_truncate", PGC_USERSET, VACUUM_DEFAULT, gettext_noop("Enables vacuum to truncate empty pages at the end of the table."), + NULL }, &vacuum_truncate, true, @@ -2163,6 +2243,7 @@ struct config_int ConfigureNamesInt[] = 0, 0, INT_MAX / 2, NULL, NULL, NULL }, + { {"post_auth_delay", PGC_BACKEND, DEVELOPER_OPTIONS, gettext_noop("Sets the amount of time to wait after " @@ -2174,6 +2255,7 @@ struct config_int ConfigureNamesInt[] = 0, 0, INT_MAX / 1000000, NULL, NULL, NULL }, + { {"default_statistics_target", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Sets the default statistics target."), @@ -2184,6 +2266,7 @@ struct config_int ConfigureNamesInt[] = 100, 1, MAX_STATISTICS_TARGET, NULL, NULL, NULL }, + { {"from_collapse_limit", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Sets the FROM-list size beyond which subqueries " @@ -2197,6 +2280,7 @@ struct config_int ConfigureNamesInt[] = 8, 1, INT_MAX, NULL, NULL, NULL }, + { {"join_collapse_limit", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Sets the FROM-list size beyond which JOIN " @@ -2210,6 +2294,7 @@ struct config_int ConfigureNamesInt[] = 8, 1, INT_MAX, NULL, NULL, NULL }, + { {"geqo_threshold", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Sets the threshold of FROM items beyond which GEQO is used."), @@ -2220,6 +2305,7 @@ struct config_int ConfigureNamesInt[] = 12, 2, INT_MAX, NULL, NULL, NULL }, + { {"geqo_effort", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("GEQO: effort is used to set the default for other GEQO parameters."), @@ -2230,6 +2316,7 @@ struct config_int ConfigureNamesInt[] = DEFAULT_GEQO_EFFORT, MIN_GEQO_EFFORT, MAX_GEQO_EFFORT, NULL, NULL, NULL }, + { 
{"geqo_pool_size", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("GEQO: number of individuals in the population."), @@ -2240,6 +2327,7 @@ struct config_int ConfigureNamesInt[] = 0, 0, INT_MAX, NULL, NULL, NULL }, + { {"geqo_generations", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("GEQO: number of iterations of the algorithm."), @@ -2547,7 +2635,6 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, show_log_file_mode }, - { {"data_directory_mode", PGC_INTERNAL, PRESET_OPTIONS, gettext_noop("Shows the mode of the data directory."), @@ -2714,6 +2801,7 @@ struct config_int ConfigureNamesInt[] = FirstNormalObjectId, 0, INT_MAX, NULL, NULL, NULL }, + { {"trace_lock_table", PGC_SUSET, DEVELOPER_OPTIONS, gettext_noop("Sets the OID of the table with unconditionally lock tracing."), @@ -2830,6 +2918,7 @@ struct config_int ConfigureNamesInt[] = 1600000000, 0, 2100000000, NULL, NULL, NULL }, + { {"vacuum_multixact_failsafe_age", PGC_USERSET, VACUUM_FREEZING, gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), @@ -2916,7 +3005,7 @@ struct config_int ConfigureNamesInt[] = { {"max_notify_queue_pages", PGC_POSTMASTER, RESOURCES_DISK, gettext_noop("Sets the maximum number of allocated pages for NOTIFY / LISTEN queue."), - NULL, + NULL }, &max_notify_queue_pages, 1048576, 64, INT_MAX, @@ -2952,8 +3041,7 @@ struct config_int ConfigureNamesInt[] = GUC_UNIT_MB }, &min_wal_size_mb, - DEFAULT_MIN_WAL_SEGS * (DEFAULT_XLOG_SEG_SIZE / (1024 * 1024)), - 2, MAX_KILOBYTES, + DEFAULT_MIN_WAL_SEGS * (DEFAULT_XLOG_SEG_SIZE / (1024 * 1024)), 2, MAX_KILOBYTES, NULL, NULL, NULL }, @@ -2964,8 +3052,7 @@ struct config_int ConfigureNamesInt[] = GUC_UNIT_MB }, &max_wal_size_mb, - DEFAULT_MAX_WAL_SEGS * (DEFAULT_XLOG_SEG_SIZE / (1024 * 1024)), - 2, MAX_KILOBYTES, + DEFAULT_MAX_WAL_SEGS * (DEFAULT_XLOG_SEG_SIZE / (1024 * 1024)), 2, MAX_KILOBYTES, NULL, assign_max_wal_size, NULL }, @@ -3237,68 +3324,53 @@ struct config_int ConfigureNamesInt[] = }, { - {"effective_io_concurrency", - PGC_USERSET, - RESOURCES_IO, + {"effective_io_concurrency", PGC_USERSET, RESOURCES_IO, gettext_noop("Number of simultaneous requests that can be handled efficiently by the disk subsystem."), gettext_noop("0 disables simultaneous requests."), GUC_EXPLAIN }, &effective_io_concurrency, - DEFAULT_EFFECTIVE_IO_CONCURRENCY, - 0, MAX_IO_CONCURRENCY, + DEFAULT_EFFECTIVE_IO_CONCURRENCY, 0, MAX_IO_CONCURRENCY, NULL, NULL, NULL }, { - {"maintenance_io_concurrency", - PGC_USERSET, - RESOURCES_IO, + {"maintenance_io_concurrency", PGC_USERSET, RESOURCES_IO, gettext_noop("A variant of \"effective_io_concurrency\" that is used for maintenance work."), gettext_noop("0 disables simultaneous requests."), GUC_EXPLAIN }, &maintenance_io_concurrency, - DEFAULT_MAINTENANCE_IO_CONCURRENCY, - 0, MAX_IO_CONCURRENCY, - NULL, assign_maintenance_io_concurrency, - NULL + DEFAULT_MAINTENANCE_IO_CONCURRENCY, 0, MAX_IO_CONCURRENCY, + NULL, assign_maintenance_io_concurrency, NULL }, { - {"io_max_combine_limit", - PGC_POSTMASTER, - RESOURCES_IO, + {"io_max_combine_limit", PGC_POSTMASTER, RESOURCES_IO, gettext_noop("Server-wide limit that clamps io_combine_limit."), NULL, GUC_UNIT_BLOCKS }, &io_max_combine_limit, - DEFAULT_IO_COMBINE_LIMIT, - 1, MAX_IO_COMBINE_LIMIT, + DEFAULT_IO_COMBINE_LIMIT, 1, MAX_IO_COMBINE_LIMIT, NULL, assign_io_max_combine_limit, NULL }, { - {"io_combine_limit", - PGC_USERSET, - RESOURCES_IO, + {"io_combine_limit", PGC_USERSET, RESOURCES_IO, gettext_noop("Limit on the size of data reads and writes."), NULL, 
GUC_UNIT_BLOCKS }, &io_combine_limit_guc, - DEFAULT_IO_COMBINE_LIMIT, - 1, MAX_IO_COMBINE_LIMIT, + DEFAULT_IO_COMBINE_LIMIT, 1, MAX_IO_COMBINE_LIMIT, NULL, assign_io_combine_limit, NULL }, { - {"io_max_concurrency", - PGC_POSTMASTER, - RESOURCES_IO, + {"io_max_concurrency", PGC_POSTMASTER, RESOURCES_IO, gettext_noop("Max number of IOs that one process can execute simultaneously."), - NULL, + NULL }, &io_max_concurrency, -1, -1, 1024, @@ -3306,11 +3378,9 @@ struct config_int ConfigureNamesInt[] = }, { - {"io_workers", - PGC_SIGHUP, - RESOURCES_IO, + {"io_workers", PGC_SIGHUP, RESOURCES_IO, gettext_noop("Number of IO worker processes, for io_method=worker."), - NULL, + NULL }, &io_workers, 3, 1, MAX_IO_WORKERS, @@ -3329,11 +3399,9 @@ struct config_int ConfigureNamesInt[] = }, { - {"max_worker_processes", - PGC_POSTMASTER, - RESOURCES_WORKER_PROCESSES, + {"max_worker_processes", PGC_POSTMASTER, RESOURCES_WORKER_PROCESSES, gettext_noop("Maximum number of concurrent worker processes."), - NULL, + NULL }, &max_worker_processes, 8, 0, MAX_BACKENDS, @@ -3341,11 +3409,9 @@ struct config_int ConfigureNamesInt[] = }, { - {"max_logical_replication_workers", - PGC_POSTMASTER, - REPLICATION_SUBSCRIBERS, + {"max_logical_replication_workers", PGC_POSTMASTER, REPLICATION_SUBSCRIBERS, gettext_noop("Maximum number of logical replication worker processes."), - NULL, + NULL }, &max_logical_replication_workers, 4, 0, MAX_BACKENDS, @@ -3353,11 +3419,9 @@ struct config_int ConfigureNamesInt[] = }, { - {"max_sync_workers_per_subscription", - PGC_SIGHUP, - REPLICATION_SUBSCRIBERS, + {"max_sync_workers_per_subscription", PGC_SIGHUP, REPLICATION_SUBSCRIBERS, gettext_noop("Maximum number of table synchronization workers per subscription."), - NULL, + NULL }, &max_sync_workers_per_subscription, 2, 0, MAX_BACKENDS, @@ -3365,11 +3429,9 @@ struct config_int ConfigureNamesInt[] = }, { - {"max_parallel_apply_workers_per_subscription", - PGC_SIGHUP, - REPLICATION_SUBSCRIBERS, + {"max_parallel_apply_workers_per_subscription", PGC_SIGHUP, REPLICATION_SUBSCRIBERS, gettext_noop("Maximum number of parallel apply workers per subscription."), - NULL, + NULL }, &max_parallel_apply_workers_per_subscription, 2, 0, MAX_PARALLEL_WORKER_LIMIT, @@ -3377,9 +3439,7 @@ struct config_int ConfigureNamesInt[] = }, { - {"max_active_replication_origins", - PGC_POSTMASTER, - REPLICATION_SUBSCRIBERS, + {"max_active_replication_origins", PGC_POSTMASTER, REPLICATION_SUBSCRIBERS, gettext_noop("Sets the maximum number of active replication origins."), NULL }, @@ -3497,9 +3557,7 @@ struct config_int ConfigureNamesInt[] = GUC_UNIT_BYTE | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | GUC_RUNTIME_COMPUTED }, &wal_segment_size, - DEFAULT_XLOG_SEG_SIZE, - WalSegMinSize, - WalSegMaxSize, + DEFAULT_XLOG_SEG_SIZE, WalSegMinSize, WalSegMaxSize, check_wal_segment_size, NULL, NULL }, @@ -3510,9 +3568,7 @@ struct config_int ConfigureNamesInt[] = GUC_UNIT_MIN, }, &wal_summary_keep_time, - 10 * HOURS_PER_DAY * MINS_PER_HOUR, /* 10 days */ - 0, - INT_MAX / SECS_PER_MINUTE, + 10 * HOURS_PER_DAY * MINS_PER_HOUR /* 10 days */ , 0, INT_MAX / SECS_PER_MINUTE, NULL, NULL, NULL }, @@ -3526,6 +3582,7 @@ struct config_int ConfigureNamesInt[] = 60, 1, INT_MAX / 1000, NULL, NULL, NULL }, + { {"autovacuum_vacuum_threshold", PGC_SIGHUP, VACUUM_AUTOVACUUM, gettext_noop("Minimum number of tuple updates or deletes prior to vacuum."), @@ -3535,6 +3592,7 @@ struct config_int ConfigureNamesInt[] = 50, 0, INT_MAX, NULL, NULL, NULL }, + { {"autovacuum_vacuum_max_threshold", PGC_SIGHUP, 
VACUUM_AUTOVACUUM, gettext_noop("Maximum number of tuple updates or deletes prior to vacuum."), @@ -3544,6 +3602,7 @@ struct config_int ConfigureNamesInt[] = 100000000, -1, INT_MAX, NULL, NULL, NULL }, + { {"autovacuum_vacuum_insert_threshold", PGC_SIGHUP, VACUUM_AUTOVACUUM, gettext_noop("Minimum number of tuple inserts prior to vacuum."), @@ -3553,6 +3612,7 @@ struct config_int ConfigureNamesInt[] = 1000, -1, INT_MAX, NULL, NULL, NULL }, + { {"autovacuum_analyze_threshold", PGC_SIGHUP, VACUUM_AUTOVACUUM, gettext_noop("Minimum number of tuple inserts, updates, or deletes prior to analyze."), @@ -3562,6 +3622,7 @@ struct config_int ConfigureNamesInt[] = 50, 0, INT_MAX, NULL, NULL, NULL }, + { /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ {"autovacuum_freeze_max_age", PGC_POSTMASTER, VACUUM_AUTOVACUUM, @@ -3574,6 +3635,7 @@ struct config_int ConfigureNamesInt[] = 200000000, 100000, 2000000000, NULL, NULL, NULL }, + { /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, VACUUM_AUTOVACUUM, @@ -3584,6 +3646,7 @@ struct config_int ConfigureNamesInt[] = 400000000, 10000, 2000000000, NULL, NULL, NULL }, + { /* see max_connections */ {"autovacuum_worker_slots", PGC_POSTMASTER, VACUUM_AUTOVACUUM, @@ -3594,6 +3657,7 @@ struct config_int ConfigureNamesInt[] = 16, 1, MAX_BACKENDS, NULL, NULL, NULL }, + { {"autovacuum_max_workers", PGC_SIGHUP, VACUUM_AUTOVACUUM, gettext_noop("Sets the maximum number of simultaneously running autovacuum worker processes."), @@ -3673,7 +3737,7 @@ struct config_int ConfigureNamesInt[] = {"ssl_renegotiation_limit", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, gettext_noop("SSL renegotiation is no longer supported; this can only be 0."), NULL, - GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE }, &ssl_renegotiation_limit, 0, 0, 0, @@ -3695,7 +3759,7 @@ struct config_int ConfigureNamesInt[] = { {"gin_fuzzy_search_limit", PGC_USERSET, CLIENT_CONN_OTHER, gettext_noop("Sets the maximum allowed result for exact search by GIN."), - gettext_noop("0 means no limit."), + gettext_noop("0 means no limit.") }, &GinFuzzySearchLimit, 0, 0, INT_MAX, @@ -3707,7 +3771,7 @@ struct config_int ConfigureNamesInt[] = gettext_noop("Sets the planner's assumption about the total size of the data caches."), gettext_noop("That is, the total size of the caches (kernel cache and shared buffers) used for PostgreSQL data files. 
" "This is measured in disk pages, which are normally 8 kB each."), - GUC_UNIT_BLOCKS | GUC_EXPLAIN, + GUC_UNIT_BLOCKS | GUC_EXPLAIN }, &effective_cache_size, DEFAULT_EFFECTIVE_CACHE_SIZE, 1, INT_MAX, @@ -3718,7 +3782,7 @@ struct config_int ConfigureNamesInt[] = {"min_parallel_table_scan_size", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the minimum amount of table data for a parallel scan."), gettext_noop("If the planner estimates that it will read a number of table pages too small to reach this limit, a parallel scan will not be considered."), - GUC_UNIT_BLOCKS | GUC_EXPLAIN, + GUC_UNIT_BLOCKS | GUC_EXPLAIN }, &min_parallel_table_scan_size, (8 * 1024 * 1024) / BLCKSZ, 0, INT_MAX / 3, @@ -3729,7 +3793,7 @@ struct config_int ConfigureNamesInt[] = {"min_parallel_index_scan_size", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the minimum amount of index data for a parallel scan."), gettext_noop("If the planner estimates that it will read a number of index pages too small to reach this limit, a parallel scan will not be considered."), - GUC_UNIT_BLOCKS | GUC_EXPLAIN, + GUC_UNIT_BLOCKS | GUC_EXPLAIN }, &min_parallel_index_scan_size, (512 * 1024) / BLCKSZ, 0, INT_MAX / 3, @@ -3842,7 +3906,7 @@ struct config_int ConfigureNamesInt[] = gettext_noop("Time between progress updates for " "long-running startup operations."), gettext_noop("0 disables progress updates."), - GUC_UNIT_MS, + GUC_UNIT_MS }, &log_startup_progress_interval, 10000, 0, INT_MAX, @@ -3880,6 +3944,7 @@ struct config_real ConfigureNamesReal[] = DEFAULT_SEQ_PAGE_COST, 0, DBL_MAX, NULL, NULL, NULL }, + { {"random_page_cost", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the planner's estimate of the cost of a " @@ -3891,6 +3956,7 @@ struct config_real ConfigureNamesReal[] = DEFAULT_RANDOM_PAGE_COST, 0, DBL_MAX, NULL, NULL, NULL }, + { {"cpu_tuple_cost", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the planner's estimate of the cost of " @@ -3902,6 +3968,7 @@ struct config_real ConfigureNamesReal[] = DEFAULT_CPU_TUPLE_COST, 0, DBL_MAX, NULL, NULL, NULL }, + { {"cpu_index_tuple_cost", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the planner's estimate of the cost of " @@ -3913,6 +3980,7 @@ struct config_real ConfigureNamesReal[] = DEFAULT_CPU_INDEX_TUPLE_COST, 0, DBL_MAX, NULL, NULL, NULL }, + { {"cpu_operator_cost", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the planner's estimate of the cost of " @@ -3924,6 +3992,7 @@ struct config_real ConfigureNamesReal[] = DEFAULT_CPU_OPERATOR_COST, 0, DBL_MAX, NULL, NULL, NULL }, + { {"parallel_tuple_cost", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the planner's estimate of the cost of " @@ -3935,6 +4004,7 @@ struct config_real ConfigureNamesReal[] = DEFAULT_PARALLEL_TUPLE_COST, 0, DBL_MAX, NULL, NULL, NULL }, + { {"parallel_setup_cost", PGC_USERSET, QUERY_TUNING_COST, gettext_noop("Sets the planner's estimate of the cost of " @@ -4011,8 +4081,7 @@ struct config_real ConfigureNamesReal[] = GUC_EXPLAIN }, &Geqo_selection_bias, - DEFAULT_GEQO_SELECTION_BIAS, - MIN_GEQO_SELECTION_BIAS, MAX_GEQO_SELECTION_BIAS, + DEFAULT_GEQO_SELECTION_BIAS, MIN_GEQO_SELECTION_BIAS, MAX_GEQO_SELECTION_BIAS, NULL, NULL, NULL }, { @@ -4229,6 +4298,7 @@ struct config_string ConfigureNamesString[] = "", check_recovery_target, assign_recovery_target, NULL }, + { {"recovery_target_xid", PGC_POSTMASTER, WAL_RECOVERY_TARGET, gettext_noop("Sets the transaction ID up to which recovery will proceed."), @@ -4238,6 +4308,7 @@ struct config_string ConfigureNamesString[] = "", 
check_recovery_target_xid, assign_recovery_target_xid, NULL }, + { {"recovery_target_time", PGC_POSTMASTER, WAL_RECOVERY_TARGET, gettext_noop("Sets the time stamp up to which recovery will proceed."), @@ -4247,6 +4318,7 @@ struct config_string ConfigureNamesString[] = "", check_recovery_target_time, assign_recovery_target_time, NULL }, + { {"recovery_target_name", PGC_POSTMASTER, WAL_RECOVERY_TARGET, gettext_noop("Sets the named restore point up to which recovery will proceed."), @@ -4256,6 +4328,7 @@ struct config_string ConfigureNamesString[] = "", check_recovery_target_name, assign_recovery_target_name, NULL }, + { {"recovery_target_lsn", PGC_POSTMASTER, WAL_RECOVERY_TARGET, gettext_noop("Sets the LSN of the write-ahead log location up to which recovery will proceed."), @@ -4566,6 +4639,7 @@ struct config_string ConfigureNamesString[] = "stderr", check_log_destination, assign_log_destination, NULL }, + { {"log_directory", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Sets the destination directory for log files."), @@ -4577,6 +4651,7 @@ struct config_string ConfigureNamesString[] = "log", check_canonical_path, NULL, NULL }, + { {"log_filename", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Sets the file name pattern for log files."), @@ -4620,6 +4695,7 @@ struct config_string ConfigureNamesString[] = "GMT", check_timezone, assign_timezone, show_timezone }, + { {"timezone_abbreviations", PGC_USERSET, CLIENT_CONN_LOCALE, gettext_noop("Selects a file of time zone abbreviations."), @@ -4984,7 +5060,6 @@ struct config_string ConfigureNamesString[] = check_log_connections, assign_log_connections, NULL }, - /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL @@ -5053,8 +5128,7 @@ struct config_enum ConfigureNamesEnum[] = NULL }, &default_toast_compression, - TOAST_PGLZ_COMPRESSION, - default_toast_compression_options, + TOAST_PGLZ_COMPRESSION, default_toast_compression_options, NULL, NULL, NULL }, @@ -5148,8 +5222,7 @@ struct config_enum ConfigureNamesEnum[] = NULL }, &syslog_facility, - DEFAULT_SYSLOG_FACILITY, - syslog_facility_options, + DEFAULT_SYSLOG_FACILITY, syslog_facility_options, NULL, assign_syslog_facility, NULL }, @@ -5203,7 +5276,6 @@ struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, - { {"stats_fetch_consistency", PGC_USERSET, STATS_CUMULATIVE, gettext_noop("Sets the consistency of accesses to statistics data."), @@ -5369,8 +5441,8 @@ struct config_enum ConfigureNamesEnum[] = GUC_SUPERUSER_ONLY }, &ssl_min_protocol_version, - PG_TLS1_2_VERSION, - ssl_protocol_versions_info + 1, /* don't allow PG_TLS_ANY */ + PG_TLS1_2_VERSION, ssl_protocol_versions_info + 1, /* don't allow + * PG_TLS_ANY */ NULL, NULL, NULL }, @@ -5381,14 +5453,14 @@ struct config_enum ConfigureNamesEnum[] = GUC_SUPERUSER_ONLY }, &ssl_max_protocol_version, - PG_TLS_ANY, - ssl_protocol_versions_info, + PG_TLS_ANY, ssl_protocol_versions_info, NULL, NULL, NULL }, { {"recovery_init_sync_method", PGC_SIGHUP, ERROR_HANDLING_OPTIONS, gettext_noop("Sets the method for synchronizing the data directory before crash recovery."), + NULL }, &recovery_init_sync_method, DATA_DIR_SYNC_METHOD_FSYNC, recovery_init_sync_method_options, From 3ef2b863a376af35e8d5f2cd1bb4311ec7a9f299 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 25 Aug 2025 11:08:26 -0500 Subject: [PATCH 563/602] Use PqMsg_* macros in fe-protocol3.c. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Oversight in commit f4b54e1ed9. 
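As background for the mechanical change below: the conversion replaces bare
message-type byte literals with the named PqMsg_* constants declared in
libpq/protocol.h. A minimal sketch of the resulting dispatch pattern (the
surrounding function and its comments are illustrative only; the two macro
names are taken from the diff below):

    #include "libpq/protocol.h"

    /* Sketch only, not part of the patch. */
    static void
    dispatch_message(char id)
    {
        switch (id)
        {
            case PqMsg_ErrorResponse:   /* previously the literal 'E' */
                /* ... report the error ... */
                break;
            case PqMsg_ReadyForQuery:   /* previously the literal 'Z' */
                /* ... backend is ready for a new query ... */
                break;
            default:
                break;
        }
    }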
Reviewed-by: Jacob Champion Reviewed-by: Fabrízio de Royes Mello Discussion: https://postgr.es/m/aKx5vEbbP03JNgtp%40nathan --- src/include/libpq/protocol.h | 1 + src/interfaces/libpq/fe-protocol3.c | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/include/libpq/protocol.h b/src/include/libpq/protocol.h index c64e628628d4b..7bf90053bcb6d 100644 --- a/src/include/libpq/protocol.h +++ b/src/include/libpq/protocol.h @@ -66,6 +66,7 @@ /* These are the codes sent by parallel workers to leader processes. */ + #define PqMsg_Progress 'P' diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 43ad672abce95..da7a8db68c80c 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -2262,7 +2262,7 @@ pqFunctionCall3(PGconn *conn, Oid fnid, */ switch (id) { - case 'V': /* function result */ + case PqMsg_FunctionCallResponse: if (pqGetInt(actual_result_len, 4, conn)) continue; if (*actual_result_len != -1) @@ -2283,22 +2283,22 @@ pqFunctionCall3(PGconn *conn, Oid fnid, /* correctly finished function result message */ status = PGRES_COMMAND_OK; break; - case 'E': /* error return */ + case PqMsg_ErrorResponse: if (pqGetErrorNotice3(conn, true)) continue; status = PGRES_FATAL_ERROR; break; - case 'A': /* notify message */ + case PqMsg_NotificationResponse: /* handle notify and go back to processing return values */ if (getNotify(conn)) continue; break; - case 'N': /* notice */ + case PqMsg_NoticeResponse: /* handle notice and go back to processing return values */ if (pqGetErrorNotice3(conn, false)) continue; break; - case 'Z': /* backend is ready for new query */ + case PqMsg_ReadyForQuery: if (getReadyForQuery(conn)) continue; @@ -2330,7 +2330,7 @@ pqFunctionCall3(PGconn *conn, Oid fnid, } /* and we're out */ return pqPrepareAsyncResult(conn); - case 'S': /* parameter status */ + case PqMsg_ParameterStatus: if (getParameterStatus(conn)) continue; break; From 52ecd05aeef851b7d3688b0f0a633132c691eb32 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Mon, 25 Aug 2025 09:27:39 -0700 Subject: [PATCH 564/602] oauth: Always link with -lm for floor() libpq-oauth uses floor() but did not link against libm. Since libpq itself uses -lm, nothing in the buildfarm has had problems with libpq-oauth yet, and it seems difficult to hit a failure in practice. But commit 1443b6c0e attempted to add an executable based on libpq-oauth, which ran into link-time failures with Clang due to this omission. It seems prudent to fix this for both the module and the executable simultaneously so that no one trips over it in the future. This is a Makefile-only change. The Meson side already pulls in libm, through the os_deps dependency. 
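As a standalone illustration of the failure mode (a sketch, not code from the
tree, assuming a toolchain where libm is a separate library from libc): an
object that calls floor() but is linked without -lm can fail with an undefined
reference, so "cc demo.c" may fail where "cc demo.c -lm" links cleanly.

    /* demo.c -- minimal program that requires libm at link time */
    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        printf("%.1f\n", floor(2.7));   /* floor() lives in libm */
        return 0;
    }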
Discussion: https://postgr.es/m/CAOYmi%2Bn6ORcmV10k%2BdAs%2Bp0b9QJ4bfpk0WuHQaF5ODXxM8Y36A%40mail.gmail.com Backpatch-through: 18 --- src/interfaces/libpq-oauth/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index 682f17413b3a4..8819fa8650e6d 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -47,7 +47,7 @@ $(stlib): override OBJS += $(OBJS_STATIC) $(stlib): $(OBJS_STATIC) SHLIB_LINK_INTERNAL = $(libpq_pgport_shlib) -SHLIB_LINK = $(LIBCURL_LDFLAGS) $(LIBCURL_LDLIBS) $(filter -lintl, $(LIBS)) +SHLIB_LINK = $(LIBCURL_LDFLAGS) $(LIBCURL_LDLIBS) $(filter -lintl -lm, $(LIBS)) SHLIB_PREREQS = submake-libpq SHLIB_EXPORTS = exports.txt From 4e1e417330d42cb19c7d439cd50eea20f25c7518 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Mon, 25 Aug 2025 09:27:45 -0700 Subject: [PATCH 565/602] oauth: Add unit tests for multiplexer handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To better record the internal behaviors of oauth-curl.c, add a unit test suite for the socket and timer handling code. This is all based on TAP and driven by our existing Test::More infrastructure. This commit is a replay of 1443b6c0e, which was reverted due to buildfarm failures. Compared with that, this version protects the build targets in the Makefile with a with_libcurl conditional, and it tweaks the code style in 001_oauth.pl. Reviewed-by: Dagfinn Ilmari Mannsåker Reviewed-by: Andrew Dunstan Discussion: https://postgr.es/m/CAOYmi+nDZxJHaWj9_jRSyf8uMToCADAmOfJEggsKW-kY7aUwHA@mail.gmail.com Discussion: https://postgr.es/m/CAOYmi+m=xY0P_uAzAP_884uF-GhQ3wrineGwc9AEnb6fYxVqVQ@mail.gmail.com --- src/interfaces/libpq-oauth/Makefile | 36 +- src/interfaces/libpq-oauth/meson.build | 35 ++ src/interfaces/libpq-oauth/t/001_oauth.pl | 24 + src/interfaces/libpq-oauth/test-oauth-curl.c | 527 +++++++++++++++++++ 4 files changed, 618 insertions(+), 4 deletions(-) create mode 100644 src/interfaces/libpq-oauth/t/001_oauth.pl create mode 100644 src/interfaces/libpq-oauth/test-oauth-curl.c diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index 8819fa8650e6d..c8c38947ace1b 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -54,10 +54,6 @@ SHLIB_EXPORTS = exports.txt # Disable -bundle_loader on macOS. BE_DLLLIBS = -# By default, a library without an SONAME doesn't get a static library, so we -# add it to the build explicitly. -all: all-lib all-static-lib - # Shared library stuff include $(top_srcdir)/src/Makefile.shlib @@ -66,6 +62,28 @@ include $(top_srcdir)/src/Makefile.shlib %_shlib.o: %.c %.o $(CC) $(CFLAGS) $(CFLAGS_SL) $(CPPFLAGS) $(CPPFLAGS_SHLIB) -c $< -o $@ +.PHONY: all-tests +all-tests: oauth_tests$(X) + +oauth_tests$(X): test-oauth-curl.o oauth-utils.o $(WIN32RES) | submake-libpgport submake-libpq + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(SHLIB_LINK) -o $@ + +# +# Top-Level Targets +# +# The existence of a t/ folder induces the buildfarm to run Make directly on +# this subdirectory, bypassing the recursion skip in src/interfaces/Makefile. +# Wrap the standard build targets in a with_libcurl conditional to avoid +# building OAuth code on platforms that haven't requested it. (The "clean"-style +# targets remain available.) 
+# + +ifeq ($(with_libcurl), yes) + +# By default, a library without an SONAME doesn't get a static library, so we +# add it to the build explicitly. +all: all-lib all-static-lib + # Ignore the standard rules for SONAME-less installation; we want both the # static and shared libraries to go into libdir. install: all installdirs $(stlib) $(shlib) @@ -75,9 +93,19 @@ install: all installdirs $(stlib) $(shlib) installdirs: $(MKDIR_P) '$(DESTDIR)$(libdir)' +check: all-tests + $(prove_check) + +installcheck: all-tests + $(prove_installcheck) + +endif # with_libcurl + uninstall: rm -f '$(DESTDIR)$(libdir)/$(stlib)' rm -f '$(DESTDIR)$(libdir)/$(shlib)' clean distclean: clean-lib rm -f $(OBJS) $(OBJS_STATIC) $(OBJS_SHLIB) + rm -f test-oauth-curl.o oauth_tests$(X) + rm -rf tmp_check diff --git a/src/interfaces/libpq-oauth/meson.build b/src/interfaces/libpq-oauth/meson.build index df064c59a4070..505e1671b8637 100644 --- a/src/interfaces/libpq-oauth/meson.build +++ b/src/interfaces/libpq-oauth/meson.build @@ -47,3 +47,38 @@ libpq_oauth_so = shared_module(libpq_oauth_name, link_args: export_fmt.format(export_file.full_path()), kwargs: default_lib_args, ) + +libpq_oauth_test_deps = [] + +oauth_test_sources = files('test-oauth-curl.c') + libpq_oauth_so_sources + +if host_system == 'windows' + oauth_test_sources += rc_bin_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'oauth_tests', + '--FILEDESC', 'OAuth unit test program',]) +endif + +libpq_oauth_test_deps += executable('oauth_tests', + oauth_test_sources, + dependencies: [frontend_shlib_code, libpq, libpq_oauth_deps], + kwargs: default_bin_args + { + 'c_args': default_bin_args.get('c_args', []) + libpq_oauth_so_c_args, + 'c_pch': pch_postgres_fe_h, + 'include_directories': [libpq_inc, postgres_inc], + 'install': false, + } +) + +testprep_targets += libpq_oauth_test_deps + +tests += { + 'name': 'libpq-oauth', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_oauth.pl', + ], + 'deps': libpq_oauth_test_deps, + }, +} diff --git a/src/interfaces/libpq-oauth/t/001_oauth.pl b/src/interfaces/libpq-oauth/t/001_oauth.pl new file mode 100644 index 0000000000000..6c972056bbd49 --- /dev/null +++ b/src/interfaces/libpq-oauth/t/001_oauth.pl @@ -0,0 +1,24 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Utils; +use Test::More; + +# Defer entirely to the oauth_tests executable. stdout/err is routed through +# Test::More so that our logging infrastructure can handle it correctly. Using +# IPC::Run::new_chunker seems to help interleave the two streams a little better +# than without. +# +# TODO: prove can also deal with native executables itself, which we could +# probably make use of via PROVE_TESTS on the Makefile side. But the Meson setup +# calls Perl directly, which would require more code to work around... and +# there's still the matter of logging. 
+my $builder = Test::More->builder; +my $out = $builder->output; +my $err = $builder->failure_output; + +IPC::Run::run ['oauth_tests'], + '>' => (IPC::Run::new_chunker, sub { $out->print($_[0]) }), + '2>' => (IPC::Run::new_chunker, sub { $err->print($_[0]) }) + or die "oauth_tests returned $?"; diff --git a/src/interfaces/libpq-oauth/test-oauth-curl.c b/src/interfaces/libpq-oauth/test-oauth-curl.c new file mode 100644 index 0000000000000..8263aff2f4ad4 --- /dev/null +++ b/src/interfaces/libpq-oauth/test-oauth-curl.c @@ -0,0 +1,527 @@ +/* + * test-oauth-curl.c + * + * A unit test driver for libpq-oauth. This #includes oauth-curl.c, which lets + * the tests reference static functions and other internals. + * + * USE_ASSERT_CHECKING is required, to make it easy for tests to wrap + * must-succeed code as part of test setup. + * + * Copyright (c) 2025, PostgreSQL Global Development Group + */ + +#include "oauth-curl.c" + +#include + +#ifdef USE_ASSERT_CHECKING + +/* + * TAP Helpers + */ + +static int num_tests = 0; + +/* + * Reports ok/not ok to the TAP stream on stdout. + */ +#define ok(OK, TEST) \ + ok_impl(OK, TEST, #OK, __FILE__, __LINE__) + +static bool +ok_impl(bool ok, const char *test, const char *teststr, const char *file, int line) +{ + printf("%sok %d - %s\n", ok ? "" : "not ", ++num_tests, test); + + if (!ok) + { + printf("# at %s:%d:\n", file, line); + printf("# expression is false: %s\n", teststr); + } + + return ok; +} + +/* + * Like ok(this == that), but with more diagnostics on failure. + * + * Only works on ints, but luckily that's all we need here. Note that the much + * simpler-looking macro implementation + * + * is_diag(ok(THIS == THAT, TEST), THIS, #THIS, THAT, #THAT) + * + * suffers from multiple evaluation of the macro arguments... + */ +#define is(THIS, THAT, TEST) \ + do { \ + int this_ = (THIS), \ + that_ = (THAT); \ + is_diag( \ + ok_impl(this_ == that_, TEST, #THIS " == " #THAT, __FILE__, __LINE__), \ + this_, #THIS, that_, #THAT \ + ); \ + } while (0) + +static bool +is_diag(bool ok, int this, const char *thisstr, int that, const char *thatstr) +{ + if (!ok) + printf("# %s = %d; %s = %d\n", thisstr, this, thatstr, that); + + return ok; +} + +/* + * Utilities + */ + +/* + * Creates a partially-initialized async_ctx for the purposes of testing. Free + * with free_test_actx(). + */ +static struct async_ctx * +init_test_actx(void) +{ + struct async_ctx *actx; + + actx = calloc(1, sizeof(*actx)); + Assert(actx); + + actx->mux = PGINVALID_SOCKET; + actx->timerfd = -1; + actx->debugging = true; + + initPQExpBuffer(&actx->errbuf); + + Assert(setup_multiplexer(actx)); + + return actx; +} + +static void +free_test_actx(struct async_ctx *actx) +{ + termPQExpBuffer(&actx->errbuf); + + if (actx->mux != PGINVALID_SOCKET) + close(actx->mux); + if (actx->timerfd >= 0) + close(actx->timerfd); + + free(actx); +} + +static char dummy_buf[4 * 1024]; /* for fill_pipe/drain_pipe */ + +/* + * Writes to the write side of a pipe until it won't take any more data. Returns + * the amount written. + */ +static ssize_t +fill_pipe(int fd) +{ + int mode; + ssize_t written = 0; + + /* Don't block. */ + Assert((mode = fcntl(fd, F_GETFL)) != -1); + Assert(fcntl(fd, F_SETFL, mode | O_NONBLOCK) == 0); + + while (true) + { + ssize_t w; + + w = write(fd, dummy_buf, sizeof(dummy_buf)); + if (w < 0) + { + if (errno != EAGAIN && errno != EWOULDBLOCK) + { + perror("write to pipe"); + written = -1; + } + break; + } + + written += w; + } + + /* Reset the descriptor flags. 
*/ + Assert(fcntl(fd, F_SETFD, mode) == 0); + + return written; +} + +/* + * Drains the requested amount of data from the read side of a pipe. + */ +static bool +drain_pipe(int fd, ssize_t n) +{ + Assert(n > 0); + + while (n) + { + size_t to_read = (n <= sizeof(dummy_buf)) ? n : sizeof(dummy_buf); + ssize_t drained; + + drained = read(fd, dummy_buf, to_read); + if (drained < 0) + { + perror("read from pipe"); + return false; + } + + n -= drained; + } + + return true; +} + +/* + * Tests whether the multiplexer is marked ready by the deadline. This is a + * macro so that file/line information makes sense during failures. + * + * NB: our current multiplexer implementations (epoll/kqueue) are *readable* + * when the underlying libcurl sockets are *writable*. This behavior is pinned + * here to record that expectation; PGRES_POLLING_READING is hardcoded + * throughout the flow and would need to be changed if a new multiplexer does + * something different. + */ +#define mux_is_ready(MUX, DEADLINE, TEST) \ + do { \ + int res_ = PQsocketPoll(MUX, 1, 0, DEADLINE); \ + Assert(res_ != -1); \ + ok(res_ > 0, "multiplexer is ready " TEST); \ + } while (0) + +/* + * The opposite of mux_is_ready(). + */ +#define mux_is_not_ready(MUX, TEST) \ + do { \ + int res_ = PQsocketPoll(MUX, 1, 0, 0); \ + Assert(res_ != -1); \ + is(res_, 0, "multiplexer is not ready " TEST); \ + } while (0) + +/* + * Test Suites + */ + +/* Per-suite timeout. Set via the PG_TEST_TIMEOUT_DEFAULT envvar. */ +static pg_usec_time_t timeout_us = 180 * 1000 * 1000; + +static void +test_set_timer(void) +{ + struct async_ctx *actx = init_test_actx(); + const pg_usec_time_t deadline = PQgetCurrentTimeUSec() + timeout_us; + + printf("# test_set_timer\n"); + + /* A zero-duration timer should result in a near-immediate ready signal. */ + Assert(set_timer(actx, 0)); + mux_is_ready(actx->mux, deadline, "when timer expires"); + is(timer_expired(actx), 1, "timer_expired() returns 1 when timer expires"); + + /* Resetting the timer far in the future should unset the ready signal. */ + Assert(set_timer(actx, INT_MAX)); + mux_is_not_ready(actx->mux, "when timer is reset to the future"); + is(timer_expired(actx), 0, "timer_expired() returns 0 with unexpired timer"); + + /* Setting another zero-duration timer should override the previous one. */ + Assert(set_timer(actx, 0)); + mux_is_ready(actx->mux, deadline, "when timer is re-expired"); + is(timer_expired(actx), 1, "timer_expired() returns 1 when timer is re-expired"); + + /* And disabling that timer should once again unset the ready signal. */ + Assert(set_timer(actx, -1)); + mux_is_not_ready(actx->mux, "when timer is unset"); + is(timer_expired(actx), 0, "timer_expired() returns 0 when timer is unset"); + + { + bool expired; + + /* Make sure drain_timer_events() functions correctly as well. */ + Assert(set_timer(actx, 0)); + mux_is_ready(actx->mux, deadline, "when timer is re-expired (drain_timer_events)"); + + Assert(drain_timer_events(actx, &expired)); + mux_is_not_ready(actx->mux, "when timer is drained after expiring"); + is(expired, 1, "drain_timer_events() reports expiration"); + is(timer_expired(actx), 0, "timer_expired() returns 0 after timer is drained"); + + /* A second drain should do nothing. 
*/ + Assert(drain_timer_events(actx, &expired)); + mux_is_not_ready(actx->mux, "when timer is drained a second time"); + is(expired, 0, "drain_timer_events() reports no expiration"); + is(timer_expired(actx), 0, "timer_expired() still returns 0"); + } + + free_test_actx(actx); +} + +static void +test_register_socket(void) +{ + struct async_ctx *actx = init_test_actx(); + int pipefd[2]; + int rfd, + wfd; + bool bidirectional; + + /* Create a local pipe for communication. */ + Assert(pipe(pipefd) == 0); + rfd = pipefd[0]; + wfd = pipefd[1]; + + /* + * Some platforms (FreeBSD) implement bidirectional pipes, affecting the + * behavior of some of these tests. Store that knowledge for later. + */ + bidirectional = PQsocketPoll(rfd /* read */ , 0, 1 /* write */ , 0) > 0; + + /* + * This suite runs twice -- once using CURL_POLL_IN/CURL_POLL_OUT for + * read/write operations, respectively, and once using CURL_POLL_INOUT for + * both sides. + */ + for (int inout = 0; inout < 2; inout++) + { + const int in_event = inout ? CURL_POLL_INOUT : CURL_POLL_IN; + const int out_event = inout ? CURL_POLL_INOUT : CURL_POLL_OUT; + const pg_usec_time_t deadline = PQgetCurrentTimeUSec() + timeout_us; + size_t bidi_pipe_size = 0; /* silence compiler warnings */ + + printf("# test_register_socket %s\n", inout ? "(INOUT)" : ""); + + /* + * At the start of the test, the read side should be blocked and the + * write side should be open. (There's a mistake at the end of this + * loop otherwise.) + */ + Assert(PQsocketPoll(rfd, 1, 0, 0) == 0); + Assert(PQsocketPoll(wfd, 0, 1, 0) > 0); + + /* + * For bidirectional systems, emulate unidirectional behavior here by + * filling up the "read side" of the pipe. + */ + if (bidirectional) + Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); + + /* Listen on the read side. The multiplexer shouldn't be ready yet. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when fd is not readable"); + + /* Writing to the pipe should result in a read-ready multiplexer. */ + Assert(write(wfd, "x", 1) == 1); + mux_is_ready(actx->mux, deadline, "when fd is readable"); + + /* + * Update the registration to wait on write events instead. The + * multiplexer should be unset. + */ + Assert(register_socket(NULL, rfd, CURL_POLL_OUT, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when waiting for writes on readable fd"); + + /* Re-register for read events. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when waiting for reads again"); + + /* Stop listening. The multiplexer should be unset. */ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when readable fd is removed"); + + /* Listen again. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when readable fd is re-added"); + + /* + * Draining the pipe should unset the multiplexer again, once the old + * event is cleared. + */ + Assert(drain_pipe(rfd, 1)); + Assert(comb_multiplexer(actx)); + mux_is_not_ready(actx->mux, "when fd is drained"); + + /* Undo any unidirectional emulation. */ + if (bidirectional) + Assert(drain_pipe(wfd, bidi_pipe_size)); + + /* Listen on the write side. An empty buffer should be writable. 
*/ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + Assert(register_socket(NULL, wfd, out_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when fd is writable"); + + /* As above, wait on read events instead. */ + Assert(register_socket(NULL, wfd, CURL_POLL_IN, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when waiting for reads on writable fd"); + + /* Re-register for write events. */ + Assert(register_socket(NULL, wfd, out_event, actx, NULL) == 0); + mux_is_ready(actx->mux, deadline, "when waiting for writes again"); + + { + ssize_t written; + + /* + * Fill the pipe. Once the old writable event is cleared, the mux + * should not be ready. + */ + Assert((written = fill_pipe(wfd)) > 0); + printf("# pipe buffer is full at %zd bytes\n", written); + + Assert(comb_multiplexer(actx)); + mux_is_not_ready(actx->mux, "when fd buffer is full"); + + /* Drain the pipe again. */ + Assert(drain_pipe(rfd, written)); + mux_is_ready(actx->mux, deadline, "when fd buffer is drained"); + } + + /* Stop listening. */ + Assert(register_socket(NULL, wfd, CURL_POLL_REMOVE, actx, NULL) == 0); + mux_is_not_ready(actx->mux, "when fd is removed"); + + /* Make sure an expired timer doesn't interfere with event draining. */ + { + bool expired; + + /* Make the rfd appear unidirectional if necessary. */ + if (bidirectional) + Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); + + /* Set the timer and wait for it to expire. */ + Assert(set_timer(actx, 0)); + Assert(PQsocketPoll(actx->timerfd, 1, 0, deadline) > 0); + is(timer_expired(actx), 1, "timer is expired"); + + /* Register for read events and make the fd readable. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + Assert(write(wfd, "x", 1) == 1); + mux_is_ready(actx->mux, deadline, "when fd is readable and timer expired"); + + /* + * Draining the pipe should unset the multiplexer again, once the + * old event is drained and the timer is reset. + * + * Order matters, since comb_multiplexer() doesn't have to remove + * stale events when active events exist. Follow the call sequence + * used in the code: drain the timer expiration, drain the pipe, + * then clear the stale events. + */ + Assert(drain_timer_events(actx, &expired)); + Assert(drain_pipe(rfd, 1)); + Assert(comb_multiplexer(actx)); + + is(expired, 1, "drain_timer_events() reports expiration"); + is(timer_expired(actx), 0, "timer is no longer expired"); + mux_is_not_ready(actx->mux, "when fd is drained and timer reset"); + + /* Stop listening. */ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + + /* Undo any unidirectional emulation. */ + if (bidirectional) + Assert(drain_pipe(wfd, bidi_pipe_size)); + } + + /* Ensure comb_multiplexer() can handle multiple stale events. */ + { + int rfd2, + wfd2; + + /* Create a second local pipe. */ + Assert(pipe(pipefd) == 0); + rfd2 = pipefd[0]; + wfd2 = pipefd[1]; + + /* Make both rfds appear unidirectional if necessary. */ + if (bidirectional) + { + Assert((bidi_pipe_size = fill_pipe(rfd)) > 0); + Assert(fill_pipe(rfd2) == bidi_pipe_size); + } + + /* Register for read events on both fds, and make them readable. */ + Assert(register_socket(NULL, rfd, in_event, actx, NULL) == 0); + Assert(register_socket(NULL, rfd2, in_event, actx, NULL) == 0); + + Assert(write(wfd, "x", 1) == 1); + Assert(write(wfd2, "x", 1) == 1); + + mux_is_ready(actx->mux, deadline, "when two fds are readable"); + + /* + * Drain both fds. comb_multiplexer() should then ensure that the + * mux is no longer readable. 
+ */ + Assert(drain_pipe(rfd, 1)); + Assert(drain_pipe(rfd2, 1)); + Assert(comb_multiplexer(actx)); + mux_is_not_ready(actx->mux, "when two fds are drained"); + + /* Stop listening. */ + Assert(register_socket(NULL, rfd, CURL_POLL_REMOVE, actx, NULL) == 0); + Assert(register_socket(NULL, rfd2, CURL_POLL_REMOVE, actx, NULL) == 0); + + /* Undo any unidirectional emulation. */ + if (bidirectional) + { + Assert(drain_pipe(wfd, bidi_pipe_size)); + Assert(drain_pipe(wfd2, bidi_pipe_size)); + } + + close(rfd2); + close(wfd2); + } + } + + close(rfd); + close(wfd); + free_test_actx(actx); +} + +int +main(int argc, char *argv[]) +{ + const char *timeout; + + /* Grab the default timeout. */ + timeout = getenv("PG_TEST_TIMEOUT_DEFAULT"); + if (timeout) + { + int timeout_s = atoi(timeout); + + if (timeout_s > 0) + timeout_us = timeout_s * 1000 * 1000; + } + + /* + * Set up line buffering for our output, to let stderr interleave in the + * log files. + */ + setvbuf(stdout, NULL, PG_IOLBF, 0); + + test_set_timer(); + test_register_socket(); + + printf("1..%d\n", num_tests); + return 0; +} + +#else /* !USE_ASSERT_CHECKING */ + +/* + * Skip the test suite when we don't have assertions. + */ +int +main(int argc, char *argv[]) +{ + printf("1..0 # skip: cassert is not enabled\n"); + + return 0; +} + +#endif /* USE_ASSERT_CHECKING */ From 989b2e4d5c95f6b183e76f3eb06d2d360651ccf2 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 25 Aug 2025 14:11:01 -0500 Subject: [PATCH 566/602] Use PqMsg_* macros in applyparallelworker.c. Oversight in commit f4b54e1ed9. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAobFsHaLMypA6C96-9YExvF4AcU1xNPoPuNYRVm3mq4dg%40mail.gmail.com --- src/backend/replication/logical/applyparallelworker.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c index cd0e19176fdb2..31a92d1a24abd 100644 --- a/src/backend/replication/logical/applyparallelworker.c +++ b/src/backend/replication/logical/applyparallelworker.c @@ -1007,7 +1007,7 @@ ProcessParallelApplyMessage(StringInfo msg) switch (msgtype) { - case 'E': /* ErrorResponse */ + case PqMsg_ErrorResponse: { ErrorData edata; @@ -1044,11 +1044,11 @@ ProcessParallelApplyMessage(StringInfo msg) /* * Don't need to do anything about NoticeResponse and - * NotifyResponse as the logical replication worker doesn't need - * to send messages to the client. + * NotificationResponse as the logical replication worker doesn't + * need to send messages to the client. */ - case 'N': - case 'A': + case PqMsg_NoticeResponse: + case PqMsg_NotificationResponse: break; default: From 99234e9ddc02f45eb122f83d49031e2c517d0af8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 25 Aug 2025 22:59:00 +0200 Subject: [PATCH 567/602] Message wording improvements Use "row" instead of "tuple" for user-facing information for logical replication conflicts. 
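For example, the insert_exists conflict message shown in the documentation
hunk below changes from

    Key (c)=(1); existing local tuple (1, 'local'); remote tuple (1, 'remote').

to

    Key (c)=(1); existing local row (1, 'local'); remote row (1, 'remote').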
--- doc/src/sgml/logical-replication.sgml | 28 +++++++++++----------- src/backend/executor/execReplication.c | 6 ++--- src/backend/replication/logical/conflict.c | 26 ++++++++++---------- src/include/replication/conflict.h | 4 ++-- src/test/subscription/t/001_rep_changes.pl | 4 ++-- src/test/subscription/t/013_partition.pl | 8 +++---- src/test/subscription/t/029_on_error.pl | 2 +- src/test/subscription/t/030_origin.pl | 4 ++-- src/test/subscription/t/035_conflicts.pl | 22 ++++++++--------- 9 files changed, 52 insertions(+), 52 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index 0ac29928f17ec..9ccd5ec500601 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -1824,7 +1824,7 @@ Publications: update_missing - The tuple to be updated was not found. The update will simply be + The row to be updated was not found. The update will simply be skipped in this scenario. @@ -1845,7 +1845,7 @@ Publications: delete_missing - The tuple to be deleted was not found. The delete will simply be + The row to be deleted was not found. The delete will simply be skipped in this scenario. @@ -1879,8 +1879,8 @@ DETAIL: detailed_explanation. where detail_values is one of: Key (column_name , ...)=(column_value , ...) - existing local tuple (column_name , ...)=(column_value , ...) - remote tuple (column_name , ...)=(column_value , ...) + existing local row (column_name , ...)=(column_value , ...) + remote row (column_name , ...)=(column_value , ...) replica identity {(column_name , ...)=(column_value , ...) | full (column_name , ...)=(column_value , ...)} @@ -1914,32 +1914,32 @@ DETAIL: detailed_explanation. detailed_explanation includes the origin, transaction ID, and commit timestamp of the transaction that - modified the existing local tuple, if available. + modified the existing local row, if available. The Key section includes the key values of the local - tuple that violated a unique constraint for + row that violated a unique constraint for insert_exists, update_exists or multiple_unique_conflicts conflicts. - The existing local tuple section includes the local - tuple if its origin differs from the remote tuple for + The existing local row section includes the local + row if its origin differs from the remote row for update_origin_differs or delete_origin_differs - conflicts, or if the key value conflicts with the remote tuple for + conflicts, or if the key value conflicts with the remote row for insert_exists, update_exists or multiple_unique_conflicts conflicts. - The remote tuple section includes the new tuple from + The remote row section includes the new row from the remote insert or update operation that caused the conflict. Note that - for an update operation, the column value of the new tuple will be null + for an update operation, the column value of the new row will be null if the value is unchanged and toasted. @@ -1947,7 +1947,7 @@ DETAIL: detailed_explanation. The replica identity section includes the replica identity key values that were used to search for the existing local - tuple to be updated or deleted. This may include the full tuple value + row to be updated or deleted. This may include the full row value if the local relation is marked with REPLICA IDENTITY FULL. @@ -1955,7 +1955,7 @@ DETAIL: detailed_explanation. column_name is the column name. 
- For existing local tuple, remote tuple, + For existing local row, remote row, and replica identity full cases, column names are logged only if the user lacks the privilege to access all columns of the table. If column names are present, they appear in the same order @@ -2012,7 +2012,7 @@ DETAIL: detailed_explanation. ERROR: conflict detected on relation "public.test": conflict=insert_exists DETAIL: Key already exists in unique index "t_pkey", which was modified locally in transaction 740 at 2024-06-26 10:47:04.727375+08. -Key (c)=(1); existing local tuple (1, 'local'); remote tuple (1, 'remote'). +Key (c)=(1); existing local row (1, 'local'); remote row (1, 'remote'). CONTEXT: processing remote data for replication origin "pg_16395" during "INSERT" for replication target relation "public.test" in transaction 725 finished at 0/014C0378 The LSN of the transaction that contains the change violating the constraint and diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index da0cbf41d6f88..b409d4ecbf525 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -852,10 +852,10 @@ ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo, conflictindexes, false); /* - * Checks the conflict indexes to fetch the conflicting local tuple - * and reports the conflict. We perform this check here, instead of + * Checks the conflict indexes to fetch the conflicting local row and + * reports the conflict. We perform this check here, instead of * performing an additional index scan before the actual insertion and - * reporting the conflict if any conflicting tuples are found. This is + * reporting the conflict if any conflicting rows are found. This is * to avoid the overhead of executing the extra scan for each INSERT * operation, even when no conflict arises, which could introduce * significant overhead to replication, particularly in cases where diff --git a/src/backend/replication/logical/conflict.c b/src/backend/replication/logical/conflict.c index 2fd3e8bbda50b..166955922650f 100644 --- a/src/backend/replication/logical/conflict.c +++ b/src/backend/replication/logical/conflict.c @@ -55,7 +55,7 @@ static char *build_index_value_desc(EState *estate, Relation localrel, /* * Get the xmin and commit timestamp data (origin and timestamp) associated - * with the provided local tuple. + * with the provided local row. * * Return true if the commit timestamp data was found, false otherwise. */ @@ -89,12 +89,12 @@ GetTupleTransactionInfo(TupleTableSlot *localslot, TransactionId *xmin, * This function is used to report a conflict while applying replication * changes. * - * 'searchslot' should contain the tuple used to search the local tuple to be + * 'searchslot' should contain the tuple used to search the local row to be * updated or deleted. * * 'remoteslot' should contain the remote new tuple, if any. * - * conflicttuples is a list of local tuples that caused the conflict and the + * conflicttuples is a list of local rows that caused the conflict and the * conflict related information. See ConflictTupleInfo. * * The caller must ensure that all the indexes passed in ConflictTupleInfo are @@ -191,9 +191,9 @@ errcode_apply_conflict(ConflictType type) * * The DETAIL line comprises of two parts: * 1. Explanation of the conflict type, including the origin and commit - * timestamp of the existing local tuple. - * 2. Display of conflicting key, existing local tuple, remote new tuple, and - * replica identity columns, if any. 
The remote old tuple is excluded as its + * timestamp of the existing local row. + * 2. Display of conflicting key, existing local row, remote new row, and + * replica identity columns, if any. The remote old row is excluded as its * information is covered in the replica identity columns. */ static void @@ -313,7 +313,7 @@ errdetail_apply_conflict(EState *estate, ResultRelInfo *relinfo, localslot, remoteslot, indexoid); /* - * Next, append the key values, existing local tuple, remote tuple and + * Next, append the key values, existing local row, remote row, and * replica identity columns after the message. */ if (val_desc) @@ -331,7 +331,7 @@ errdetail_apply_conflict(EState *estate, ResultRelInfo *relinfo, /* * Helper function to build the additional details for conflicting key, - * existing local tuple, remote tuple, and replica identity columns. + * existing local row, remote row, and replica identity columns. * * If the return value is NULL, it indicates that the current user lacks * permissions to view the columns involved. @@ -373,7 +373,7 @@ build_tuple_value_details(EState *estate, ResultRelInfo *relinfo, { /* * The 'modifiedCols' only applies to the new tuple, hence we pass - * NULL for the existing local tuple. + * NULL for the existing local row. */ desc = ExecBuildSlotValueDescription(relid, localslot, tupdesc, NULL, 64); @@ -383,12 +383,12 @@ build_tuple_value_details(EState *estate, ResultRelInfo *relinfo, if (tuple_value.len > 0) { appendStringInfoString(&tuple_value, "; "); - appendStringInfo(&tuple_value, _("existing local tuple %s"), + appendStringInfo(&tuple_value, _("existing local row %s"), desc); } else { - appendStringInfo(&tuple_value, _("Existing local tuple %s"), + appendStringInfo(&tuple_value, _("Existing local row %s"), desc); } } @@ -415,11 +415,11 @@ build_tuple_value_details(EState *estate, ResultRelInfo *relinfo, if (tuple_value.len > 0) { appendStringInfoString(&tuple_value, "; "); - appendStringInfo(&tuple_value, _("remote tuple %s"), desc); + appendStringInfo(&tuple_value, _("remote row %s"), desc); } else { - appendStringInfo(&tuple_value, _("Remote tuple %s"), desc); + appendStringInfo(&tuple_value, _("Remote row %s"), desc); } } } diff --git a/src/include/replication/conflict.h b/src/include/replication/conflict.h index ff3cb8416ecff..e516caa5c73fc 100644 --- a/src/include/replication/conflict.h +++ b/src/include/replication/conflict.h @@ -57,7 +57,7 @@ typedef enum #define CONFLICT_NUM_TYPES (CT_MULTIPLE_UNIQUE_CONFLICTS + 1) /* - * Information for the existing local tuple that caused the conflict. + * Information for the existing local row that caused the conflict. 
*/ typedef struct ConflictTupleInfo { @@ -69,7 +69,7 @@ typedef struct ConflictTupleInfo * the conflict */ RepOriginId origin; /* origin identifier of the modification */ TimestampTz ts; /* timestamp of when the modification on the - * conflicting local tuple occurred */ + * conflicting local row occurred */ } ConflictTupleInfo; extern bool GetTupleTransactionInfo(TupleTableSlot *localslot, diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index 916fdb48b3b34..ca55d8df50d84 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -365,10 +365,10 @@ my $logfile = slurp_file($node_subscriber->logfile, $log_location); ok( $logfile =~ - qr/conflict detected on relation "public.tab_full_pk": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote tuple \(1, quux\); replica identity \(a\)=\(1\)/m, + qr/conflict detected on relation "public.tab_full_pk": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote row \(1, quux\); replica identity \(a\)=\(1\)/m, 'update target row is missing'); ok( $logfile =~ - qr/conflict detected on relation "public.tab_full": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote tuple \(26\); replica identity full \(25\)/m, + qr/conflict detected on relation "public.tab_full": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote row \(26\); replica identity full \(25\)/m, 'update target row is missing'); ok( $logfile =~ qr/conflict detected on relation "public.tab_full_pk": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted.*\n.*Replica identity \(a\)=\(2\)/m, diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index 4f78dd48815f0..763a91e75a3ea 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -368,7 +368,7 @@ BEGIN my $logfile = slurp_file($node_subscriber1->logfile(), $log_location); ok( $logfile =~ - qr/conflict detected on relation "public.tab1_2_2": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote tuple \(null, 4, quux\); replica identity \(a\)=\(4\)/, + qr/conflict detected on relation "public.tab1_2_2": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote row \(null, 4, quux\); replica identity \(a\)=\(4\)/, 'update target row is missing in tab1_2_2'); ok( $logfile =~ qr/conflict detected on relation "public.tab1_1": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted.*\n.*Replica identity \(a\)=\(1\)/, @@ -781,7 +781,7 @@ BEGIN $logfile = slurp_file($node_subscriber1->logfile(), $log_location); ok( $logfile =~ - qr/conflict detected on relation "public.tab2_1": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote tuple \(pub_tab2, quux, 5\); replica identity \(a\)=\(5\)/, + qr/conflict detected on relation "public.tab2_1": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated.*\n.*Remote row \(pub_tab2, quux, 5\); replica identity \(a\)=\(5\)/, 'update target row is missing in tab2_1'); ok( $logfile =~ qr/conflict detected on relation "public.tab2_1": conflict=delete_missing.*\n.*DETAIL:.* Could not find the row to be deleted.*\n.*Replica identity \(a\)=\(1\)/, @@ -802,8 +802,8 @@ BEGIN $logfile = slurp_file($node_subscriber1->logfile(), 
$log_location); ok( $logfile =~ - qr/conflict detected on relation "public.tab2_1": conflict=update_origin_differs.*\n.*DETAIL:.* Updating the row that was modified locally in transaction [0-9]+ at .*\n.*Existing local tuple \(yyy, null, 3\); remote tuple \(pub_tab2, quux, 3\); replica identity \(a\)=\(3\)/, - 'updating a tuple that was modified by a different origin'); + qr/conflict detected on relation "public.tab2_1": conflict=update_origin_differs.*\n.*DETAIL:.* Updating the row that was modified locally in transaction [0-9]+ at .*\n.*Existing local row \(yyy, null, 3\); remote row \(pub_tab2, quux, 3\); replica identity \(a\)=\(3\)/, + 'updating a row that was modified by a different origin'); # The remaining tests no longer test conflict detection. $node_subscriber1->append_conf('postgresql.conf', diff --git a/src/test/subscription/t/029_on_error.pl b/src/test/subscription/t/029_on_error.pl index 243662e1240c8..b59d20599fd23 100644 --- a/src/test/subscription/t/029_on_error.pl +++ b/src/test/subscription/t/029_on_error.pl @@ -30,7 +30,7 @@ sub test_skip_lsn # ERROR with its CONTEXT when retrieving this information. my $contents = slurp_file($node_subscriber->logfile, $offset); $contents =~ - qr/conflict detected on relation "public.tbl".*\n.*DETAIL:.* Key already exists in unique index "tbl_pkey", modified by .*origin.* transaction \d+ at .*\n.*Key \(i\)=\(\d+\); existing local tuple .*; remote tuple .*\n.*CONTEXT:.* for replication target relation "public.tbl" in transaction \d+, finished at ([[:xdigit:]]+\/[[:xdigit:]]+)/m + qr/conflict detected on relation "public.tbl".*\n.*DETAIL:.* Key already exists in unique index "tbl_pkey", modified by .*origin.* transaction \d+ at .*\n.*Key \(i\)=\(\d+\); existing local row .*; remote row .*\n.*CONTEXT:.* for replication target relation "public.tbl" in transaction \d+, finished at ([[:xdigit:]]+\/[[:xdigit:]]+)/m or die "could not get error-LSN"; my $lsn = $1; diff --git a/src/test/subscription/t/030_origin.pl b/src/test/subscription/t/030_origin.pl index 5b82848e5e6d3..ec6518ca01004 100644 --- a/src/test/subscription/t/030_origin.pl +++ b/src/test/subscription/t/030_origin.pl @@ -163,7 +163,7 @@ $node_C->safe_psql('postgres', "UPDATE tab SET a = 33 WHERE a = 32;"); $node_B->wait_for_log( - qr/conflict detected on relation "public.tab": conflict=update_origin_differs.*\n.*DETAIL:.* Updating the row that was modified by a different origin ".*" in transaction [0-9]+ at .*\n.*Existing local tuple \(32\); remote tuple \(33\); replica identity \(a\)=\(32\)/ + qr/conflict detected on relation "public.tab": conflict=update_origin_differs.*\n.*DETAIL:.* Updating the row that was modified by a different origin ".*" in transaction [0-9]+ at .*\n.*Existing local row \(32\); remote row \(33\); replica identity \(a\)=\(32\)/ ); $node_B->safe_psql('postgres', "DELETE FROM tab;"); @@ -179,7 +179,7 @@ $node_C->safe_psql('postgres', "DELETE FROM tab WHERE a = 33;"); $node_B->wait_for_log( - qr/conflict detected on relation "public.tab": conflict=delete_origin_differs.*\n.*DETAIL:.* Deleting the row that was modified by a different origin ".*" in transaction [0-9]+ at .*\n.*Existing local tuple \(33\); replica identity \(a\)=\(33\)/ + qr/conflict detected on relation "public.tab": conflict=delete_origin_differs.*\n.*DETAIL:.* Deleting the row that was modified by a different origin ".*" in transaction [0-9]+ at .*\n.*Existing local row \(33\); replica identity \(a\)=\(33\)/ ); # The remaining tests no longer test conflict detection. 
diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 36aeb14c563af..6b4a9fb88150c 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -79,11 +79,11 @@ $node_subscriber->wait_for_log( qr/conflict detected on relation \"public.conf_tab\": conflict=multiple_unique_conflicts.* .*Key already exists in unique index \"conf_tab_pkey\".* -.*Key \(a\)=\(2\); existing local tuple \(2, 2, 2\); remote tuple \(2, 3, 4\).* +.*Key \(a\)=\(2\); existing local row \(2, 2, 2\); remote row \(2, 3, 4\).* .*Key already exists in unique index \"conf_tab_b_key\".* -.*Key \(b\)=\(3\); existing local tuple \(3, 3, 3\); remote tuple \(2, 3, 4\).* +.*Key \(b\)=\(3\); existing local row \(3, 3, 3\); remote row \(2, 3, 4\).* .*Key already exists in unique index \"conf_tab_c_key\".* -.*Key \(c\)=\(4\); existing local tuple \(4, 4, 4\); remote tuple \(2, 3, 4\)./, +.*Key \(c\)=\(4\); existing local row \(4, 4, 4\); remote row \(2, 3, 4\)./, $log_offset); pass('multiple_unique_conflicts detected during insert'); @@ -111,11 +111,11 @@ $node_subscriber->wait_for_log( qr/conflict detected on relation \"public.conf_tab\": conflict=multiple_unique_conflicts.* .*Key already exists in unique index \"conf_tab_pkey\".* -.*Key \(a\)=\(6\); existing local tuple \(6, 6, 6\); remote tuple \(6, 7, 8\).* +.*Key \(a\)=\(6\); existing local row \(6, 6, 6\); remote row \(6, 7, 8\).* .*Key already exists in unique index \"conf_tab_b_key\".* -.*Key \(b\)=\(7\); existing local tuple \(7, 7, 7\); remote tuple \(6, 7, 8\).* +.*Key \(b\)=\(7\); existing local row \(7, 7, 7\); remote row \(6, 7, 8\).* .*Key already exists in unique index \"conf_tab_c_key\".* -.*Key \(c\)=\(8\); existing local tuple \(8, 8, 8\); remote tuple \(6, 7, 8\)./, +.*Key \(c\)=\(8\); existing local row \(8, 8, 8\); remote row \(6, 7, 8\)./, $log_offset); pass('multiple_unique_conflicts detected during update'); @@ -139,9 +139,9 @@ $node_subscriber->wait_for_log( qr/conflict detected on relation \"public.conf_tab_2_p1\": conflict=multiple_unique_conflicts.* .*Key already exists in unique index \"conf_tab_2_p1_pkey\".* -.*Key \(a\)=\(55\); existing local tuple \(55, 2, 3\); remote tuple \(55, 2, 3\).* +.*Key \(a\)=\(55\); existing local row \(55, 2, 3\); remote row \(55, 2, 3\).* .*Key already exists in unique index \"conf_tab_2_p1_a_b_key\".* -.*Key \(a, b\)=\(55, 2\); existing local tuple \(55, 2, 3\); remote tuple \(55, 2, 3\)./, +.*Key \(a, b\)=\(55, 2\); existing local row \(55, 2, 3\); remote row \(55, 2, 3\)./, $log_offset); pass('multiple_unique_conflicts detected on a leaf partition during insert'); @@ -314,7 +314,7 @@ ok( $logfile =~ qr/conflict detected on relation "public.tab": conflict=delete_origin_differs.* .*DETAIL:.* Deleting the row that was modified locally in transaction [0-9]+ at .* -.*Existing local tuple \(1, 3\); replica identity \(a\)=\(1\)/, +.*Existing local row \(1, 3\); replica identity \(a\)=\(1\)/, 'delete target row was modified in tab'); $log_location = -s $node_A->logfile; @@ -327,7 +327,7 @@ ok( $logfile =~ qr/conflict detected on relation "public.tab": conflict=update_deleted.* .*DETAIL:.* The row to be updated was deleted locally in transaction [0-9]+ at .* -.*Remote tuple \(1, 3\); replica identity \(a\)=\(1\)/, +.*Remote row \(1, 3\); replica identity \(a\)=\(1\)/, 'update target row was deleted in tab'); # Remember the next transaction ID to be assigned @@ -383,7 +383,7 @@ ok( $logfile =~ qr/conflict detected on relation "public.tab": 
conflict=update_deleted.* .*DETAIL:.* The row to be updated was deleted locally in transaction [0-9]+ at .* -.*Remote tuple \(2, 4\); replica identity full \(2, 2\)/, +.*Remote row \(2, 4\); replica identity full \(2, 2\)/, 'update target row was deleted in tab'); ############################################################################### From f5e0186f865cc188ef6f4b2bc77d0c028f78195e Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 26 Aug 2025 10:43:14 +0200 Subject: [PATCH 568/602] Raise C requirement to C11 This changes configure and meson.build to require at least C11, instead of the previous C99. The installation documentation is updated accordingly. configure.ac previously used AC_PROG_CC_C99 to activate C99. But there is no AC_PROG_CC_C11 in Autoconf 2.69, because it's too old. (Also, post-2.69, the AC_PROG_CC_Cnn macros were deprecated and AC_PROG_CC activates the last supported C mode.) We could update the required Autoconf version, but that might be a separate project that no one wants to undertake at the moment. Instead, we open-code the test for C11 using some inspiration from later Autoconf versions. But instead of writing an elaborate test program, we keep it simple and just check __STDC_VERSION__, which should be good enough in practice. In meson.build, we update the existing C99 test to C11, but again we just check for __STDC_VERSION__. This also removes the separate option for the conforming preprocessor on MSVC, added by commit 8fd9bb1d965, since that is activated automatically in C11 mode. Note, we don't use the "official" way to set the C standard in Meson using the c_std project option, because that is impossible to use correctly (see ). Reviewed-by: David Rowley Discussion: https://www.postgresql.org/message-id/flat/01a69441-af54-4822-891b-ca28e05b215a@eisentraut.org --- configure | 204 +++++---------------------------- configure.ac | 29 ++++- doc/src/sgml/installation.sgml | 7 +- doc/src/sgml/sources.sgml | 12 +- meson.build | 59 ++++------ 5 files changed, 83 insertions(+), 228 deletions(-) diff --git a/configure b/configure index 507a2437c3308..39c68161ceced 100755 --- a/configure +++ b/configure @@ -4475,190 +4475,49 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5 -$as_echo_n "checking for $CC option to accept ISO C99... " >&6; } -if ${ac_cv_prog_cc_c99+:} false; then : + +# Detect option needed for C11 +# loosely modeled after code in later Autoconf versions +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C11" >&5 +$as_echo_n "checking for $CC option to accept ISO C11... " >&6; } + +if ${pgac_cv_prog_cc_c11+:} false; then : $as_echo_n "(cached) " >&6 else - ac_cv_prog_cc_c99=no -ac_save_CC=$CC -cat confdefs.h - <<_ACEOF >conftest.$ac_ext + pgac_cv_prog_cc_c11=no +pgac_save_CC=$CC +for pgac_arg in '' '-std=gnu11' '-std=c11'; do + CC="$pgac_save_CC $pgac_arg" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include -#include -#include -#include -#include - -// Check varargs macros. These examples are taken from C99 6.10.3.5. -#define debug(...) fprintf (stderr, __VA_ARGS__) -#define showlist(...) puts (#__VA_ARGS__) -#define report(test,...) ((test) ? 
puts (#test) : printf (__VA_ARGS__)) -static void -test_varargs_macros (void) -{ - int x = 1234; - int y = 5678; - debug ("Flag"); - debug ("X = %d\n", x); - showlist (The first, second, and third items.); - report (x>y, "x is %d but y is %d", x, y); -} - -// Check long long types. -#define BIG64 18446744073709551615ull -#define BIG32 4294967295ul -#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) -#if !BIG_OK - your preprocessor is broken; -#endif -#if BIG_OK -#else - your preprocessor is broken; +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" #endif -static long long int bignum = -9223372036854775807LL; -static unsigned long long int ubignum = BIG64; - -struct incomplete_array -{ - int datasize; - double data[]; -}; - -struct named_init { - int number; - const wchar_t *name; - double average; -}; - -typedef const char *ccp; - -static inline int -test_restrict (ccp restrict text) -{ - // See if C++-style comments work. - // Iterate through items via the restricted pointer. - // Also check for declarations in for loops. - for (unsigned int i = 0; *(text+i) != '\0'; ++i) - continue; - return 0; -} - -// Check varargs and va_copy. -static void -test_varargs (const char *format, ...) -{ - va_list args; - va_start (args, format); - va_list args_copy; - va_copy (args_copy, args); - - const char *str; - int number; - float fnumber; - - while (*format) - { - switch (*format++) - { - case 's': // string - str = va_arg (args_copy, const char *); - break; - case 'd': // int - number = va_arg (args_copy, int); - break; - case 'f': // float - fnumber = va_arg (args_copy, double); - break; - default: - break; - } - } - va_end (args_copy); - va_end (args); -} - -int -main () -{ - - // Check bool. - _Bool success = false; - - // Check restrict. - if (test_restrict ("String literal") == 0) - success = true; - char *restrict newvar = "Another string"; - - // Check varargs. - test_varargs ("s, d' f .", "string", 65, 34.234); - test_varargs_macros (); - - // Check flexible array members. - struct incomplete_array *ia = - malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); - ia->datasize = 10; - for (int i = 0; i < ia->datasize; ++i) - ia->data[i] = i * 1.234; - - // Check named initializers. 
- struct named_init ni = { - .number = 34, - .name = L"Test wide string", - .average = 543.34343, - }; - - ni.number = 58; - - int dynamic_array[ni.number]; - dynamic_array[ni.number - 1] = 543; - - // work around unused variable warnings - return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x' - || dynamic_array[ni.number - 1] != 543); - - ; - return 0; -} _ACEOF -for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99 -do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_c99=$ac_arg +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_prog_cc_c11=$pgac_arg fi -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c99" != "xno" && break +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test x"$pgac_cv_prog_cc_c11" != x"no" && break done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c99" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c99" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 -$as_echo "$ac_cv_prog_cc_c99" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c99" != xno; then : - +CC=$pgac_save_CC fi - -# Error out if the compiler does not support C99, as the codebase -# relies on that. -if test "$ac_cv_prog_cc_c99" = no; then - as_fn_error $? "C compiler \"$CC\" does not support C99" "$LINENO" 5 +if test x"$pgac_cv_prog_cc_c11" = x"no"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } + as_fn_error $? "C compiler \"$CC\" does not support C11" "$LINENO" 5 +elif test x"$pgac_cv_prog_cc_c11" = x""; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_cc_c11" >&5 +$as_echo "$pgac_cv_prog_cc_c11" >&6; } + CC="$CC $pgac_cv_prog_cc_c11" fi + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -4920,7 +4779,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # Check if it's Intel's compiler, which (usually) pretends to be gcc, # but has idiosyncrasies of its own. We assume icc will define # __INTEL_COMPILER regardless of CFLAGS. - cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ diff --git a/configure.ac b/configure.ac index 5f4548adc5cd1..066e3976c0aac 100644 --- a/configure.ac +++ b/configure.ac @@ -364,14 +364,33 @@ pgac_cc_list="gcc cc" pgac_cxx_list="g++ c++" AC_PROG_CC([$pgac_cc_list]) -AC_PROG_CC_C99() -# Error out if the compiler does not support C99, as the codebase -# relies on that. 
-if test "$ac_cv_prog_cc_c99" = no; then - AC_MSG_ERROR([C compiler "$CC" does not support C99]) +# Detect option needed for C11 +# loosely modeled after code in later Autoconf versions +AC_MSG_CHECKING([for $CC option to accept ISO C11]) +AC_CACHE_VAL([pgac_cv_prog_cc_c11], +[pgac_cv_prog_cc_c11=no +pgac_save_CC=$CC +for pgac_arg in '' '-std=gnu11' '-std=c11'; do + CC="$pgac_save_CC $pgac_arg" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif]])], [[pgac_cv_prog_cc_c11=$pgac_arg]]) + test x"$pgac_cv_prog_cc_c11" != x"no" && break +done +CC=$pgac_save_CC]) + +if test x"$pgac_cv_prog_cc_c11" = x"no"; then + AC_MSG_RESULT([unsupported]) + AC_MSG_ERROR([C compiler "$CC" does not support C11]) +elif test x"$pgac_cv_prog_cc_c11" = x""; then + AC_MSG_RESULT([none needed]) +else + AC_MSG_RESULT([$pgac_cv_prog_cc_c11]) + CC="$CC $pgac_cv_prog_cc_c11" fi + AC_PROG_CXX([$pgac_cxx_list]) # Check if it's Intel's compiler, which (usually) pretends to be gcc, diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index 8e5da767c48b2..a4ad80a678211 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -71,10 +71,9 @@ - You need an ISO/ANSI C compiler (at least - C99-compliant). Recent - versions of GCC are recommended, but - PostgreSQL is known to build using a wide variety + You need a C compiler that supports at least C11. Recent versions of + GCC are recommended, but + PostgreSQL is known to build using a variety of compilers from different vendors. diff --git a/doc/src/sgml/sources.sgml b/doc/src/sgml/sources.sgml index fa68d4d024a93..261f19b3534b5 100644 --- a/doc/src/sgml/sources.sgml +++ b/doc/src/sgml/sources.sgml @@ -907,12 +907,12 @@ BETTER: unrecognized node type: 42 C Standard Code in PostgreSQL should only rely on language - features available in the C99 standard. That means a conforming - C99 compiler has to be able to compile postgres, at least aside + features available in the C11 standard. That means a conforming + C11 compiler has to be able to compile postgres, at least aside from a few platform dependent pieces. - A few features included in the C99 standard are, at this time, not + A few features included in the C11 standard are, at this time, not permitted to be used in core PostgreSQL code. This currently includes variable length arrays, intermingled declarations and code, // comments, universal @@ -924,13 +924,11 @@ BETTER: unrecognized node type: 42 features can be used, if a fallback is provided. - For example _Static_assert() and + For example typeof() and __builtin_constant_p are currently used, even though they are from newer revisions of the C standard and a GCC extension respectively. If not available - we respectively fall back to using a C99 compatible replacement that - performs the same checks, but emits rather cryptic messages and do not - use __builtin_constant_p. + we do not use them. diff --git a/meson.build b/meson.build index a87eb913fff30..3aaa37be92067 100644 --- a/meson.build +++ b/meson.build @@ -280,10 +280,6 @@ elif host_system == 'windows' # define before including for getting localtime_r() etc. 
on MinGW cppflags += '-D_POSIX_C_SOURCE' endif - if cc.get_id() == 'msvc' - # required for VA_ARGS_NARGS() in c.h; requires VS 2019 - cppflags += '/Zc:preprocessor' - endif export_file_format = 'win' export_file_suffix = 'def' @@ -550,44 +546,29 @@ dir_doc_extension = dir_doc / 'extension' # used, they need to be added to test_c_args as well. ############################################################### -# Do we need -std=c99 to compile C99 code? We don't want to add -std=c99 -# unnecessarily, because we optionally rely on newer features. -c99_test = ''' -#include -#include -#include -#include - -struct named_init_test { - int a; - int b; -}; - -extern void structfunc(struct named_init_test); - -int main(int argc, char **argv) -{ - struct named_init_test nit = { - .a = 3, - .b = 5, - }; - - for (int loop_var = 0; loop_var < 3; loop_var++) - { - nit.a += nit.b; - } - - structfunc((struct named_init_test){1, 0}); - - return nit.a != 0; -} +# Do we need an option to enable C11? +c11_test = ''' +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif ''' -if not cc.compiles(c99_test, name: 'c99') - if cc.compiles(c99_test, name: 'c99 with -std=c99', args: ['-std=c99']) - cflags += '-std=c99' +if not cc.compiles(c11_test, name: 'C11') + c11_ok = false + if cc.get_id() == 'msvc' + c11_test_args = ['/std:c11'] else - error('C compiler does not support C99') + c11_test_args = ['-std=gnu11', '-std=c11'] + endif + foreach arg : c11_test_args + if cc.compiles(c11_test, name: 'C11 with @0@'.format(arg), args: [arg]) + c11_ok = true + cflags += arg + break + endif + endforeach + if not c11_ok + error('C compiler does not support C11') endif endif From f8ce9ed220b5edd8ba751706bbcdbcc5b64be66e Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 26 Aug 2025 12:49:44 +0300 Subject: [PATCH 569/602] Further clarify documentation for the initcap function This is a follow-up for commit c2c2c7e225. It further clarifies the following in the initcap function documentation: * Document that title case is used for digraphs in specific locales, * Reference particular ICU function used, * Add note about the purpose of the function. Discussion: https://postgr.es/m/804cc10ef95d4d3b298e76b181fd9437%40postgrespro.ru Author: Oleg Tselebrovskiy Co-authored-by: Alexander Korotkov Reviewed-by: Jeff Davis Reviewed-by: Peter Eisentraut --- doc/src/sgml/func/func-string.sgml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/func/func-string.sgml b/doc/src/sgml/func/func-string.sgml index 3eec93eb3395b..01cc94c234e62 100644 --- a/doc/src/sgml/func/func-string.sgml +++ b/doc/src/sgml/func/func-string.sgml @@ -693,12 +693,21 @@ text - Converts the first letter of each word to upper case and the - rest to lower case. When using the libc locale - provider, words are sequences of alphanumeric characters separated - by non-alphanumeric characters; when using the ICU locale provider, - words are separated according to - Unicode Standard Annex #29. + Converts the first letter of each word to upper case (or title case + if the letter is a digraph and locale is ICU or + builtin PG_UNICODE_FAST) + and the rest to lower case. When using the libc or + builtin locale provider, words are sequences of + alphanumeric characters separated by non-alphanumeric characters; + when using the ICU locale provider, words are separated according to + u_strToTitle ICU function. 
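<!-- illustration, not part of the patch: assuming an ICU locale, the digraph
     behavior described above looks like
         SELECT initcap('ǆungla');  yielding 'ǅungla' (the titlecase digraph
     U+01C5) rather than the full uppercase 'Ǆungla'; exact output can vary
     across ICU versions. -->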
+ + + This function is primarily used for convenient + display, and the specific result should not be relied upon because of + the differences between locale providers and between different + ICU versions. If specific word boundary rules are desired, + it is recommended to write a custom function. initcap('hi THOMAS') From d713cf9b65ae1e7090a62c83bd78dec867871185 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 26 Aug 2025 12:51:32 +0300 Subject: [PATCH 570/602] Refactor variable names in remove_self_joins_one_group() Rename inner and outer to rrel and krel, respectively, to highlight their connection to r and k indexes. For the same reason, rename imark and omark to rmark and kmark. Discussion: https://postgr.es/m/18c6bd6c-6d2a-419a-b0da-dfedef34b585%40gmail.com Author: Andrei Lepikhov Reviewed-by: Greg Sabino Mullane Backpatch-through: 18 --- src/backend/optimizer/plan/analyzejoins.c | 50 +++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index da92d8ee41442..15e8235163995 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -2141,21 +2141,21 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) while ((r = bms_next_member(relids, r)) > 0) { - RelOptInfo *inner = root->simple_rel_array[r]; + RelOptInfo *rrel = root->simple_rel_array[r]; k = r; while ((k = bms_next_member(relids, k)) > 0) { Relids joinrelids = NULL; - RelOptInfo *outer = root->simple_rel_array[k]; + RelOptInfo *krel = root->simple_rel_array[k]; List *restrictlist; List *selfjoinquals; List *otherjoinquals; ListCell *lc; bool jinfo_check = true; - PlanRowMark *omark = NULL; - PlanRowMark *imark = NULL; + PlanRowMark *kmark = NULL; + PlanRowMark *rmark = NULL; List *uclauses = NIL; /* A sanity check: the relations have the same Oid. */ @@ -2195,19 +2195,19 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) if (rowMark->rti == k) { - Assert(imark == NULL); - imark = rowMark; + Assert(rmark == NULL); + rmark = rowMark; } else if (rowMark->rti == r) { - Assert(omark == NULL); - omark = rowMark; + Assert(kmark == NULL); + kmark = rowMark; } - if (omark && imark) + if (kmark && rmark) break; } - if (omark && imark && omark->markType != imark->markType) + if (kmark && rmark && kmark->markType != rmark->markType) continue; /* @@ -2228,8 +2228,8 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) * build_joinrel_restrictlist() routine. */ restrictlist = generate_join_implied_equalities(root, joinrelids, - inner->relids, - outer, NULL); + rrel->relids, + krel, NULL); if (restrictlist == NIL) continue; @@ -2239,7 +2239,7 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) * otherjoinquals. */ split_selfjoin_quals(root, restrictlist, &selfjoinquals, - &otherjoinquals, inner->relid, outer->relid); + &otherjoinquals, rrel->relid, krel->relid); Assert(list_length(restrictlist) == (list_length(selfjoinquals) + list_length(otherjoinquals))); @@ -2250,17 +2250,17 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) * degenerate case works only if both sides have the same clause. * So doesn't matter which side to add. */ - selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo); + selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo); /* - * Determine if the inner table can duplicate outer rows. 
We must
- * bypass the unique rel cache here since we're possibly using a
- * subset of join quals. We can use 'force_cache' == true when all
- * join quals are self-join quals. Otherwise, we could end up
- * putting false negatives in the cache.
+ * Determine if the rrel can duplicate outer rows. We must bypass
+ * the unique rel cache here since we're possibly using a subset
+ * of join quals. We can use 'force_cache' == true when all join
+ * quals are self-join quals. Otherwise, we could end up putting
+ * false negatives in the cache.
 */
- if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
- outer, JOIN_INNER, selfjoinquals,
+ if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
+ krel, JOIN_INNER, selfjoinquals,
 list_length(otherjoinquals) == 0,
 &uclauses))
 continue;
@@ -2276,14 +2276,14 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids)
 * expressions, or we won't match the same row on each side of the
 * join.
 */
- if (!match_unique_clauses(root, inner, uclauses, outer->relid))
+ if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
 continue;
 /*
- * We can remove either relation, so remove the inner one in order
- * to simplify this loop.
+ * Remove rrel ReloptInfo from the planner structures and the
+ * corresponding row mark.
 */
- remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
+ remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
 result = bms_add_member(result, r);

From 5f6f951f88e53630b3ebe9bde762a9612ca6202f Mon Sep 17 00:00:00 2001
From: Alexander Korotkov
Date: Sun, 24 Aug 2025 03:34:22 +0300
Subject: [PATCH 571/602] Improve RowMark handling during Self-Join Elimination

The Self-Join Elimination (SJE) feature mixed up which RowMark to keep
and which to remove in remove_self_joins_one_group(). That didn't lead
to a user-level error, because the planned RowMark is only used to
reference an rtable entry in later execution stages, and the RTE entries
of the kept and the removed relation are identical, referring to the
same relation OID. To reduce confusion and prevent future issues, this
commit cleans up the code and fixes the incorrect behaviour.
Furthermore, it adds sanity checks in setrefs.c asserting that the RTE
and RelOptInfo entries for each RowMark are non-null.

Discussion: https://postgr.es/m/18c6bd6c-6d2a-419a-b0da-dfedef34b585%40gmail.com
Author: Andrei Lepikhov
Reviewed-by: Greg Sabino Mullane
Backpatch-through: 18
---
 src/backend/optimizer/plan/analyzejoins.c | 9 ++++++---
 src/backend/optimizer/plan/setrefs.c | 4 ++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 15e8235163995..2a3dea88a94fb 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -631,6 +631,7 @@ remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
 * remove_join_clause_from_rels will touch it.)
 */
 root->simple_rel_array[relid] = NULL;
+ root->simple_rte_array[relid] = NULL;

 /* And nuke the RelOptInfo, just in case there's another access path */
 pfree(rel);

@@ -1978,10 +1979,12 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
 * remove_join_clause_from_rels will touch it.)
 */
 root->simple_rel_array[toRemove->relid] = NULL;
+ root->simple_rte_array[toRemove->relid] = NULL;

 /* And nuke the RelOptInfo, just in case there's another access path. */
 pfree(toRemove);

+
 /*
 * Now repeat construction of attr_needed bits coming from all other
 * sources.
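To make the affected scenario concrete, a hedged sketch (table and column
names are illustrative) of a self-join that Self-Join Elimination can
collapse into a single scan; FOR UPDATE is included so that a PlanRowMark
is attached to each of the two range-table entries, which is exactly the
pairing this commit straightens out:

    CREATE TABLE sje_demo (id int PRIMARY KEY, payload text);

    -- Both sides join on the unique key, so the planner can keep just one
    -- of the two identical range-table entries; the row mark of the kept
    -- entry must survive, the other one must go.
    EXPLAIN
    SELECT a.payload
    FROM sje_demo a
         JOIN sje_demo b ON a.id = b.id
    WHERE b.payload <> ''
    FOR UPDATE;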
@@ -2193,12 +2196,12 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) { PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); - if (rowMark->rti == k) + if (rowMark->rti == r) { Assert(rmark == NULL); rmark = rowMark; } - else if (rowMark->rti == r) + else if (rowMark->rti == k) { Assert(kmark == NULL); kmark = rowMark; @@ -2253,7 +2256,7 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids) selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo); /* - * Determine if the rrel can duplicate outer rows. We must bypass + * Determine if the rrel can duplicate outer rows. We must bypass * the unique rel cache here since we're possibly using a subset * of join quals. We can use 'force_cache' == true when all join * quals are self-join quals. Otherwise, we could end up putting diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 846e44186c366..d706546f33264 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -307,6 +307,10 @@ set_plan_references(PlannerInfo *root, Plan *plan) PlanRowMark *rc = lfirst_node(PlanRowMark, lc); PlanRowMark *newrc; + /* sanity check on existing row marks */ + Assert(root->simple_rel_array[rc->rti] != NULL && + root->simple_rte_array[rc->rti] != NULL); + /* flat copy is enough since all fields are scalars */ newrc = (PlanRowMark *) palloc(sizeof(PlanRowMark)); memcpy(newrc, rc, sizeof(PlanRowMark)); From b55068236c5e92e211a6bbf8816933fdac02da80 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 26 Aug 2025 11:38:41 -0400 Subject: [PATCH 572/602] Do CHECK_FOR_INTERRUPTS inside, not before, scanGetItem. The CHECK_FOR_INTERRUPTS call in gingetbitmap turns out to be inadequate to prevent a long uninterruptible loop, because we now know a case where looping occurs within scanGetItem. While the next patch will fix the bug that caused that, it seems foolish to assume that no similar patterns are possible. Let's do the CFI within scanGetItem's retry loop, instead. This demonstrably allows canceling out of the loop exhibited in bug #19031. Bug: #19031 Reported-by: Tim Wood Author: Tom Lane Discussion: https://postgr.es/m/19031-0638148643d25548@postgresql.org Backpatch-through: 13 --- src/backend/access/gin/ginget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index f29ccd3c2d1ff..656299b1b528f 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -1327,6 +1327,8 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast, */ do { + CHECK_FOR_INTERRUPTS(); + ItemPointerSetMin(item); match = true; for (i = 0; i < so->nkeys && match; i++) @@ -1966,8 +1968,6 @@ gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm) for (;;) { - CHECK_FOR_INTERRUPTS(); - if (!scanGetItem(scan, iptr, &iptr, &recheck)) break; From 327b7324d0715f5e1cdd3765f96a486c7b47e778 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 26 Aug 2025 12:08:57 -0400 Subject: [PATCH 573/602] Put "excludeOnly" GIN scan keys at the end of the scankey array. Commit 4b754d6c1 introduced the concept of an excludeOnly scan key, which cannot select matching index entries but can reject non-matching tuples, for example a tsquery such as '!term'. There are poorly-documented assumptions that such scan keys do not appear as the first scan key. 
ginNewScanKey did nothing to ensure that, however, with the result that certain GIN index searches could go into an infinite loop while apparently-equivalent queries with the clauses in a different order were fine. Fix by teaching ginNewScanKey to place all excludeOnly scan keys after all not-excludeOnly ones. So far as we know at present, it might be sufficient to avoid the case where the very first scan key is excludeOnly; but I'm not very convinced that there aren't other dependencies on the ordering. Bug: #19031 Reported-by: Tim Wood Author: Tom Lane Discussion: https://postgr.es/m/19031-0638148643d25548@postgresql.org Backpatch-through: 13 --- contrib/pg_trgm/expected/pg_trgm.out | 46 ++++++++++++++++++++++++++++ contrib/pg_trgm/sql/pg_trgm.sql | 8 +++++ src/backend/access/gin/ginscan.c | 43 ++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out index 0b70d9de25624..04da98170ab15 100644 --- a/contrib/pg_trgm/expected/pg_trgm.out +++ b/contrib/pg_trgm/expected/pg_trgm.out @@ -4693,6 +4693,23 @@ select count(*) from test_trgm where t like '%99%' and t like '%qw%'; 19 (1 row) +explain (costs off) +select count(*) from test_trgm where t %> '' and t %> '%qwerty%'; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_trgm + Recheck Cond: ((t %> ''::text) AND (t %> '%qwerty%'::text)) + -> Bitmap Index Scan on trgm_idx + Index Cond: ((t %> ''::text) AND (t %> '%qwerty%'::text)) +(5 rows) + +select count(*) from test_trgm where t %> '' and t %> '%qwerty%'; + count +------- + 0 +(1 row) + -- ensure that pending-list items are handled correctly, too create temp table t_test_trgm(t text COLLATE "C"); create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); @@ -4731,6 +4748,23 @@ select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; 1 (1 row) +explain (costs off) +select count(*) from t_test_trgm where t %> '' and t %> '%qwerty%'; + QUERY PLAN +------------------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on t_test_trgm + Recheck Cond: ((t %> ''::text) AND (t %> '%qwerty%'::text)) + -> Bitmap Index Scan on t_trgm_idx + Index Cond: ((t %> ''::text) AND (t %> '%qwerty%'::text)) +(5 rows) + +select count(*) from t_test_trgm where t %> '' and t %> '%qwerty%'; + count +------- + 0 +(1 row) + -- run the same queries with sequential scan to check the results set enable_bitmapscan=off; set enable_seqscan=on; @@ -4746,6 +4780,12 @@ select count(*) from test_trgm where t like '%99%' and t like '%qw%'; 19 (1 row) +select count(*) from test_trgm where t %> '' and t %> '%qwerty%'; + count +------- + 0 +(1 row) + select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; count ------- @@ -4758,6 +4798,12 @@ select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; 1 (1 row) +select count(*) from t_test_trgm where t %> '' and t %> '%qwerty%'; + count +------- + 0 +(1 row) + reset enable_bitmapscan; create table test2(t text COLLATE "C"); insert into test2 values ('abcdef'); diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql index 340c9891899f0..44debced6d581 100644 --- a/contrib/pg_trgm/sql/pg_trgm.sql +++ b/contrib/pg_trgm/sql/pg_trgm.sql @@ -80,6 +80,9 @@ select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; explain (costs off) select count(*) from test_trgm where t like '%99%' and t like 
'%qw%'; select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +explain (costs off) +select count(*) from test_trgm where t %> '' and t %> '%qwerty%'; +select count(*) from test_trgm where t %> '' and t %> '%qwerty%'; -- ensure that pending-list items are handled correctly, too create temp table t_test_trgm(t text COLLATE "C"); create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops); @@ -90,14 +93,19 @@ select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; explain (costs off) select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; +explain (costs off) +select count(*) from t_test_trgm where t %> '' and t %> '%qwerty%'; +select count(*) from t_test_trgm where t %> '' and t %> '%qwerty%'; -- run the same queries with sequential scan to check the results set enable_bitmapscan=off; set enable_seqscan=on; select count(*) from test_trgm where t like '%99%' and t like '%qwerty%'; select count(*) from test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from test_trgm where t %> '' and t %> '%qwerty%'; select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%'; select count(*) from t_test_trgm where t like '%99%' and t like '%qw%'; +select count(*) from t_test_trgm where t %> '' and t %> '%qwerty%'; reset enable_bitmapscan; create table test2(t text COLLATE "C"); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index c2d1771bd77b5..26081693383c7 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -271,6 +271,7 @@ ginNewScanKey(IndexScanDesc scan) ScanKey scankey = scan->keyData; GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; + int numExcludeOnly; bool hasNullQuery = false; bool attrHasNormalScan[INDEX_MAX_KEYS] = {false}; MemoryContext oldCtx; @@ -393,6 +394,7 @@ ginNewScanKey(IndexScanDesc scan) * excludeOnly scan key must receive a GIN_CAT_EMPTY_QUERY hidden entry * and be set to normal (excludeOnly = false). */ + numExcludeOnly = 0; for (i = 0; i < so->nkeys; i++) { GinScanKey key = &so->keys[i]; @@ -406,6 +408,47 @@ ginNewScanKey(IndexScanDesc scan) ginScanKeyAddHiddenEntry(so, key, GIN_CAT_EMPTY_QUERY); attrHasNormalScan[key->attnum - 1] = true; } + else + numExcludeOnly++; + } + + /* + * If we left any excludeOnly scan keys as-is, move them to the end of the + * scan key array: they must appear after normal key(s). + */ + if (numExcludeOnly > 0) + { + GinScanKey tmpkeys; + int iNormalKey; + int iExcludeOnly; + + /* We'd better have made at least one normal key */ + Assert(numExcludeOnly < so->nkeys); + /* Make a temporary array to hold the re-ordered scan keys */ + tmpkeys = (GinScanKey) palloc(so->nkeys * sizeof(GinScanKeyData)); + /* Re-order the keys ... */ + iNormalKey = 0; + iExcludeOnly = so->nkeys - numExcludeOnly; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = &so->keys[i]; + + if (key->excludeOnly) + { + memcpy(tmpkeys + iExcludeOnly, key, sizeof(GinScanKeyData)); + iExcludeOnly++; + } + else + { + memcpy(tmpkeys + iNormalKey, key, sizeof(GinScanKeyData)); + iNormalKey++; + } + } + Assert(iNormalKey == so->nkeys - numExcludeOnly); + Assert(iExcludeOnly == so->nkeys); + /* ... 
and copy them back to so->keys[] */
+ memcpy(so->keys, tmpkeys, so->nkeys * sizeof(GinScanKeyData));
+ pfree(tmpkeys);
 }

 /*

From 984d7165dde7133dbdddc95e95ff56bbb177f628 Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Tue, 26 Aug 2025 14:49:01 -0500
Subject: [PATCH 574/602] Document privileges required for vacuumdb
 --missing-stats-only.

When vacuumdb's --missing-stats-only option is used, the catalog query
for retrieving the list of relations to process must read pg_statistic
and pg_statistic_ext_data. However, those catalogs can only be read by
superusers by default, so --missing-stats-only is effectively
superuser-only. This is unfortunate, but since the option is primarily
intended for use by administrators after running pg_upgrade, let's just
live with it for v18.

This commit adds a note about the aforementioned privilege requirements
to the documentation for --missing-stats-only.

We first tried to improve matters by modifying the query to read the
pg_stats and pg_stats_ext system views instead. While that is indeed
more lenient from a privilege standpoint, it is also borderline
incomprehensible. pg_stats shows rows for which the user has the SELECT
privilege on the corresponding column, and pg_stats_ext shows rows for
tables the user owns. Meanwhile, ANALYZE requires either MAINTAIN on the
table or, for non-shared relations, ownership of the database. But even
if the privilege discrepancies were tolerable, the performance impact
was not. Ultimately, the modified query was substantially more
expensive, so we abandoned the idea.

For v19, perhaps we could introduce a simple, inexpensive way to
discover which relations are missing statistics, such as a system
function or view with similar privilege requirements to ANALYZE.
Unfortunately, it is far too late for anything like that in v18.

Reviewed-by: Yugo Nagata
Reviewed-by: Fujii Masao
Discussion: https://postgr.es/m/CAHGQGwHh43suEfss1wvBsk7vqiou%3DUY0zcy8HGyE5hBp%2BHZ7SQ%40mail.gmail.com
Backpatch-through: 18
---
 doc/src/sgml/ref/vacuumdb.sgml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml
index 53147480515e6..84c76d7350c83 100644
--- a/doc/src/sgml/ref/vacuumdb.sgml
+++ b/doc/src/sgml/ref/vacuumdb.sgml
@@ -292,6 +292,14 @@ PostgreSQL documentation
 This option can only be used in conjunction with --analyze-only or
 --analyze-in-stages.
+
+ Note that --missing-stats-only requires
+ SELECT privileges on
+ pg_statistic
+ and
+ pg_statistic_ext_data,
+ which are restricted to superusers by default.
+
+

From e567e22290544c57293f8e5a913292dacd3bcd1a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 26 Aug 2025 22:48:00 +0200
Subject: [PATCH 575/602] Message style improvements

Mostly adding some quoting.
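Back on the --missing-stats-only note in the preceding patch: in practice
the requirement boils down to grants like the following sketch (the role
name is illustrative, and ANALYZE itself has separate requirements, as
that commit message explains):

    -- let a non-superuser maintenance role read the two catalogs
    GRANT SELECT ON pg_catalog.pg_statistic TO maint_role;
    GRANT SELECT ON pg_catalog.pg_statistic_ext_data TO maint_role;
    -- ANALYZE additionally needs MAINTAIN on the target tables
    GRANT MAINTAIN ON ALL TABLES IN SCHEMA public TO maint_role;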
--- src/backend/catalog/storage.c | 2 +- src/backend/storage/aio/method_io_uring.c | 2 +- src/backend/storage/buffer/bufmgr.c | 34 +++++++++--------- src/test/modules/test_aio/t/001_aio.pl | 44 +++++++++++------------ 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index fb784acf4af24..c58e9418ac313 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -546,7 +546,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst, ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page in block %u of relation %s", + errmsg("invalid page in block %u of relation \"%s\"", blkno, relpath.str))); } diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index 0a8c054162f06..093a71359ef6d 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -377,7 +377,7 @@ pgaio_uring_shmem_init(bool first_time) else if (-ret == ENOSYS) { err = ERRCODE_FEATURE_NOT_SUPPORTED; - hint = _("Kernel does not support io_uring."); + hint = _("The kernel does not support io_uring."); } /* update errno to allow %m to work */ diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index fd7e21d96d31f..350cc0402aa8f 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2769,7 +2769,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, if (valid && !PageIsNew((Page) buf_block)) ereport(ERROR, - (errmsg("unexpected data beyond EOF in block %u of relation %s", + (errmsg("unexpected data beyond EOF in block %u of relation \"%s\"", existing_hdr->tag.blockNum, relpath(bmr.smgr->smgr_rlocator, fork).str))); @@ -6191,7 +6191,7 @@ shared_buffer_write_error_callback(void *arg) /* Buffer is pinned, so we can read the tag without locking the spinlock */ if (bufHdr != NULL) - errcontext("writing block %u of relation %s", + errcontext("writing block %u of relation \"%s\"", bufHdr->tag.blockNum, relpathperm(BufTagGetRelFileLocator(&bufHdr->tag), BufTagGetForkNum(&bufHdr->tag)).str); @@ -6206,7 +6206,7 @@ local_buffer_write_error_callback(void *arg) BufferDesc *bufHdr = (BufferDesc *) arg; if (bufHdr != NULL) - errcontext("writing block %u of relation %s", + errcontext("writing block %u of relation \"%s\"", bufHdr->tag.blockNum, relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag), MyProcNumber, @@ -7305,13 +7305,15 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td, ereport(elevel, errcode(ERRCODE_DATA_CORRUPTED), - errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation %s", + errmsg("zeroing %u page(s) and ignoring %u checksum failure(s) among blocks %u..%u of relation \"%s\"", affected_count, checkfail_count, first, last, rpath.str), affected_count > 1 ? 
- errdetail("Block %u held first zeroed page.", + errdetail("Block %u held the first zeroed page.", first + first_off) : 0, - errhint("See server log for details about the other %d invalid block(s).", - affected_count + checkfail_count - 1)); + errhint_plural("See server log for details about the other %d invalid block.", + "See server log for details about the other %d invalid blocks.", + affected_count + checkfail_count - 1, + affected_count + checkfail_count - 1)); return; } @@ -7324,25 +7326,25 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td, { Assert(!zeroed_any); /* can't have invalid pages when zeroing them */ affected_count = zeroed_or_error_count; - msg_one = _("invalid page in block %u of relation %s"); - msg_mult = _("%u invalid pages among blocks %u..%u of relation %s"); - det_mult = _("Block %u held first invalid page."); + msg_one = _("invalid page in block %u of relation \"%s\""); + msg_mult = _("%u invalid pages among blocks %u..%u of relation \"%s\""); + det_mult = _("Block %u held the first invalid page."); hint_mult = _("See server log for the other %u invalid block(s)."); } else if (zeroed_any && !ignored_any) { affected_count = zeroed_or_error_count; - msg_one = _("invalid page in block %u of relation %s; zeroing out page"); - msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation %s"); - det_mult = _("Block %u held first zeroed page."); + msg_one = _("invalid page in block %u of relation \"%s\"; zeroing out page"); + msg_mult = _("zeroing out %u invalid pages among blocks %u..%u of relation \"%s\""); + det_mult = _("Block %u held the first zeroed page."); hint_mult = _("See server log for the other %u zeroed block(s)."); } else if (!zeroed_any && ignored_any) { affected_count = checkfail_count; - msg_one = _("ignoring checksum failure in block %u of relation %s"); - msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation %s"); - det_mult = _("Block %u held first ignored page."); + msg_one = _("ignoring checksum failure in block %u of relation \"%s\""); + msg_mult = _("ignoring %u checksum failures among blocks %u..%u of relation \"%s\""); + det_mult = _("Block %u held the first ignored page."); hint_mult = _("See server log for the other %u ignored block(s)."); } else diff --git a/src/test/modules/test_aio/t/001_aio.pl b/src/test/modules/test_aio/t/001_aio.pl index 82ffffc058f75..3f0453619e896 100644 --- a/src/test/modules/test_aio/t/001_aio.pl +++ b/src/test/modules/test_aio/t/001_aio.pl @@ -396,8 +396,8 @@ sub test_io_error { my $invalid_page_re = $tblname eq 'tbl_corr' - ? qr/invalid page in block 1 of relation base\/\d+\/\d+/ - : qr/invalid page in block 1 of relation base\/\d+\/t\d+_\d+/; + ? 
qr/invalid page in block 1 of relation "base\/\d+\/\d+/ + : qr/invalid page in block 1 of relation "base\/\d+\/t\d+_\d+/; # verify the error is reported in custom C code psql_like( @@ -798,7 +798,7 @@ sub test_inject "shortened multi-block read detects invalid page", qq(SELECT count(*) FROM tbl_corr WHERE ctid < '(2, 1)'), qr/^$/, - qr/ERROR:.*invalid page in block 1 of relation base\/.*/); + qr/ERROR:.*invalid page in block 1 of relation "base\/.*/); # trigger a hard error, should error out $psql->query_safe( @@ -985,7 +985,7 @@ sub test_zero qq( SELECT read_rel_block_ll('tbl_zero', 0, zero_on_error=>false)), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 0 of relation base\/.*\/.*$/ + qr/^psql::\d+: ERROR: invalid page in block 0 of relation "base\/.*\/.*$/ ); # Check that page validity errors are zeroed @@ -996,7 +996,7 @@ sub test_zero qq( SELECT read_rel_block_ll('tbl_zero', 0, zero_on_error=>true)), qr/^$/, - qr/^psql::\d+: WARNING: invalid page in block 0 of relation base\/.*\/.*; zeroing out page$/ + qr/^psql::\d+: WARNING: invalid page in block 0 of relation "base\/.*\/.*"; zeroing out page$/ ); # And that once the corruption is fixed, we can read again @@ -1027,7 +1027,7 @@ sub test_zero "$persistency: test zeroing of invalid block 3", qq(SELECT read_rel_block_ll('tbl_zero', 3, zero_on_error=>true);), qr/^$/, - qr/^psql::\d+: WARNING: invalid page in block 3 of relation base\/.*\/.*; zeroing out page$/ + qr/^psql::\d+: WARNING: invalid page in block 3 of relation "base\/.*\/.*"; zeroing out page$/ ); @@ -1044,7 +1044,7 @@ sub test_zero "$persistency: test reading of invalid block 2,3 in larger read", qq(SELECT read_rel_block_ll('tbl_zero', 1, nblocks=>4, zero_on_error=>false)), qr/^$/, - qr/^psql::\d+: ERROR: 2 invalid pages among blocks 1..4 of relation base\/.*\/.*\nDETAIL: Block 2 held first invalid page\.\nHINT:[^\n]+$/ + qr/^psql::\d+: ERROR: 2 invalid pages among blocks 1..4 of relation "base\/.*\/.*\nDETAIL: Block 2 held the first invalid page\.\nHINT:[^\n]+$/ ); # Then test zeroing via ZERO_ON_ERROR flag @@ -1054,7 +1054,7 @@ sub test_zero "$persistency: test zeroing of invalid block 2,3 in larger read, ZERO_ON_ERROR", qq(SELECT read_rel_block_ll('tbl_zero', 1, nblocks=>4, zero_on_error=>true)), qr/^$/, - qr/^psql::\d+: WARNING: zeroing out 2 invalid pages among blocks 1..4 of relation base\/.*\/.*\nDETAIL: Block 2 held first zeroed page\.\nHINT:[^\n]+$/ + qr/^psql::\d+: WARNING: zeroing out 2 invalid pages among blocks 1..4 of relation "base\/.*\/.*\nDETAIL: Block 2 held the first zeroed page\.\nHINT:[^\n]+$/ ); # Then test zeroing via zero_damaged_pages @@ -1069,7 +1069,7 @@ sub test_zero COMMIT; ), qr/^$/, - qr/^psql::\d+: WARNING: zeroing out 2 invalid pages among blocks 1..4 of relation base\/.*\/.*\nDETAIL: Block 2 held first zeroed page\.\nHINT:[^\n]+$/ + qr/^psql::\d+: WARNING: zeroing out 2 invalid pages among blocks 1..4 of relation "base\/.*\/.*\nDETAIL: Block 2 held the first zeroed page\.\nHINT:[^\n]+$/ ); $psql_a->query_safe(qq(COMMIT)); @@ -1091,7 +1091,7 @@ sub test_zero qq( SELECT count(*) FROM tbl_zero), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 2 of relation base\/.*\/.*$/ + qr/^psql::\d+: ERROR: invalid page in block 2 of relation "base\/.*\/.*$/ ); # Verify that bufmgr.c IO zeroes out pages with page validity errors @@ -1106,7 +1106,7 @@ sub test_zero COMMIT; ), qr/^\d+$/, - qr/^psql::\d+: WARNING: invalid page in block 2 of relation base\/.*\/.*$/ + qr/^psql::\d+: WARNING: invalid page in block 2 of relation "base\/.*\/.*$/ ); # 
Check that warnings/errors about page validity in an IO started by @@ -1192,7 +1192,7 @@ sub test_checksum qq( SELECT read_rel_block_ll('tbl_normal', 3, nblocks=>1, zero_on_error=>false);), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 3 of relation base\/\d+\/\d+$/ + qr/^psql::\d+: ERROR: invalid page in block 3 of relation "base\/\d+\/\d+"$/ ); my ($cs_count_after, $cs_ts_after) = @@ -1214,7 +1214,7 @@ sub test_checksum qq( SELECT read_rel_block_ll('tbl_temp', 4, nblocks=>2, zero_on_error=>false);), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 4 of relation base\/\d+\/t\d+_\d+$/ + qr/^psql::\d+: ERROR: invalid page in block 4 of relation "base\/\d+\/t\d+_\d+"$/ ); ($cs_count_after, $cs_ts_after) = checksum_failures($psql_a, 'postgres'); @@ -1235,7 +1235,7 @@ sub test_checksum qq( SELECT read_rel_block_ll('pg_shseclabel', 2, nblocks=>2, zero_on_error=>false);), qr/^$/, - qr/^psql::\d+: ERROR: 2 invalid pages among blocks 2..3 of relation global\/\d+\nDETAIL: Block 2 held first invalid page\.\nHINT:[^\n]+$/ + qr/^psql::\d+: ERROR: 2 invalid pages among blocks 2..3 of relation "global\/\d+"\nDETAIL: Block 2 held the first invalid page\.\nHINT:[^\n]+$/ ); ($cs_count_after, $cs_ts_after) = checksum_failures($psql_a); @@ -1300,7 +1300,7 @@ sub test_checksum_createdb "create database w/ wal strategy, invalid source", $createdb_sql, qr/^$/, - qr/psql::\d+: ERROR: invalid page in block 1 of relation base\/\d+\/\d+$/ + qr/psql::\d+: ERROR: invalid page in block 1 of relation "base\/\d+\/\d+"$/ ); my ($cs_count_after, $cs_ts_after) = checksum_failures($psql, 'regression_createdb_source'); @@ -1409,7 +1409,7 @@ sub test_ignore_checksum qq( SELECT read_rel_block_ll('tbl_cs_fail', 2, nblocks=>3, zero_on_error=>false);), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 4 of relation base\/\d+\/\d+$/ + qr/^psql::\d+: ERROR: invalid page in block 4 of relation "base\/\d+\/\d+"$/ ); # Test multi-block read with different problems in different blocks @@ -1431,7 +1431,7 @@ sub test_ignore_checksum qq( SELECT read_rel_block_ll('tbl_cs_fail', 1, nblocks=>5, zero_on_error=>true);), qr/^$/, - qr/^psql::\d+: WARNING: zeroing 3 page\(s\) and ignoring 2 checksum failure\(s\) among blocks 1..5 of relation/ + qr/^psql::\d+: WARNING: zeroing 3 page\(s\) and ignoring 2 checksum failure\(s\) among blocks 1..5 of relation "/ ); @@ -1444,17 +1444,17 @@ sub test_ignore_checksum ok(1, "$io_method: found information about checksum failure in block 2"); $node->wait_for_log( - qr/LOG: invalid page in block 3 of relation base.*; zeroing out page/, + qr/LOG: invalid page in block 3 of relation "base.*"; zeroing out page/, $log_location); ok(1, "$io_method: found information about invalid page in block 3"); $node->wait_for_log( - qr/LOG: invalid page in block 4 of relation base.*; zeroing out page/, + qr/LOG: invalid page in block 4 of relation "base.*"; zeroing out page/, $log_location); ok(1, "$io_method: found information about checksum failure in block 4"); $node->wait_for_log( - qr/LOG: invalid page in block 5 of relation base.*; zeroing out page/, + qr/LOG: invalid page in block 5 of relation "base.*"; zeroing out page/, $log_location); ok(1, "$io_method: found information about checksum failure in block 5"); @@ -1473,7 +1473,7 @@ sub test_ignore_checksum qq( SELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, zero_on_error=>false);), qr/^$/, - qr/^psql::\d+: ERROR: invalid page in block 3 of relation/); + qr/^psql::\d+: ERROR: invalid page in block 3 of relation "/); psql_like( $io_method, @@ 
-1482,7 +1482,7 @@ sub test_ignore_checksum qq( SELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, zero_on_error=>true);), qr/^$/, - qr/^psql::\d+: WARNING: invalid page in block 3 of relation base\/.*; zeroing out page/ + qr/^psql::\d+: WARNING: invalid page in block 3 of relation "base\/.*"; zeroing out page/ ); From 85b380162cd6c66752d1dd020a2d9700da0903c9 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Tue, 26 Aug 2025 14:16:31 -0700 Subject: [PATCH 576/602] oauth: Explicitly depend on -pthread Followup to 4e1e41733 and 52ecd05ae. oauth-utils.c uses pthread_sigmask(), requiring -pthread on Debian bullseye at minimum. Reported-by: Christoph Berg Tested-by: Christoph Berg Discussion: https://postgr.es/m/aK4PZgC0wuwQ5xSK%40msg.df7cb.de Backpatch-through: 18 --- meson.build | 1 + src/interfaces/libpq-oauth/Makefile | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 3aaa37be92067..ab8101d67b26d 100644 --- a/meson.build +++ b/meson.build @@ -3287,6 +3287,7 @@ libpq_deps += [ ] libpq_oauth_deps += [ + thread_dep, libcurl, ] diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index c8c38947ace1b..51145f085a8ac 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -25,6 +25,7 @@ override shlib := lib$(NAME)$(DLSUFFIX) override stlib := libpq-oauth.a override CPPFLAGS := -I$(libpq_srcdir) -I$(top_builddir)/src/port $(CPPFLAGS) $(LIBCURL_CPPFLAGS) +override CFLAGS += $(PTHREAD_CFLAGS) OBJS = \ $(WIN32RES) @@ -47,7 +48,7 @@ $(stlib): override OBJS += $(OBJS_STATIC) $(stlib): $(OBJS_STATIC) SHLIB_LINK_INTERNAL = $(libpq_pgport_shlib) -SHLIB_LINK = $(LIBCURL_LDFLAGS) $(LIBCURL_LDLIBS) $(filter -lintl -lm, $(LIBS)) +SHLIB_LINK = $(LIBCURL_LDFLAGS) $(LIBCURL_LDLIBS) $(filter -lintl -lm $(PTHREAD_LIBS), $(LIBS)) SHLIB_PREREQS = submake-libpq SHLIB_EXPORTS = exports.txt From ef5b87b970dc28adeeb88191fbf66c9d6298b112 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 26 Aug 2025 22:55:14 -0700 Subject: [PATCH 577/602] Check for more Unicode functions during upgrade. When checking for expression indexes that may be affected by a Unicode update during upgrade, check for a few more functions. Specifically, check for documented regexp functions, as well as the new CASEFOLD() function. Also, fully-qualify references to pg_catalog.text and pg_catalog.regtype. 
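The kind of index this check is after is easy to picture; a hedged sketch
with illustrative names (any of the newly covered functions - casefold(),
two-argument substring(), or the regexp_* family - would qualify):

    -- Expression indexes like these depend on Unicode character
    -- classifications and case mapping, so a Unicode update during an
    -- upgrade could change which entries compare as equal or matching.
    CREATE INDEX t_casefold_idx ON t (casefold(col));
    CREATE INDEX t_regexp_idx ON t (regexp_replace(col, '[[:space:]]+', ' '));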
Discussion: https://postgr.es/m/399b656a3abb0c9283538a040f72199c0601525c.camel@j-davis.com
Backpatch-through: 18
---
 src/bin/pg_upgrade/check.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 67eedbae265a1..1e17d64b3ec63 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -1971,14 +1971,19 @@ check_for_unicode_update(ClusterInfo *cluster)
 "  SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations "
 "  WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND "
 "        oprnamespace='pg_catalog'::regnamespace AND "
-"        oprright='text'::regtype "
+"        oprright='pg_catalog.text'::pg_catalog.regtype "
 "), "
 /* functions that use the input collation for character semantics */
 "coll_functions(procid, collid) AS ( "
 "  SELECT proc.oid, collid FROM pg_proc proc, collations "
-"  WHERE proname IN ('lower','initcap','upper') AND "
-"        pronamespace='pg_catalog'::regnamespace AND "
-"        proargtypes[0] = 'text'::regtype "
+"  WHERE pronamespace='pg_catalog'::regnamespace AND "
+"        ((proname IN ('lower','initcap','upper','casefold') AND "
+"          pronargs = 1 AND "
+"          proargtypes[0] = 'pg_catalog.text'::pg_catalog.regtype) OR "
+"         (proname = 'substring' AND pronargs = 2 AND "
+"          proargtypes[0] = 'pg_catalog.text'::pg_catalog.regtype AND "
+"          proargtypes[1] = 'pg_catalog.text'::pg_catalog.regtype) OR "
+"         proname LIKE 'regexp_%') "
 /* include functions behind the operators listed above */
 "     UNION "
 "     SELECT procid, collid FROM coll_operators "

From 990c8db1827c0c96fb20cb4fbfcc9690e9b45e15 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 27 Aug 2025 15:46:39 +0200
Subject: [PATCH 578/602] Fix: Don't strip $libdir from nested module_pathnames

This patch fixes a bug in how load_external_function() handles
'$libdir/' prefixes in module paths.

Previously, load_external_function() would unconditionally strip
'$libdir/' from the beginning of the 'filename' string. This caused an
issue when the path was nested, such as '$libdir/nested/my_lib'.
Stripping the prefix resulted in a path of 'nested/my_lib', which would
fail to be found by the expand_dynamic_library_name() function because
the original '$libdir' macro was removed.

To fix this, the code now checks for the presence of an additional
directory separator ('/' or '\') after the '$libdir/' prefix. The
prefix is only stripped if the remaining string does not contain a
separator. This ensures that simple filenames like '$libdir/my_lib' are
correctly handled, while nested paths are left intact for
expand_dynamic_library_name() to process correctly.

Reported-by: Dilip Kumar
Co-authored-by: Matheus Alcantara
Co-authored-by: Dilip Kumar
Reviewed-by: Srinath Reddy Sadipiralla
Discussion: https://www.postgresql.org/message-id/flat/CAFiTN-uKNzAro4tVwtJhF1UqcygfJ%2BR%2BRL%3Db-_ZMYE3LdHoGhA%40mail.gmail.com
---
 src/backend/utils/fmgr/dfmgr.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c
index 4bb84ff70870f..1366521f471e2 100644
--- a/src/backend/utils/fmgr/dfmgr.c
+++ b/src/backend/utils/fmgr/dfmgr.c
@@ -100,12 +100,19 @@ load_external_function(const char *filename, const char *funcname,
 void *retval;

 /*
- * If the value starts with "$libdir/", strip that. This is because many
- * extensions have hardcoded '$libdir/foo' as their library name, which
- * prevents using the path.
+ * For extensions with hardcoded '$libdir/' library names, we strip the + * prefix to allow the library search path to be used. This is done only + * for simple names (e.g., "$libdir/foo"), not for nested paths (e.g., + * "$libdir/foo/bar"). + * + * For nested paths, 'expand_dynamic_library_name' directly expands the + * '$libdir' macro, so we leave them untouched. */ if (strncmp(filename, "$libdir/", 8) == 0) - filename += 8; + { + if (first_dir_separator(filename + 8) == NULL) + filename += 8; + } /* Expand the possibly-abbreviated filename to an exact path name */ fullname = expand_dynamic_library_name(filename); From e36fa9319b1366ee0194e130a9a77c4af7ee1853 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 27 Aug 2025 16:46:51 +0200 Subject: [PATCH 579/602] Improve objectNamesToOids() comment Commit d31bbfb6590 removed the comment at objectNamesToOids() that there is no locking, because that commit added locking. But to fix all the problems, we'd still need a stronger lock. So put the comment back with more a detailed explanation. Co-authored-by: Noah Misch Reviewed-by: Heikki Linnakangas Discussion: https://www.postgresql.org/message-id/flat/bf72b82c-124d-4efa-a484-bb928e9494e4@eisentraut.org --- src/backend/catalog/aclchk.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 9ca8a88dc9104..24948c1f05ee5 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -659,6 +659,20 @@ ExecGrantStmt_oids(InternalGrant *istmt) * objectNamesToOids * * Turn a list of object names of a given type into an Oid list. + * + * XXX This function intentionally takes only an AccessShareLock. In the face + * of concurrent DDL, we might easily latch onto an old version of an object, + * causing the GRANT or REVOKE statement to fail. But it does prevent the + * object from disappearing altogether. To do better, we would need to use a + * self-exclusive lock, perhaps ShareUpdateExclusiveLock, here and before + * *every* CatalogTupleUpdate() of a row that GRANT/REVOKE can affect. + * Besides that additional work, this could have operational costs. For + * example, it would make GRANT ALL TABLES IN SCHEMA terminate every + * autovacuum running in the schema and consume a shared lock table entry per + * table in the schema. The user-visible benefit of that additional work is + * just changing "ERROR: tuple concurrently updated" to blocking. That's not + * nothing, but it might not outweigh autovacuum termination and lock table + * consumption spikes. */ static List * objectNamesToOids(ObjectType objtype, List *objnames, bool is_grant) From 310d04169a41c20fa2e84ba6e40cffc10b8fa065 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 27 Aug 2025 16:47:23 +0200 Subject: [PATCH 580/602] Put back intra-grant-inplace.spec test coverage Commit d31bbfb6590 lost some test coverage, because the situation being tested, a concurrent DROP, cannot happen anymore. Put the test coverage back with a bit of a trick, by deleting directly from the catalog table. 
Co-authored-by: Noah Misch Reviewed-by: Heikki Linnakangas Discussion: https://www.postgresql.org/message-id/flat/bf72b82c-124d-4efa-a484-bb928e9494e4@eisentraut.org --- src/test/isolation/expected/intra-grant-inplace.out | 4 ++-- src/test/isolation/specs/intra-grant-inplace.spec | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/isolation/expected/intra-grant-inplace.out b/src/test/isolation/expected/intra-grant-inplace.out index 1aa9da622da05..23c34d0ca0935 100644 --- a/src/test/isolation/expected/intra-grant-inplace.out +++ b/src/test/isolation/expected/intra-grant-inplace.out @@ -226,7 +226,7 @@ step revoke4: <... completed> starting permutation: b1 drop1 b3 sfu3 revoke4 c1 r3 step b1: BEGIN; step drop1: - DROP TABLE intra_grant_inplace; + DELETE FROM pg_class WHERE relname = 'intra_grant_inplace'; step b3: BEGIN ISOLATION LEVEL READ COMMITTED; step sfu3: @@ -248,6 +248,6 @@ relhasindex ----------- (0 rows) -s4: WARNING: got: relation "intra_grant_inplace" does not exist +s4: WARNING: got: cache lookup failed for relation REDACTED step revoke4: <... completed> step r3: ROLLBACK; diff --git a/src/test/isolation/specs/intra-grant-inplace.spec b/src/test/isolation/specs/intra-grant-inplace.spec index 9936d389359e5..e9c7848624cd7 100644 --- a/src/test/isolation/specs/intra-grant-inplace.spec +++ b/src/test/isolation/specs/intra-grant-inplace.spec @@ -20,7 +20,7 @@ step grant1 { GRANT SELECT ON intra_grant_inplace TO PUBLIC; } step drop1 { - DROP TABLE intra_grant_inplace; + DELETE FROM pg_class WHERE relname = 'intra_grant_inplace'; } step c1 { COMMIT; } From 5865150b6d535ecea241e9ad3038564cb7768b9a Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 27 Aug 2025 19:12:11 -0400 Subject: [PATCH 581/602] aio: Stop using enum bitfields due to bad code generation During an investigation into rather odd aio-related errors on macOS, observed by Alexander and Konstantin, we started to wonder if bitfield access is related to the error. At the moment it looks like it is related: we cannot reproduce the failures when replacing the bitfields. In addition, the problem can only be reproduced with some compiler versions, and not everyone has been able to reproduce the issue. The observed problem is that, very rarely, PgAioHandle->{state,target} are in an inconsistent state, after having been checked to be in a valid state not long before, triggering an assertion failure. Unfortunately, this could be caused by wrong compiler code generation or by somehow missing memory barriers - we don't really know. In theory there should not be any concurrent write access to the handle in the state the bug is triggered, as the handle was idle and is just being initialized. Separately from the bug, we observed that at least gcc and clang generate rather terrible code for the bitfield access. Even if it's not clear whether the observed assertion failure is actually caused by the bitfield somehow, the bad code generation alone is sufficient reason to stop using bitfields. Therefore, replace the enum bitfields with uint8s and instead cast in each switch statement.
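For illustration only, a minimal standalone sketch of the replacement pattern follows; the type and field names are hypothetical, not the actual PgAioHandle declarations. The field is stored as a plain uint8, and each switch casts it back to the enum type, which keeps the compiler's switch-coverage checking without generating bitfield access code:

#include <stdint.h>
#include <stdio.h>

typedef enum MyState
{
	MS_IDLE,
	MS_DEFINED,
	MS_COMPLETED
} MyState;

typedef struct Handle
{
	/* was "MyState state:8;" -- compilers generate poor code for that */
	uint8_t		state;			/* stores a MyState value */
} Handle;

static const char *
state_name(const Handle *h)
{
	/* cast back to the enum so -Wswitch still flags missing cases */
	switch ((MyState) h->state)
	{
		case MS_IDLE:
			return "IDLE";
		case MS_DEFINED:
			return "DEFINED";
		case MS_COMPLETED:
			return "COMPLETED";
	}
	return "UNKNOWN";
}

int
main(void)
{
	Handle		h = {.state = MS_DEFINED};

	printf("%s\n", state_name(&h));
	return 0;
}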
Reported-by: Alexander Lakhin Reported-by: Konstantin Knizhnik Discussion: https://postgr.es/m/1500090.1745443021@sss.pgh.pa.us Backpatch-through: 18 --- src/backend/storage/aio/aio.c | 10 +++++----- src/backend/storage/aio/aio_funcs.c | 2 +- src/backend/storage/aio/aio_io.c | 8 ++++---- src/backend/storage/aio/method_io_uring.c | 2 +- src/include/storage/aio_internal.h | 14 ++++++++++---- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c index 3643f27ad6e1b..87d7136a93647 100644 --- a/src/backend/storage/aio/aio.c +++ b/src/backend/storage/aio/aio.c @@ -275,7 +275,7 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error) ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node); ioh->resowner = NULL; - switch (ioh->state) + switch ((PgAioHandleState) ioh->state) { case PGAIO_HS_IDLE: elog(ERROR, "unexpected"); @@ -600,7 +600,7 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation) if (pgaio_io_was_recycled(ioh, ref_generation, &state)) return; - switch (state) + switch ((PgAioHandleState) state) { case PGAIO_HS_IDLE: case PGAIO_HS_HANDED_OUT: @@ -825,7 +825,7 @@ pgaio_io_wait_for_free(void) &pgaio_my_backend->in_flight_ios); uint64 generation = ioh->generation; - switch (ioh->state) + switch ((PgAioHandleState) ioh->state) { /* should not be in in-flight list */ case PGAIO_HS_IDLE: @@ -905,7 +905,7 @@ static const char * pgaio_io_state_get_name(PgAioHandleState s) { #define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym - switch (s) + switch ((PgAioHandleState) s) { PGAIO_HS_TOSTR_CASE(IDLE); PGAIO_HS_TOSTR_CASE(HANDED_OUT); @@ -930,7 +930,7 @@ pgaio_io_get_state_name(PgAioHandle *ioh) const char * pgaio_result_status_string(PgAioResultStatus rs) { - switch (rs) + switch ((PgAioResultStatus) rs) { case PGAIO_RS_UNKNOWN: return "UNKNOWN"; diff --git a/src/backend/storage/aio/aio_funcs.c b/src/backend/storage/aio/aio_funcs.c index 34f8f6327338f..d7977387b8f1a 100644 --- a/src/backend/storage/aio/aio_funcs.c +++ b/src/backend/storage/aio/aio_funcs.c @@ -175,7 +175,7 @@ pg_get_aios(PG_FUNCTION_ARGS) values[4] = CStringGetTextDatum(pgaio_io_get_op_name(&ioh_copy)); /* columns: details about the IO's operation (offset, length) */ - switch (ioh_copy.op) + switch ((PgAioOp) ioh_copy.op) { case PGAIO_OP_INVALID: nulls[5] = true; diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c index 520b5077df25a..7d11d40284ada 100644 --- a/src/backend/storage/aio/aio_io.c +++ b/src/backend/storage/aio/aio_io.c @@ -121,7 +121,7 @@ pgaio_io_perform_synchronously(PgAioHandle *ioh) START_CRIT_SECTION(); /* Perform IO. 
*/ - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_READ); @@ -176,7 +176,7 @@ pgaio_io_get_op_name(PgAioHandle *ioh) { Assert(ioh->op >= 0 && ioh->op < PGAIO_OP_COUNT); - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_INVALID: return "invalid"; @@ -198,7 +198,7 @@ pgaio_io_uses_fd(PgAioHandle *ioh, int fd) { Assert(ioh->state >= PGAIO_HS_DEFINED); - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: return ioh->op_data.read.fd == fd; @@ -222,7 +222,7 @@ pgaio_io_get_iovec_length(PgAioHandle *ioh, struct iovec **iov) *iov = &pgaio_ctl->iovecs[ioh->iovec_off]; - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: return ioh->op_data.read.iov_length; diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index 093a71359ef6d..bb06da63a8e02 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -660,7 +660,7 @@ pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe) { struct iovec *iov; - switch (ioh->op) + switch ((PgAioOp) ioh->op) { case PGAIO_OP_READV: iov = &pgaio_ctl->iovecs[ioh->iovec_off]; diff --git a/src/include/storage/aio_internal.h b/src/include/storage/aio_internal.h index 2d37a243abe52..b4de30f2ec149 100644 --- a/src/include/storage/aio_internal.h +++ b/src/include/storage/aio_internal.h @@ -92,17 +92,23 @@ typedef enum PgAioHandleState struct ResourceOwnerData; -/* typedef is in aio_types.h */ +/* + * Typedef is in aio_types.h + * + * We don't use the underlying enums for state, target and op to avoid wasting + * space. We tried using bitfields, but several compilers generate rather + * horrid code for that. + */ struct PgAioHandle { /* all state updates should go through pgaio_io_update_state() */ - PgAioHandleState state:8; + uint8 state; /* what are we operating on */ - PgAioTargetID target:8; + uint8 target; /* which IO operation */ - PgAioOp op:8; + uint8 op; /* bitfield of PgAioHandleFlags */ uint8 flags; From 80f11061323499ef2d07af32ae993473115ca176 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 27 Aug 2025 21:17:58 +0200 Subject: [PATCH 582/602] Message style improvements An improvement pass over the new stats import functionality. 
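The recurring pattern in this pass, e.g. errmsg("cannot specify both \"%s\" and \"%s\"", "attname", "attnum"), keeps identifier-like tokens out of the translatable string so that many call sites can share one generic message. A minimal standalone sketch of the same idea, with a stub in place of the real gettext()/errmsg() machinery (the message text is taken from this patch; everything else is illustrative):

#include <stdio.h>

/*
 * Stub for the translation lookup; in backend code, errmsg() marks the
 * format string for extraction into the message catalog.
 */
#define _(msg) (msg)

int
main(void)
{
	/*
	 * The argument names are substituted as %s parameters, so translators
	 * see a single generic string no matter which pair of arguments a
	 * call site reports.
	 */
	printf(_("cannot specify both \"%s\" and \"%s\"\n"),
		   "attname", "attnum");
	return 0;
}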
--- src/backend/statistics/attribute_stats.c | 25 +++++---- src/backend/statistics/relation_stats.c | 2 +- src/backend/statistics/stat_utils.c | 14 ++--- src/test/regress/expected/stats_import.out | 60 +++++++++++----------- 4 files changed, 52 insertions(+), 49 deletions(-) diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c index e8241926d2c67..1db6a7f784c58 100644 --- a/src/backend/statistics/attribute_stats.c +++ b/src/backend/statistics/attribute_stats.c @@ -199,7 +199,7 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (!PG_ARGISNULL(ATTNUM_ARG)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("cannot specify both attname and attnum"))); + errmsg("cannot specify both \"%s\" and \"%s\"", "attname", "attnum"))); attname = TextDatumGetCString(PG_GETARG_DATUM(ATTNAME_ARG)); attnum = get_attnum(reloid, attname); /* note that this test covers attisdropped cases too: */ @@ -225,7 +225,7 @@ attribute_statistics_update(FunctionCallInfo fcinfo) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("must specify either attname or attnum"))); + errmsg("must specify either \"%s\" or \"%s\"", "attname", "attnum"))); attname = NULL; /* keep compiler quiet */ attnum = 0; } @@ -297,8 +297,9 @@ attribute_statistics_update(FunctionCallInfo fcinfo) &elemtypid, &elem_eq_opr)) { ereport(WARNING, - (errmsg("unable to determine element type of attribute \"%s\"", attname), - errdetail("Cannot set STATISTIC_KIND_MCELEM or STATISTIC_KIND_DECHIST."))); + (errmsg("could not determine element type of column \"%s\"", attname), + errdetail("Cannot set %s or %s.", + "STATISTIC_KIND_MCELEM", "STATISTIC_KIND_DECHIST"))); elemtypid = InvalidOid; elem_eq_opr = InvalidOid; @@ -313,8 +314,9 @@ attribute_statistics_update(FunctionCallInfo fcinfo) { ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("could not determine less-than operator for attribute \"%s\"", attname), - errdetail("Cannot set STATISTIC_KIND_HISTOGRAM or STATISTIC_KIND_CORRELATION."))); + errmsg("could not determine less-than operator for column \"%s\"", attname), + errdetail("Cannot set %s or %s.", + "STATISTIC_KIND_HISTOGRAM", "STATISTIC_KIND_CORRELATION"))); do_histogram = false; do_correlation = false; @@ -327,8 +329,9 @@ attribute_statistics_update(FunctionCallInfo fcinfo) { ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("attribute \"%s\" is not a range type", attname), - errdetail("Cannot set STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM or STATISTIC_KIND_BOUNDS_HISTOGRAM."))); + errmsg("column \"%s\" is not a range type", attname), + errdetail("Cannot set %s or %s.", + "STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM", "STATISTIC_KIND_BOUNDS_HISTOGRAM"))); do_bounds_histogram = false; do_range_length_histogram = false; @@ -587,7 +590,7 @@ get_attr_stat_type(Oid reloid, AttrNumber attnum, if (!HeapTupleIsValid(atup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("attribute %d of relation \"%s\" does not exist", + errmsg("column %d of relation \"%s\" does not exist", attnum, RelationGetRelationName(rel)))); attr = (Form_pg_attribute) GETSTRUCT(atup); @@ -595,7 +598,7 @@ get_attr_stat_type(Oid reloid, AttrNumber attnum, if (attr->attisdropped) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("attribute %d of relation \"%s\" does not exist", + errmsg("column %d of relation \"%s\" does not exist", attnum, RelationGetRelationName(rel)))); expr = get_attr_expr(rel, attr->attnum); @@ -729,7 +732,7 @@ text_to_stavalues(const 
char *staname, FmgrInfo *array_in, Datum d, Oid typid, { ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("\"%s\" array cannot contain NULL values", staname))); + errmsg("\"%s\" array must not contain null values", staname))); *ok = false; return (Datum) 0; } diff --git a/src/backend/statistics/relation_stats.c b/src/backend/statistics/relation_stats.c index cd3a75b621a0c..a59f0c519a474 100644 --- a/src/backend/statistics/relation_stats.c +++ b/src/backend/statistics/relation_stats.c @@ -112,7 +112,7 @@ relation_statistics_update(FunctionCallInfo fcinfo) { ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("reltuples cannot be < -1.0"))); + errmsg("argument \"%s\" must not be less than -1.0", "reltuples"))); result = false; } else diff --git a/src/backend/statistics/stat_utils.c b/src/backend/statistics/stat_utils.c index a9a3224efe6fd..ef7e5168bed40 100644 --- a/src/backend/statistics/stat_utils.c +++ b/src/backend/statistics/stat_utils.c @@ -41,7 +41,7 @@ stats_check_required_arg(FunctionCallInfo fcinfo, if (PG_ARGISNULL(argnum)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("\"%s\" cannot be NULL", + errmsg("argument \"%s\" must not be null", arginfo[argnum].argname))); } @@ -68,7 +68,7 @@ stats_check_arg_array(FunctionCallInfo fcinfo, { ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("\"%s\" cannot be a multidimensional array", + errmsg("argument \"%s\" must not be a multidimensional array", arginfo[argnum].argname))); return false; } @@ -77,7 +77,7 @@ stats_check_arg_array(FunctionCallInfo fcinfo, { ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("\"%s\" array cannot contain NULL values", + errmsg("argument \"%s\" array must not contain null values", arginfo[argnum].argname))); return false; } @@ -108,7 +108,7 @@ stats_check_arg_pair(FunctionCallInfo fcinfo, ereport(WARNING, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("\"%s\" must be specified when \"%s\" is specified", + errmsg("argument \"%s\" must be specified when argument \"%s\" is specified", arginfo[nullarg].argname, arginfo[otherarg].argname))); @@ -263,7 +263,7 @@ stats_check_arg_type(const char *argname, Oid argtype, Oid expectedtype) if (argtype != expectedtype) { ereport(WARNING, - (errmsg("argument \"%s\" has type \"%s\", expected type \"%s\"", + (errmsg("argument \"%s\" has type %s, expected type %s", argname, format_type_be(argtype), format_type_be(expectedtype)))); return false; @@ -319,11 +319,11 @@ stats_fill_fcinfo_from_arg_pairs(FunctionCallInfo pairs_fcinfo, if (argnulls[i]) ereport(ERROR, - (errmsg("name at variadic position %d is NULL", i + 1))); + (errmsg("name at variadic position %d is null", i + 1))); if (types[i] != TEXTOID) ereport(ERROR, - (errmsg("name at variadic position %d has type \"%s\", expected type \"%s\"", + (errmsg("name at variadic position %d has type %s, expected type %s", i + 1, format_type_be(types[i]), format_type_be(TEXTOID)))); diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out index 48d6392b4ad4f..9e615ccd0af97 100644 --- a/src/test/regress/expected/stats_import.out +++ b/src/test/regress/expected/stats_import.out @@ -50,26 +50,26 @@ SELECT pg_clear_relation_stats('stats_import', 'test'); SELECT pg_catalog.pg_restore_relation_stats( 'relname', 'test', 'relpages', 17::integer); -ERROR: "schemaname" cannot be NULL +ERROR: argument "schemaname" must not be null -- error: relname missing SELECT pg_catalog.pg_restore_relation_stats( 
'schemaname', 'stats_import', 'relpages', 17::integer); -ERROR: "relname" cannot be NULL +ERROR: argument "relname" must not be null --- error: schemaname is wrong type SELECT pg_catalog.pg_restore_relation_stats( 'schemaname', 3.6::float, 'relname', 'test', 'relpages', 17::integer); -WARNING: argument "schemaname" has type "double precision", expected type "text" -ERROR: "schemaname" cannot be NULL +WARNING: argument "schemaname" has type double precision, expected type text +ERROR: argument "schemaname" must not be null --- error: relname is wrong type SELECT pg_catalog.pg_restore_relation_stats( 'schemaname', 'stats_import', 'relname', 0::oid, 'relpages', 17::integer); -WARNING: argument "relname" has type "oid", expected type "text" -ERROR: "relname" cannot be NULL +WARNING: argument "relname" has type oid, expected type text +ERROR: argument "relname" must not be null -- error: relation not found SELECT pg_catalog.pg_restore_relation_stats( 'schemaname', 'stats_import', @@ -88,7 +88,7 @@ SELECT pg_restore_relation_stats( 'schemaname', 'stats_import', 'relname', 'test', NULL, '17'::integer); -ERROR: name at variadic position 5 is NULL +ERROR: name at variadic position 5 is null -- starting stats SELECT relpages, reltuples, relallvisible, relallfrozen FROM pg_class @@ -286,7 +286,7 @@ SELECT pg_restore_relation_stats( 'reltuples', 400.0::real, 'relallvisible', 4::integer, 'relallfrozen', 3::integer); -WARNING: argument "relpages" has type "text", expected type "integer" +WARNING: argument "relpages" has type text, expected type integer pg_restore_relation_stats --------------------------- f @@ -358,7 +358,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'attname', 'id', 'inherited', false::boolean, 'null_frac', 0.1::real); -ERROR: "schemaname" cannot be NULL +ERROR: argument "schemaname" must not be null -- error: schema does not exist SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 'nope', @@ -373,7 +373,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'attname', 'id', 'inherited', false::boolean, 'null_frac', 0.1::real); -ERROR: "relname" cannot be NULL +ERROR: argument "relname" must not be null -- error: relname does not exist SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 'stats_import', @@ -389,7 +389,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'attname', 'id', 'inherited', false::boolean, 'null_frac', 0.1::real); -ERROR: "relname" cannot be NULL +ERROR: argument "relname" must not be null -- error: NULL attname SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 'stats_import', @@ -397,7 +397,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'attname', NULL, 'inherited', false::boolean, 'null_frac', 0.1::real); -ERROR: must specify either attname or attnum +ERROR: must specify either "attname" or "attnum" -- error: attname doesn't exist SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 'stats_import', @@ -416,14 +416,14 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'attnum', 1::smallint, 'inherited', false::boolean, 'null_frac', 0.1::real); -ERROR: cannot specify both attname and attnum +ERROR: cannot specify both "attname" and "attnum" -- error: neither attname nor attnum SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 'stats_import', 'relname', 'test', 'inherited', false::boolean, 'null_frac', 0.1::real); -ERROR: must specify either attname or attnum +ERROR: must specify either "attname" or "attnum" -- error: attribute is system column SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 
'stats_import', @@ -439,7 +439,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'attname', 'id', 'inherited', NULL::boolean, 'null_frac', 0.1::real); -ERROR: "inherited" cannot be NULL +ERROR: argument "inherited" must not be null -- ok: just the fixed values, with version, no stakinds SELECT pg_catalog.pg_restore_attribute_stats( 'schemaname', 'stats_import', @@ -527,7 +527,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.21::real, 'most_common_freqs', '{0.1,0.2,0.3}'::real[] ); -WARNING: "most_common_vals" must be specified when "most_common_freqs" is specified +WARNING: argument "most_common_vals" must be specified when argument "most_common_freqs" is specified pg_restore_attribute_stats ---------------------------- f @@ -553,7 +553,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.21::real, 'most_common_vals', '{1,2,3}'::text ); -WARNING: "most_common_freqs" must be specified when "most_common_vals" is specified +WARNING: argument "most_common_freqs" must be specified when argument "most_common_vals" is specified pg_restore_attribute_stats ---------------------------- f @@ -580,8 +580,8 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'most_common_vals', '{2,1,3}'::text, 'most_common_freqs', '{0.2,0.1}'::double precision[] ); -WARNING: argument "most_common_freqs" has type "double precision[]", expected type "real[]" -WARNING: "most_common_freqs" must be specified when "most_common_vals" is specified +WARNING: argument "most_common_freqs" has type double precision[], expected type real[] +WARNING: argument "most_common_freqs" must be specified when argument "most_common_vals" is specified pg_restore_attribute_stats ---------------------------- f @@ -659,7 +659,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.24::real, 'histogram_bounds', '{1,NULL,3,4}'::text ); -WARNING: "histogram_bounds" array cannot contain NULL values +WARNING: "histogram_bounds" array must not contain null values pg_restore_attribute_stats ---------------------------- f @@ -709,7 +709,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.25::real, 'elem_count_histogram', '{1,1,NULL,1,1,1,1,1}'::real[] ); -WARNING: "elem_count_histogram" array cannot contain NULL values +WARNING: argument "elem_count_histogram" array must not contain null values pg_restore_attribute_stats ---------------------------- f @@ -761,7 +761,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'range_empty_frac', 0.5::real, 'range_length_histogram', '{399,499,Infinity}'::text ); -WARNING: attribute "id" is not a range type +WARNING: column "id" is not a range type DETAIL: Cannot set STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM or STATISTIC_KIND_BOUNDS_HISTOGRAM. 
pg_restore_attribute_stats ---------------------------- @@ -788,7 +788,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.28::real, 'range_length_histogram', '{399,499,Infinity}'::text ); -WARNING: "range_empty_frac" must be specified when "range_length_histogram" is specified +WARNING: argument "range_empty_frac" must be specified when argument "range_length_histogram" is specified pg_restore_attribute_stats ---------------------------- f @@ -814,7 +814,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.29::real, 'range_empty_frac', 0.5::real ); -WARNING: "range_length_histogram" must be specified when "range_empty_frac" is specified +WARNING: argument "range_length_histogram" must be specified when argument "range_empty_frac" is specified pg_restore_attribute_stats ---------------------------- f @@ -865,7 +865,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.31::real, 'range_bounds_histogram', '{"[-1,1)","[0,4)","[1,4)","[1,100)"}'::text ); -WARNING: attribute "id" is not a range type +WARNING: column "id" is not a range type DETAIL: Cannot set STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM or STATISTIC_KIND_BOUNDS_HISTOGRAM. pg_restore_attribute_stats ---------------------------- @@ -917,7 +917,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'most_common_elems', '{3,1}'::text, 'most_common_elem_freqs', '{0.3,0.2,0.2,0.3,0.0}'::real[] ); -WARNING: unable to determine element type of attribute "arange" +WARNING: could not determine element type of column "arange" DETAIL: Cannot set STATISTIC_KIND_MCELEM or STATISTIC_KIND_DECHIST. pg_restore_attribute_stats ---------------------------- @@ -945,7 +945,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'most_common_elems', '{1,3}'::text, 'most_common_elem_freqs', '{0.3,0.2,0.2,0.3,0.0}'::real[] ); -WARNING: unable to determine element type of attribute "id" +WARNING: could not determine element type of column "id" DETAIL: Cannot set STATISTIC_KIND_MCELEM or STATISTIC_KIND_DECHIST. pg_restore_attribute_stats ---------------------------- @@ -972,7 +972,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.34::real, 'most_common_elems', '{one,two}'::text ); -WARNING: "most_common_elem_freqs" must be specified when "most_common_elems" is specified +WARNING: argument "most_common_elem_freqs" must be specified when argument "most_common_elems" is specified pg_restore_attribute_stats ---------------------------- f @@ -998,7 +998,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.35::real, 'most_common_elem_freqs', '{0.3,0.2,0.2,0.3}'::real[] ); -WARNING: "most_common_elems" must be specified when "most_common_elem_freqs" is specified +WARNING: argument "most_common_elems" must be specified when argument "most_common_elem_freqs" is specified pg_restore_attribute_stats ---------------------------- f @@ -1049,7 +1049,7 @@ SELECT pg_catalog.pg_restore_attribute_stats( 'null_frac', 0.36::real, 'elem_count_histogram', '{1,1,1,1,1,1,1,1,1,1}'::real[] ); -WARNING: unable to determine element type of attribute "id" +WARNING: could not determine element type of column "id" DETAIL: Cannot set STATISTIC_KIND_MCELEM or STATISTIC_KIND_DECHIST. 
pg_restore_attribute_stats ---------------------------- From 325fc0ab14d11fc87da594857ffbb6636affe7c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 28 Aug 2025 12:39:04 +0200 Subject: [PATCH 583/602] Avoid including commands/dbcommands.h in so many places MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This has been done historically because of get_database_name (which since commit cb98e6fb8fd4 belongs in lsyscache.c/h, so let's move it there) and get_database_oid (which is in the right place, but whose declaration should appear in pg_database.h rather than dbcommands.h). Clean this up. Also, xlogreader.h and stringinfo.h are no longer needed by dbcommands.h since commit f1fd515b393a, so remove them. Author: Álvaro Herrera Reviewed-by: Bertrand Drouvot Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/202508191031.5ipojyuaswzt@alvherre.pgsql --- contrib/sepgsql/database.c | 1 - contrib/sepgsql/label.c | 1 - src/backend/access/heap/vacuumlazy.c | 1 - src/backend/access/transam/multixact.c | 2 +- src/backend/access/transam/varsup.c | 2 +- src/backend/catalog/aclchk.c | 1 - src/backend/catalog/namespace.c | 1 - src/backend/catalog/objectaddress.c | 1 - src/backend/catalog/pg_shdepend.c | 2 +- src/backend/commands/analyze.c | 1 - src/backend/commands/comment.c | 2 +- src/backend/commands/dbcommands.c | 25 +------------------- src/backend/commands/indexcmds.c | 1 - src/backend/commands/publicationcmds.c | 1 - src/backend/commands/schemacmds.c | 2 +- src/backend/commands/subscriptioncmds.c | 1 - src/backend/commands/trigger.c | 1 - src/backend/parser/parse_expr.c | 1 - src/backend/parser/parse_target.c | 1 - src/backend/postmaster/autovacuum.c | 1 - src/backend/replication/logical/slotsync.c | 1 - src/backend/replication/walsender.c | 2 +- src/backend/storage/ipc/procarray.c | 2 +- src/backend/utils/adt/acl.c | 1 - src/backend/utils/adt/dbsize.c | 2 +- src/backend/utils/adt/misc.c | 1 - src/backend/utils/adt/regproc.c | 2 +- src/backend/utils/adt/xml.c | 1 - src/backend/utils/cache/lsyscache.c | 27 ++++++++++++++++++++++ src/include/catalog/pg_database.h | 1 + src/include/commands/dbcommands.h | 4 ---- src/include/utils/lsyscache.h | 2 +- src/test/modules/worker_spi/worker_spi.c | 2 +- 33 files changed, 40 insertions(+), 57 deletions(-) diff --git a/contrib/sepgsql/database.c b/contrib/sepgsql/database.c index 6eeb429a28c08..c4ed646436990 100644 --- a/contrib/sepgsql/database.c +++ b/contrib/sepgsql/database.c @@ -16,7 +16,6 @@ #include "access/table.h" #include "catalog/dependency.h" #include "catalog/pg_database.h" -#include "commands/dbcommands.h" #include "commands/seclabel.h" #include "sepgsql.h" #include "utils/builtins.h" diff --git a/contrib/sepgsql/label.c b/contrib/sepgsql/label.c index 996ce174454dc..a37d89a3f1cc0 100644 --- a/contrib/sepgsql/label.c +++ b/contrib/sepgsql/label.c @@ -23,7 +23,6 @@ #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" #include "catalog/pg_proc.h" -#include "commands/dbcommands.h" #include "commands/seclabel.h" #include "libpq/auth.h" #include "libpq/libpq-be.h" diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 14036c27e878a..932701d8420dc 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -140,7 +140,6 @@ #include "access/visibilitymap.h" #include "access/xloginsert.h" #include "catalog/storage.h" -#include "commands/dbcommands.h" #include "commands/progress.h" #include 
"commands/vacuum.h" #include "common/int.h" diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index e89ab0c49b382..8bf59d369f848 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -75,7 +75,6 @@ #include "access/xlog.h" #include "access/xloginsert.h" #include "access/xlogutils.h" -#include "commands/dbcommands.h" #include "miscadmin.h" #include "pg_trace.h" #include "pgstat.h" @@ -85,6 +84,7 @@ #include "storage/procarray.h" #include "utils/guc_hooks.h" #include "utils/injection_point.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index fe895787cb72d..f8c4dada7c93a 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -19,11 +19,11 @@ #include "access/transam.h" #include "access/xact.h" #include "access/xlogutils.h" -#include "commands/dbcommands.h" #include "miscadmin.h" #include "postmaster/autovacuum.h" #include "storage/pmsignal.h" #include "storage/proc.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 24948c1f05ee5..cd139bd65a668 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -64,7 +64,6 @@ #include "catalog/pg_proc.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/extension.h" diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index d97d632a7ef55..8bd4d6c3d4346 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -41,7 +41,6 @@ #include "catalog/pg_ts_parser.h" #include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "common/hashfn_unstable.h" #include "funcapi.h" #include "mb/pg_wchar.h" diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 0102c9984e7db..91f3018fd0a8c 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -62,7 +62,6 @@ #include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" #include "catalog/pg_user_mapping.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/extension.h" diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 32e544da28a13..16e3e5c7457db 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -47,7 +47,6 @@ #include "catalog/pg_type.h" #include "catalog/pg_user_mapping.h" #include "commands/alter.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/policy.h" @@ -61,6 +60,7 @@ #include "storage/lmgr.h" #include "utils/acl.h" #include "utils/fmgroids.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 40d66537ad730..8ea2913d90632 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -29,7 +29,6 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/pg_inherits.h" -#include "commands/dbcommands.h" #include "commands/progress.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" diff --git 
a/src/backend/commands/comment.c b/src/backend/commands/comment.c index f67a8b95d29de..5c783cc61f1d7 100644 --- a/src/backend/commands/comment.c +++ b/src/backend/commands/comment.c @@ -20,10 +20,10 @@ #include "access/table.h" #include "catalog/indexing.h" #include "catalog/objectaddress.h" +#include "catalog/pg_database.h" #include "catalog/pg_description.h" #include "catalog/pg_shdescription.h" #include "commands/comment.h" -#include "commands/dbcommands.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/fmgroids.h" diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 92a396b8406a3..2793fd837715d 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -64,6 +64,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/lsyscache.h" #include "utils/pg_locale.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" @@ -3204,30 +3205,6 @@ get_database_oid(const char *dbname, bool missing_ok) } -/* - * get_database_name - given a database OID, look up the name - * - * Returns a palloc'd string, or NULL if no such database. - */ -char * -get_database_name(Oid dbid) -{ - HeapTuple dbtuple; - char *result; - - dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid)); - if (HeapTupleIsValid(dbtuple)) - { - result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname)); - ReleaseSysCache(dbtuple); - } - else - result = NULL; - - return result; -} - - /* * While dropping a database the pg_database row is marked invalid, but the * catalog contents still exist. Connections to such a database are not diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 6f753ab6d7a0d..ca2bde62e82ff 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -38,7 +38,6 @@ #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "commands/comment.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/progress.h" diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index 803c26ab216dd..3de5687461c85 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -29,7 +29,6 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_namespace.h" #include "catalog/pg_publication_rel.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/publicationcmds.h" diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index 0f03d9743d203..3cc1472103a7a 100644 --- a/src/backend/commands/schemacmds.c +++ b/src/backend/commands/schemacmds.c @@ -25,7 +25,6 @@ #include "catalog/pg_authid.h" #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" -#include "commands/dbcommands.h" #include "commands/event_trigger.h" #include "commands/schemacmds.h" #include "miscadmin.h" @@ -34,6 +33,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/syscache.h" diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 4c01d21b2f30c..0d74398faf321 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -30,7 +30,6 @@ #include "catalog/pg_subscription.h" #include 
"catalog/pg_subscription_rel.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/event_trigger.h" #include "commands/subscriptioncmds.h" diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 235533ac17f78..579ac8d76ae73 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -30,7 +30,6 @@ #include "catalog/pg_proc.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "commands/trigger.h" #include "executor/executor.h" #include "miscadmin.h" diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index d66276801c67b..e1979a80c198a 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -17,7 +17,6 @@ #include "catalog/pg_aggregate.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 4aba0d9d4d5cc..905c975d83b56 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -16,7 +16,6 @@ #include "catalog/namespace.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "funcapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index ff96b36d71025..dce4c8c45b9b6 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -77,7 +77,6 @@ #include "catalog/namespace.h" #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" -#include "commands/dbcommands.h" #include "commands/vacuum.h" #include "common/int.h" #include "lib/ilist.h" diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 3773844011358..caa003cb7b3ac 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -52,7 +52,6 @@ #include "access/xlog_internal.h" #include "access/xlogrecovery.h" #include "catalog/pg_database.h" -#include "commands/dbcommands.h" #include "libpq/pqsignal.h" #include "pgstat.h" #include "postmaster/interrupt.h" diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 0855bae3535a6..e3dce9dc68d04 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -60,7 +60,6 @@ #include "backup/basebackup_incremental.h" #include "catalog/pg_authid.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "commands/defrem.h" #include "funcapi.h" #include "libpq/libpq.h" @@ -91,6 +90,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/guc.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_lsn.h" #include "utils/pgstat_internal.h" diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index bf987aed8d327..200f72c6e2565 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -54,7 +54,6 @@ #include "access/xlogutils.h" #include "catalog/catalog.h" #include "catalog/pg_authid.h" -#include "commands/dbcommands.h" #include "miscadmin.h" #include "pgstat.h" #include "port/pg_lfind.h" @@ -62,6 +61,7 @@ #include "storage/procarray.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" #include "utils/rel.h" 
#include "utils/snapmgr.h" diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index 1213f9106d515..7dadaefdfc1b8 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -31,7 +31,6 @@ #include "catalog/pg_proc.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "commands/proclang.h" #include "commands/tablespace.h" #include "common/hashfn.h" diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 3a059f4ace043..894d226541f23 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -19,12 +19,12 @@ #include "catalog/pg_authid.h" #include "catalog/pg_database.h" #include "catalog/pg_tablespace.h" -#include "commands/dbcommands.h" #include "commands/tablespace.h" #include "miscadmin.h" #include "storage/fd.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" #include "utils/numeric.h" #include "utils/rel.h" #include "utils/relfilenumbermap.h" diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index 6fcfd031428ed..6c5e3438447b0 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -26,7 +26,6 @@ #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "catalog/system_fk_info.h" -#include "commands/dbcommands.h" #include "commands/tablespace.h" #include "common/keywords.h" #include "funcapi.h" diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index b8bbe95e82eb8..af17a3421a02d 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -25,12 +25,12 @@ #include "catalog/namespace.h" #include "catalog/pg_class.h" #include "catalog/pg_collation.h" +#include "catalog/pg_database.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 182e8f75db75c..7b7396cdf830c 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -84,7 +84,6 @@ #include "catalog/namespace.h" #include "catalog/pg_class.h" #include "catalog/pg_type.h" -#include "commands/dbcommands.h" #include "executor/spi.h" #include "executor/tablefunc.h" #include "fmgr.h" diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 032bb6222c49b..fa7cd7e06a7ab 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -26,6 +26,7 @@ #include "catalog/pg_class.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_database.h" #include "catalog/pg_index.h" #include "catalog/pg_language.h" #include "catalog/pg_namespace.h" @@ -1247,6 +1248,32 @@ get_constraint_type(Oid conoid) return contype; } +/* ---------- DATABASE CACHE ---------- */ + +/* + * get_database_name - given a database OID, look up the name + * + * Returns a palloc'd string, or NULL if no such database. 
+ */ +char * +get_database_name(Oid dbid) +{ + HeapTuple dbtuple; + char *result; + + dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid)); + if (HeapTupleIsValid(dbtuple)) + { + result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname)); + ReleaseSysCache(dbtuple); + } + else + result = NULL; + + return result; +} + + /* ---------- LANGUAGE CACHE ---------- */ char * diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index 54f0d38c9c9e1..97bc18745084b 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -123,6 +123,7 @@ DECLARE_OID_DEFINING_MACRO(PostgresDbOid, 5); */ #define DATCONNLIMIT_INVALID_DB -2 +extern Oid get_database_oid(const char *dbname, bool missing_ok); extern bool database_is_invalid_form(Form_pg_database datform); extern bool database_is_invalid_oid(Oid dboid); diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h index 524ac6d97e898..d48ab6d7d7d38 100644 --- a/src/include/commands/dbcommands.h +++ b/src/include/commands/dbcommands.h @@ -14,9 +14,7 @@ #ifndef DBCOMMANDS_H #define DBCOMMANDS_H -#include "access/xlogreader.h" #include "catalog/objectaddress.h" -#include "lib/stringinfo.h" #include "parser/parse_node.h" extern Oid createdb(ParseState *pstate, const CreatedbStmt *stmt); @@ -28,8 +26,6 @@ extern ObjectAddress AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt extern Oid AlterDatabaseSet(AlterDatabaseSetStmt *stmt); extern ObjectAddress AlterDatabaseOwner(const char *dbname, Oid newOwnerId); -extern Oid get_database_oid(const char *dbname, bool missing_ok); -extern char *get_database_name(Oid dbid); extern bool have_createdb_privilege(void); extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index fa7c7e0323b10..c65cee4f24cd2 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -104,7 +104,7 @@ extern bool get_collation_isdeterministic(Oid colloid); extern char *get_constraint_name(Oid conoid); extern Oid get_constraint_index(Oid conoid); extern char get_constraint_type(Oid conoid); - +extern char *get_database_name(Oid dbid); extern char *get_language_name(Oid langoid, bool missing_ok); extern Oid get_opclass_family(Oid opclass); extern Oid get_opclass_input_type(Oid opclass); diff --git a/src/test/modules/worker_spi/worker_spi.c b/src/test/modules/worker_spi/worker_spi.c index 9c53d896b6ae5..bea8339f46469 100644 --- a/src/test/modules/worker_spi/worker_spi.c +++ b/src/test/modules/worker_spi/worker_spi.c @@ -30,7 +30,7 @@ /* these headers are used by this particular worker's code */ #include "access/xact.h" -#include "commands/dbcommands.h" +#include "catalog/pg_database.h" #include "executor/spi.h" #include "fmgr.h" #include "lib/stringinfo.h" From 16a9165ce4a45f11aa042e12ab62be8ce6b2b93b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Thu, 28 Aug 2025 18:16:08 +0200 Subject: [PATCH 584/602] Glossary: improve definition of "relation" Define the more general term first, then the Postgres-specific meaning. Wording from Tom Lane. 
Discussion: https://postgr.es/m/CACJufxEZ48toGH0Em_6vdsT57Y3L8pLF=DZCQ_gCii6=C3MeXw@mail.gmail.com --- doc/src/sgml/glossary.sgml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/doc/src/sgml/glossary.sgml b/doc/src/sgml/glossary.sgml index b88cac598e901..8651f0cdb9198 100644 --- a/doc/src/sgml/glossary.sgml +++ b/doc/src/sgml/glossary.sgml @@ -1419,11 +1419,15 @@ Relation - The generic term for all objects in a - database - that have a name and a list of - attributes - defined in a specific order. + Mathematically, a relation is a set of + tuples; + this is the sense meant in the term "relational database". + + + + In PostgreSQL, "relation" is commonly used to + mean an SQL object + that has a name and a list of attributes defined in a specific order. Tables, sequences, views, @@ -1431,15 +1435,14 @@ materialized views, composite types, and indexes are all relations. + A relation in this sense is a container or a descriptor for a set of tuples. + - More generically, a relation is a set of tuples; for example, - the result of a query is also a relation. - - - In PostgreSQL, - Class is an archaic synonym for - relation. + Class is an alternative but archaic term. + The system catalog + pg_class + holds an entry for each PostgreSQL relation. From b8a1bdc458e3e81898a1fe3d26188bc1dcbac965 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 28 Aug 2025 13:49:20 -0400 Subject: [PATCH 585/602] Fix "variable not found in subplan target lists" in semijoin de-duplication. One mechanism we have for implementing semi-joins is to de-duplicate the output of the RHS and then treat the join as a plain inner join. Initial construction of the join's SpecialJoinInfo identifies the RHS columns that need to be de-duplicated, but later we may find that some of those don't need to be handled explicitly, either because they're known to be constant or because they are redundant with some previous column. Up to now, while sort-based de-duplication handled such cases well, hash-based de-duplication didn't: we'd still hash on all of the originally-identified columns. This is probably not a very big deal performance-wise, but in the wake of commit a3179ab69 it can cause planner errors. That happens when join elimination causes recalculation of variables' attr_needed bitmapsets, and we decide that a variable mentioned in a semijoin clause doesn't need to be propagated up to the join level anymore. There are a number of ways we could slice the blame for this, but the only fix that doesn't result in pessimizing plans for loosely-related cases is to be more careful about not hashing columns we don't actually need to de-duplicate. We can install that consideration into create_unique_paths in master, or the predecessor code in create_unique_path in v18, without much refactoring. (As follow-up work, it might be a good idea to look at more-invasive refactoring, in hopes of preventing other bugs in this area. But with v18 release so close, there's not time for that now, nor would we be likely to want to put such refactoring into v18 anyway.) 
Reported-by: Sergey Soloviev Diagnosed-by: Richard Guo Author: Tom Lane Reviewed-by: Richard Guo Discussion: https://postgr.es/m/1fd1a421-4609-4d46-a1af-ab74d5de504a@tantorlabs.ru Backpatch-through: 18 --- src/backend/optimizer/plan/planner.c | 80 +++++++++++++++++++----- src/test/regress/expected/aggregates.out | 20 ------ src/test/regress/expected/join.out | 80 ++++++++++++++++++++++++ src/test/regress/sql/aggregates.sql | 9 --- src/test/regress/sql/join.sql | 33 ++++++++++ 5 files changed, 179 insertions(+), 43 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 65f1710159194..a8c8edfac75e3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8284,8 +8284,8 @@ generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist) * the needs of the semijoin represented by sjinfo. If it is not possible * to identify how to make the data unique, NULL is returned. * - * If used at all, this is likely to be called repeatedly on the same rel; - * So we cache the result. + * If used at all, this is likely to be called repeatedly on the same rel, + * so we cache the result. */ RelOptInfo * create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) @@ -8375,9 +8375,14 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) * variables of the input rel's targetlist. We have to add any such * expressions to the unique rel's targetlist. * - * While in the loop, build the lists of SortGroupClause's that - * represent the ordering for the sort-based implementation and the - * grouping for the hash-based implementation. + * To complicate matters, some of the values to be unique-ified may be + * known redundant by the EquivalenceClass machinery (e.g., because + * they have been equated to constants). There is no need to compare + * such values during unique-ification, and indeed we had better not + * try because the Vars involved may not have propagated as high as + * the semijoin's level. We use make_pathkeys_for_sortclauses to + * detect such cases, which is a tad inefficient but it doesn't seem + * worth building specialized infrastructure for this. */ newtlist = make_tlist_from_pathtarget(rel->reltarget); nextresno = list_length(newtlist) + 1; @@ -8386,8 +8391,9 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) { Expr *uniqexpr = lfirst(lc1); Oid in_oper = lfirst_oid(lc2); - Oid sortop = InvalidOid; + Oid sortop; TargetEntry *tle; + bool made_tle = false; tle = tlist_member(uniqexpr, newtlist); if (!tle) @@ -8398,19 +8404,21 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) false); newtlist = lappend(newtlist, tle); nextresno++; + made_tle = true; } - if (sjinfo->semi_can_btree) + /* + * Try to build an ORDER BY list to sort the input compatibly. We + * do this for each sortable clause even when the clauses are not + * all sortable, so that we can detect clauses that are redundant + * according to the pathkey machinery. + */ + sortop = get_ordering_op_for_equality_op(in_oper, false); + if (OidIsValid(sortop)) { - /* Create an ORDER BY list to sort the input compatibly */ Oid eqop; SortGroupClause *sortcl; - sortop = get_ordering_op_for_equality_op(in_oper, false); - if (!OidIsValid(sortop)) /* shouldn't happen */ - elog(ERROR, "could not find ordering operator for equality operator %u", - in_oper); - /* * The Unique node will need equality operators. 
Normally * these are the same as the IN clause operators, but if those @@ -8430,7 +8438,32 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) sortcl->nulls_first = false; sortcl->hashable = false; /* no need to make this accurate */ sortList = lappend(sortList, sortcl); + + /* + * At each step, convert the SortGroupClause list to pathkey + * form. If the just-added SortGroupClause is redundant, the + * result will be shorter than the SortGroupClause list. + */ + sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, + newtlist); + if (list_length(sortPathkeys) != list_length(sortList)) + { + /* Drop the redundant SortGroupClause */ + sortList = list_delete_last(sortList); + Assert(list_length(sortPathkeys) == list_length(sortList)); + /* Undo tlist addition, if we made one */ + if (made_tle) + { + newtlist = list_delete_last(newtlist); + nextresno--; + } + /* We need not consider this clause for hashing, either */ + continue; + } } + else if (sjinfo->semi_can_btree) /* shouldn't happen */ + elog(ERROR, "could not find ordering operator for equality operator %u", + in_oper); if (sjinfo->semi_can_hash) { @@ -8460,8 +8493,27 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) } } + /* + * Done building the sortPathkeys and groupClause. But the + * sortPathkeys are bogus if not all the clauses were sortable. + */ + if (!sjinfo->semi_can_btree) + sortPathkeys = NIL; + + /* + * It can happen that all the RHS columns are equated to constants. + * We'd have to do something special to unique-ify in that case, and + * it's such an unlikely-in-the-real-world case that it's not worth + * the effort. So just punt if we found no columns to unique-ify. + */ + if (sortPathkeys == NIL && groupClause == NIL) + { + MemoryContextSwitchTo(oldcontext); + return NULL; + } + + /* Convert the required targetlist back to PathTarget form */ unique_rel->reltarget = create_pathtarget(root, newtlist); - sortPathkeys = make_pathkeys_for_sortclauses(root, sortList, newtlist); } /* build unique paths based on input rel's pathlist */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 7319945ffe325..c35288eecde37 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -3398,26 +3398,6 @@ select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*) ba | 0 | 1 (2 rows) --- Make sure that generation of HashAggregate for uniqification purposes --- does not lead to array overflow due to unexpected duplicate hash keys --- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com -set enable_memoize to off; -explain (costs off) - select 1 from tenk1 - where (hundred, thousand) in (select twothousand, twothousand from onek); - QUERY PLAN -------------------------------------------------------------- - Hash Join - Hash Cond: (tenk1.hundred = onek.twothousand) - -> Seq Scan on tenk1 - Filter: (hundred = thousand) - -> Hash - -> HashAggregate - Group Key: onek.twothousand, onek.twothousand - -> Seq Scan on onek -(8 rows) - -reset enable_memoize; -- -- Hash Aggregation Spill tests -- diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 98b05c94a1195..b26b8c5bdbedd 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3222,6 +3222,24 @@ where b.unique2 is null; -> Index Only Scan using tenk1_unique2 on tenk1 b (5 rows) +-- check that we avoid de-duplicating columns 
redundantly +set enable_memoize to off; +explain (costs off) +select 1 from tenk1 +where (hundred, thousand) in (select twothousand, twothousand from onek); + QUERY PLAN +------------------------------------------------- + Hash Join + Hash Cond: (tenk1.hundred = onek.twothousand) + -> Seq Scan on tenk1 + Filter: (hundred = thousand) + -> Hash + -> HashAggregate + Group Key: onek.twothousand + -> Seq Scan on onek +(8 rows) + +reset enable_memoize; -- -- regression test for bogus RTE_GROUP entries -- @@ -6500,6 +6518,68 @@ where t1.a = s.c; ---------- (0 rows) +rollback; +-- check handling of semijoins after join removal: we must suppress +-- unique-ification of known-constant values +begin; +create temp table t (a int unique, b int); +insert into t values (1, 2); +explain (verbose, costs off) +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + QUERY PLAN +------------------------------------------------------------------------------------ + Nested Loop + Output: t1.a + Inner Unique: true + -> Nested Loop + Output: t1.a, t5.a + -> Index Only Scan using t_a_key on pg_temp.t t1 + Output: t1.a + Index Cond: (t1.a = 1) + -> HashAggregate + Output: t5.a + Group Key: t5.a + -> Hash Join + Output: t5.a + Hash Cond: (t6.b = t4.b) + -> Seq Scan on pg_temp.t t6 + Output: t6.a, t6.b + -> Hash + Output: t4.b, t5.b, t5.a + -> Hash Join + Output: t4.b, t5.b, t5.a + Inner Unique: true + Hash Cond: (t5.b = t4.b) + -> Seq Scan on pg_temp.t t5 + Output: t5.a, t5.b + -> Hash + Output: t4.b, t4.a + -> Index Scan using t_a_key on pg_temp.t t4 + Output: t4.b, t4.a + Index Cond: (t4.a = 1) + -> Index Only Scan using t_a_key on pg_temp.t t3 + Output: t3.a + Index Cond: (t3.a = t5.a) +(32 rows) + +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + a +--- + 1 +(1 row) + rollback; -- test cases where we can remove a join, but not a PHV computed at it begin; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index dde85d0dfb235..62540b1ffa4eb 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1510,15 +1510,6 @@ select v||'a', case when v||'a' = 'aa' then 1 else 0 end, count(*) from unnest(array['a','b']) u(v) group by v||'a' order by 1; --- Make sure that generation of HashAggregate for uniqification purposes --- does not lead to array overflow due to unexpected duplicate hash keys --- see CAFeeJoKKu0u+A_A9R9316djW-YW3-+Gtgvy3ju655qRHR3jtdA@mail.gmail.com -set enable_memoize to off; -explain (costs off) - select 1 from tenk1 - where (hundred, thousand) in (select twothousand, twothousand from onek); -reset enable_memoize; - -- -- Hash Aggregation Spill tests -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 5f0a475894ddc..bccd171afb6c3 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -839,6 +839,13 @@ explain (costs off) select a.* from tenk1 a left join tenk1 b on a.unique1 = b.unique2 where b.unique2 is null; +-- check that we avoid de-duplicating columns redundantly +set enable_memoize to off; +explain (costs off) +select 1 from tenk1 +where (hundred, thousand) in (select twothousand, twothousand from onek); +reset enable_memoize; + -- -- regression 
test for bogus RTE_GROUP entries -- @@ -2420,6 +2427,32 @@ where t1.a = s.c; rollback; +-- check handling of semijoins after join removal: we must suppress +-- unique-ification of known-constant values +begin; + +create temp table t (a int unique, b int); +insert into t values (1, 2); + +explain (verbose, costs off) +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + +select t1.a from t t1 + left join t t2 on t1.a = t2.a + join t t3 on true +where exists (select 1 from t t4 + join t t5 on t4.b = t5.b + join t t6 on t5.b = t6.b + where t1.a = t4.a and t3.a = t5.a and t4.a = 1); + +rollback; + -- test cases where we can remove a join, but not a PHV computed at it begin; From fabd8b8e2a72bac9e5a361856a355bd5bb2e61b8 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Thu, 28 Aug 2025 17:06:42 -0700 Subject: [PATCH 586/602] Use LW_SHARED in walsummarizer.c for the WALSummarizerLock where possible. Previously, we used LW_EXCLUSIVE in several places despite only reading WalSummarizerCtl fields. This patch reduces the lock level to LW_SHARED where we are only reading the shared fields. Backpatch to 17, where WAL summarization was introduced. Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/CAD21AoDdKhf_9oriEYxY-JCdF+Oe_muhca3pcdkMEdBMzyHyKw@mail.gmail.com Backpatch-through: 17 --- src/backend/postmaster/walsummarizer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index 777c9a8d5553b..e1f142f20c7a0 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -644,7 +644,7 @@ WakeupWalSummarizer(void) if (WalSummarizerCtl == NULL) return; - LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE); + LWLockAcquire(WALSummarizerLock, LW_SHARED); pgprocno = WalSummarizerCtl->summarizer_pgprocno; LWLockRelease(WALSummarizerLock); @@ -685,7 +685,7 @@ WaitForWalSummarization(XLogRecPtr lsn) /* * If the LSN summarized on disk has reached the target value, stop. */ - LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE); + LWLockAcquire(WALSummarizerLock, LW_SHARED); summarized_lsn = WalSummarizerCtl->summarized_lsn; pending_lsn = WalSummarizerCtl->pending_lsn; LWLockRelease(WALSummarizerLock); From 97b0f36bde9a08bc6f004438ff8fc0afbcb418c0 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 29 Aug 2025 13:14:12 +0900 Subject: [PATCH 587/602] Fix semijoin unique-ification for child relations For a child relation, we should not assume that its parent's unique-ified relation (or unique-ified path in v18) always exists. In cases where all RHS columns that need to be unique-ified are equated to constants, the unique-ified relation/path for the parent table is not built, as there are no columns left to unique-ify. Failing to account for this can result in a SIGSEGV crash during planning. This patch checks whether the parent's unique-ified relation or path exists and skips unique-ification of the child relation if it does not.
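In outline, the guard is the one-line check added to create_unique_paths() in the planner.c hunk below; it is shown here in isolation only to make the failure mode concrete:

    /* Sketch of the added guard: a child relation cannot reuse its
     * parent's unique-ified relation if the parent never built one,
     * which happens when all RHS columns were equated to constants. */
    if (IS_OTHER_REL(rel) && rel->top_parent->unique_rel == NULL)
        return NULL;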
Author: Richard Guo Discussion: https://postgr.es/m/CAMbWs49MOdLW2c+qbLHHBt8VBu=4ONpM91D19=AWeW93eFUF6A@mail.gmail.com Backpatch-through: 18 --- src/backend/optimizer/plan/planner.c | 9 +++++ src/test/regress/expected/join.out | 60 ++++++++++++++++++++++++++++ src/test/regress/sql/join.sql | 37 +++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a8c8edfac75e3..41bd8353430df 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8307,6 +8307,15 @@ create_unique_paths(PlannerInfo *root, RelOptInfo *rel, SpecialJoinInfo *sjinfo) if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash)) return NULL; + /* + * Punt if this is a child relation and we failed to build a unique-ified + * relation for its parent. This can happen if all the RHS columns were + * found to be equated to constants when unique-ifying the parent table, + * leaving no columns to unique-ify. + */ + if (IS_OTHER_REL(rel) && rel->top_parent->unique_rel == NULL) + return NULL; + /* * When called during GEQO join planning, we are in a short-lived memory * context. We must make sure that the unique rel and any subsidiary data diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index b26b8c5bdbedd..04079268b9861 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6580,6 +6580,66 @@ where exists (select 1 from t t4 1 (1 row) +rollback; +-- check handling of semijoins if all RHS columns are equated to constants: we +-- should suppress unique-ification in this case. +begin; +create temp table t (a int, b int); +insert into t values (1, 2); +explain (costs off) +select * from t t1, t t2 where exists + (select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2); + QUERY PLAN +--------------------------------------------- + Nested Loop Semi Join + -> Nested Loop + -> Seq Scan on t t1 + Filter: (a = 1) + -> Materialize + -> Seq Scan on t t2 + Filter: (b = 2) + -> Materialize + -> Seq Scan on t t3 + Filter: ((a = 1) AND (b = 2)) +(10 rows) + +select * from t t1, t t2 where exists + (select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2); + a | b | a | b +---+---+---+--- + 1 | 2 | 1 | 2 +(1 row) + +rollback; +-- check handling of semijoin unique-ification for child relations if all RHS +-- columns are equated to constants. 
+begin; +create temp table p (a int, b int) partition by range (a); +create temp table p1 partition of p for values from (0) to (10); +create temp table p2 partition of p for values from (10) to (20); +insert into p values (1, 2); +insert into p values (10, 20); +set enable_partitionwise_join to on; +explain (costs off) +select * from p t1 where exists + (select 1 from p t2 where t1.a = t2.a and t1.a = 1); + QUERY PLAN +------------------------------- + Nested Loop Semi Join + -> Seq Scan on p1 t1 + Filter: (a = 1) + -> Materialize + -> Seq Scan on p1 t2 + Filter: (a = 1) +(6 rows) + +select * from p t1 where exists + (select 1 from p t2 where t1.a = t2.a and t1.a = 1); + a | b +---+--- + 1 | 2 +(1 row) + rollback; -- test cases where we can remove a join, but not a PHV computed at it begin; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index bccd171afb6c3..b1732453e8d3d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2453,6 +2453,43 @@ where exists (select 1 from t t4 rollback; +-- check handling of semijoins if all RHS columns are equated to constants: we +-- should suppress unique-ification in this case. +begin; + +create temp table t (a int, b int); +insert into t values (1, 2); + +explain (costs off) +select * from t t1, t t2 where exists + (select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2); + +select * from t t1, t t2 where exists + (select 1 from t t3 where t1.a = t3.a and t2.b = t3.b and t3.a = 1 and t3.b = 2); + +rollback; + +-- check handling of semijoin unique-ification for child relations if all RHS +-- columns are equated to constants. +begin; + +create temp table p (a int, b int) partition by range (a); +create temp table p1 partition of p for values from (0) to (10); +create temp table p2 partition of p for values from (10) to (20); +insert into p values (1, 2); +insert into p values (10, 20); + +set enable_partitionwise_join to on; + +explain (costs off) +select * from p t1 where exists + (select 1 from p t2 where t1.a = t2.a and t1.a = 1); + +select * from p t1 where exists + (select 1 from p t2 where t1.a = t2.a and t1.a = 1); + +rollback; + -- test cases where we can remove a join, but not a PHV computed at it begin; From 710e6c4301ee3c739a171ea12ed141b1f8df0d93 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 29 Aug 2025 07:09:02 +0200 Subject: [PATCH 588/602] Remove unneeded casts of BufferGetPage() result MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BufferGetPage() already returns type Page, so casting it to Page doesn't achieve anything. A sizable number of call sites do this casting; remove that. This was already done inconsistently in the code in the first import in 1996 (but didn't exist in the pre-1995 code), and it was then apparently just copied around.
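Each affected call site reduces to the pattern in this illustrative sketch (the variable names are generic; the two forms are identical in meaning precisely because BufferGetPage() already returns Page):

    Page page;

    page = (Page) BufferGetPage(buffer);    /* before: the cast is a no-op */
    page = BufferGetPage(buffer);           /* after: same result, less noise */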
Author: Kirill Reshke Reviewed-by: Chao Li Reviewed-by: Richard Guo Reviewed-by: Álvaro Herrera Reviewed-by: Peter Eisentraut Discussion: https://www.postgresql.org/message-id/flat/CALdSSPgFhc5=vLqHdk-zCcnztC0zEY3EU_Q6a9vPEaw7FkE9Vw@mail.gmail.com --- contrib/amcheck/verify_gin.c | 4 ++-- contrib/bloom/blvacuum.c | 2 +- contrib/pgstattuple/pgstatindex.c | 2 +- contrib/pgstattuple/pgstattuple.c | 4 ++-- src/backend/access/brin/brin_xlog.c | 12 +++++----- src/backend/access/gin/ginvacuum.c | 2 +- src/backend/access/gin/ginxlog.c | 4 ++-- src/backend/access/gist/gist.c | 14 ++++++------ src/backend/access/gist/gistbuild.c | 4 ++-- src/backend/access/gist/gistvacuum.c | 4 ++-- src/backend/access/gist/gistxlog.c | 10 ++++----- src/backend/access/hash/hash_xlog.c | 18 +++++++-------- src/backend/access/heap/heapam.c | 2 +- src/backend/access/heap/heapam_handler.c | 2 +- src/backend/access/heap/heapam_xlog.c | 8 +++---- src/backend/access/heap/pruneheap.c | 2 +- src/backend/access/heap/visibilitymap.c | 2 +- src/backend/access/nbtree/nbtxlog.c | 28 ++++++++++++------------ src/backend/access/spgist/spgvacuum.c | 4 ++-- src/backend/access/spgist/spgxlog.c | 6 ++--- src/backend/access/transam/xlogutils.c | 2 +- src/backend/commands/sequence.c | 2 +- 22 files changed, 69 insertions(+), 69 deletions(-) diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c index c615d950736f6..5c3eb4d0fd4f4 100644 --- a/contrib/amcheck/verify_gin.c +++ b/contrib/amcheck/verify_gin.c @@ -174,7 +174,7 @@ gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, RBM_NORMAL, strategy); LockBuffer(buffer, GIN_SHARE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); Assert(GinPageIsData(page)); @@ -434,7 +434,7 @@ gin_check_parent_keys_consistency(Relation rel, buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, RBM_NORMAL, strategy); LockBuffer(buffer, GIN_SHARE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); maxoff = PageGetMaxOffsetNumber(page); rightlink = GinPageGetOpaque(page)->rightlink; diff --git a/contrib/bloom/blvacuum.c b/contrib/bloom/blvacuum.c index 86b15a75f6fb9..9e5f0574fad2e 100644 --- a/contrib/bloom/blvacuum.c +++ b/contrib/bloom/blvacuum.c @@ -192,7 +192,7 @@ blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (PageIsNew(page) || BloomPageIsDeleted(page)) { diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index 4b9d76ec4e4df..40823d54fcac0 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -647,7 +647,7 @@ pgstathashindex(PG_FUNCTION_ARGS) buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buf, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(buf); + page = BufferGetPage(buf); if (PageIsNew(page)) stats.unused_pages++; diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index 0d9c2b0b65369..b5de68b7232d2 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -378,7 +378,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, hscan->rs_strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); - 
stat.free_space += PageGetExactFreeSpace((Page) BufferGetPage(buffer)); + stat.free_space += PageGetExactFreeSpace(BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); block++; } @@ -391,7 +391,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, hscan->rs_strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); - stat.free_space += PageGetExactFreeSpace((Page) BufferGetPage(buffer)); + stat.free_space += PageGetExactFreeSpace(BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); block++; } diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c index 85db2f0fd5ace..55348140fad3b 100644 --- a/src/backend/access/brin/brin_xlog.c +++ b/src/backend/access/brin/brin_xlog.c @@ -31,7 +31,7 @@ brin_xlog_createidx(XLogReaderState *record) /* create the index' metapage */ buf = XLogInitBufferForRedo(record, 0); Assert(BufferIsValid(buf)); - page = (Page) BufferGetPage(buf); + page = BufferGetPage(buf); brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version); PageSetLSN(page, lsn); MarkBufferDirty(buf); @@ -82,7 +82,7 @@ brin_xlog_insert_update(XLogReaderState *record, Assert(tuple->bt_blkno == xlrec->heapBlk); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); offnum = xlrec->offnum; if (PageGetMaxOffsetNumber(page) + 1 < offnum) elog(PANIC, "brin_xlog_insert_update: invalid max offset number"); @@ -104,7 +104,7 @@ brin_xlog_insert_update(XLogReaderState *record, ItemPointerData tid; ItemPointerSet(&tid, regpgno, xlrec->offnum); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, tid); @@ -146,7 +146,7 @@ brin_xlog_update(XLogReaderState *record) Page page; OffsetNumber offnum; - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); offnum = xlrec->oldOffnum; @@ -185,7 +185,7 @@ brin_xlog_samepage_update(XLogReaderState *record) brintuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); offnum = xlrec->offnum; @@ -254,7 +254,7 @@ brin_xlog_revmap_extend(XLogReaderState *record) */ buf = XLogInitBufferForRedo(record, 1); - page = (Page) BufferGetPage(buf); + page = BufferGetPage(buf); brin_page_init(page, BRIN_PAGETYPE_REVMAP); PageSetLSN(page, lsn); diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index fbbe3a6dd7046..2d833d6d61875 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -753,7 +753,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); LockBuffer(buffer, GIN_SHARE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (GinPageIsRecyclable(page)) { diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 55a1ec09776ba..4478e92820494 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -30,7 +30,7 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id) if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO) { - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT; PageSetLSN(page, lsn); @@ -50,7 +50,7 @@ ginRedoCreatePTree(XLogReaderState *record) Page page; buffer = XLogInitBufferForRedo(record, 0); - page = (Page) 
BufferGetPage(buffer); + page = BufferGetPage(buffer); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED); diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 7b24380c97801..a96796d5c7da9 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -696,7 +696,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, gistcheckpage(state.r, stack->buffer); } - stack->page = (Page) BufferGetPage(stack->buffer); + stack->page = BufferGetPage(stack->buffer); stack->lsn = xlocked ? PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer); Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn)); @@ -783,7 +783,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; - stack->page = (Page) BufferGetPage(stack->buffer); + stack->page = BufferGetPage(stack->buffer); if (PageGetLSN(stack->page) != stack->lsn) { @@ -847,7 +847,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, LockBuffer(stack->buffer, GIST_UNLOCK); LockBuffer(stack->buffer, GIST_EXCLUSIVE); xlocked = true; - stack->page = (Page) BufferGetPage(stack->buffer); + stack->page = BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); if (stack->blkno == GIST_ROOT_BLKNO) @@ -938,7 +938,7 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum) buffer = ReadBuffer(r, top->blkno); LockBuffer(buffer, GIST_SHARE); gistcheckpage(r, buffer); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (GistPageIsLeaf(page)) { @@ -1033,7 +1033,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) GISTInsertStack *ptr; gistcheckpage(r, parent->buffer); - parent->page = (Page) BufferGetPage(parent->buffer); + parent->page = BufferGetPage(parent->buffer); maxoff = PageGetMaxOffsetNumber(parent->page); /* Check if the downlink is still where it was before */ @@ -1098,7 +1098,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) parent->buffer = ReadBuffer(r, parent->blkno); LockBuffer(parent->buffer, GIST_EXCLUSIVE); gistcheckpage(r, parent->buffer); - parent->page = (Page) BufferGetPage(parent->buffer); + parent->page = BufferGetPage(parent->buffer); } /* @@ -1121,7 +1121,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) while (ptr) { ptr->buffer = ReadBuffer(r, ptr->blkno); - ptr->page = (Page) BufferGetPage(ptr->buffer); + ptr->page = BufferGetPage(ptr->buffer); ptr = ptr->parent; } diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 9e707167d984b..9b2ec9815f17e 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -969,7 +969,7 @@ gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, buffer = ReadBuffer(indexrel, blkno); LockBuffer(buffer, GIST_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); childoffnum = gistchoose(indexrel, page, itup, giststate); iid = PageGetItemId(page, childoffnum); idxtuple = (IndexTuple) PageGetItem(page, iid); @@ -1448,7 +1448,7 @@ gistGetMaxLevel(Relation index) * pro forma. 
*/ LockBuffer(buffer, GIST_SHARE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (GistPageIsLeaf(page)) { diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index dca236b6e5735..b925eda2b9b42 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -330,7 +330,7 @@ gistvacuumpage(GistVacState *vstate, Buffer buffer) * exclusive lock. */ LockBuffer(buffer, GIST_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (gistPageRecyclable(page)) { @@ -528,7 +528,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate) RBM_NORMAL, info->strategy); LockBuffer(buffer, GIST_SHARE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page)) { diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index b354e4ba5d1b7..42fee1f0764fa 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -83,7 +83,7 @@ gistRedoPageUpdateRecord(XLogReaderState *record) data = begin = XLogRecGetBlockData(record, 0, &datalen); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (xldata->ntodelete == 1 && xldata->ntoinsert == 1) { @@ -201,7 +201,7 @@ gistRedoDeleteRecord(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); PageIndexMultiDelete(page, toDelete, xldata->ntodelete); @@ -280,7 +280,7 @@ gistRedoPageSplitRecord(XLogReaderState *record) } buffer = XLogInitBufferForRedo(record, i + 1); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); data = XLogRecGetBlockData(record, i + 1, &datalen); tuples = decodePageSplitRecord(data, datalen, &num); @@ -348,7 +348,7 @@ gistRedoPageDelete(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &leafBuffer) == BLK_NEEDS_REDO) { - Page page = (Page) BufferGetPage(leafBuffer); + Page page = BufferGetPage(leafBuffer); GistPageSetDeleted(page, xldata->deleteXid); @@ -358,7 +358,7 @@ gistRedoPageDelete(XLogReaderState *record) if (XLogReadBufferForRedo(record, 1, &parentBuffer) == BLK_NEEDS_REDO) { - Page page = (Page) BufferGetPage(parentBuffer); + Page page = BufferGetPage(parentBuffer); PageIndexTupleDelete(page, xldata->downlinkOffset); diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c index 8d97067fe5403..d963a0c370292 100644 --- a/src/backend/access/hash/hash_xlog.c +++ b/src/backend/access/hash/hash_xlog.c @@ -38,7 +38,7 @@ hash_xlog_init_meta_page(XLogReaderState *record) Assert(BufferIsValid(metabuf)); _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid, xlrec->ffactor, true); - page = (Page) BufferGetPage(metabuf); + page = BufferGetPage(metabuf); PageSetLSN(page, lsn); MarkBufferDirty(metabuf); @@ -235,7 +235,7 @@ hash_xlog_add_ovfl_page(XLogReaderState *record) if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO) { - Page mappage = (Page) BufferGetPage(mapbuffer); + Page mappage = BufferGetPage(mapbuffer); uint32 *freep = NULL; uint32 *bitmap_page_bit; @@ -538,7 +538,7 @@ hash_xlog_move_page_contents(XLogReaderState *record) data = begin = XLogRecGetBlockData(record, 1, &datalen); - writepage = (Page) BufferGetPage(writebuf); + writepage = BufferGetPage(writebuf); if (xldata->ntups > 0) { @@ -584,7 +584,7 @@ 
hash_xlog_move_page_contents(XLogReaderState *record) ptr = XLogRecGetBlockData(record, 2, &len); - page = (Page) BufferGetPage(deletebuf); + page = BufferGetPage(deletebuf); if (len > 0) { @@ -670,7 +670,7 @@ hash_xlog_squeeze_page(XLogReaderState *record) data = begin = XLogRecGetBlockData(record, 1, &datalen); - writepage = (Page) BufferGetPage(writebuf); + writepage = BufferGetPage(writebuf); if (xldata->ntups > 0) { @@ -807,7 +807,7 @@ hash_xlog_squeeze_page(XLogReaderState *record) /* replay the record for bitmap page */ if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO) { - Page mappage = (Page) BufferGetPage(mapbuf); + Page mappage = BufferGetPage(mapbuf); uint32 *freep = NULL; char *data; uint32 *bitmap_page_bit; @@ -895,7 +895,7 @@ hash_xlog_delete(XLogReaderState *record) ptr = XLogRecGetBlockData(record, 1, &len); - page = (Page) BufferGetPage(deletebuf); + page = BufferGetPage(deletebuf); if (len > 0) { @@ -946,7 +946,7 @@ hash_xlog_split_cleanup(XLogReaderState *record) { HashPageOpaque bucket_opaque; - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); bucket_opaque = HashPageGetOpaque(page); bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP; @@ -1029,7 +1029,7 @@ hash_xlog_vacuum_one_page(XLogReaderState *record) if (action == BLK_NEEDS_REDO) { - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); PageIndexMultiDelete(page, toDelete, xldata->ntuples); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 7491cc3cb93bc..e3e7307ef5f79 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6099,7 +6099,7 @@ heap_finish_speculative(Relation relation, ItemPointer tid) buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); offnum = ItemPointerGetOffsetNumber(tid); if (PageGetMaxOffsetNumber(page) >= offnum) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index cb4bc35c93ed4..bcbac844bb669 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2280,7 +2280,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, if (!pagemode) LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - page = (Page) BufferGetPage(hscan->rs_cbuf); + page = BufferGetPage(hscan->rs_cbuf); all_visible = PageIsAllVisible(page) && !scan->rs_snapshot->takenDuringRecovery; maxoffset = PageGetMaxOffsetNumber(page); diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index eb4bd3d6ae3a3..5d48f071f53a7 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -78,7 +78,7 @@ heap_xlog_prune_freeze(XLogReaderState *record) &buffer); if (action == BLK_NEEDS_REDO) { - Page page = (Page) BufferGetPage(buffer); + Page page = BufferGetPage(buffer); OffsetNumber *redirected; OffsetNumber *nowdead; OffsetNumber *nowunused; @@ -599,7 +599,7 @@ heap_xlog_multi_insert(XLogReaderState *record) tupdata = XLogRecGetBlockData(record, 0, &len); endptr = tupdata + len; - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); for (i = 0; i < xlrec->ntuples; i++) { @@ -801,7 +801,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) { nbuffer = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(nbuffer); 
+ page = BufferGetPage(nbuffer); PageInit(page, BufferGetPageSize(nbuffer), 0); newaction = BLK_NEEDS_REDO; } @@ -1027,7 +1027,7 @@ heap_xlog_lock(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); offnum = xlrec->offnum; if (PageGetMaxOffsetNumber(page) >= offnum) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index a8025889be088..7ebd22f00a370 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -1563,7 +1563,7 @@ heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused) { - Page page = (Page) BufferGetPage(buffer); + Page page = BufferGetPage(buffer); OffsetNumber *offnum; HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY; diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 8f918e00af7ed..953ad4a484399 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -259,7 +259,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, #endif Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); - Assert(InRecovery || PageIsAllVisible((Page) BufferGetPage(heapBuf))); + Assert(InRecovery || PageIsAllVisible(BufferGetPage(heapBuf))); Assert((flags & VISIBILITYMAP_VALID_BITS) == flags); /* Must never set all_frozen bit without also setting all_visible bit */ diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index d31dd56732d2f..69ea668bb0d92 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -143,7 +143,7 @@ _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id) if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO) { - Page page = (Page) BufferGetPage(buf); + Page page = BufferGetPage(buf); BTPageOpaque pageop = BTPageGetOpaque(page); Assert(P_INCOMPLETE_SPLIT(pageop)); @@ -287,7 +287,7 @@ btree_xlog_split(bool newitemonleft, XLogReaderState *record) /* Reconstruct right (new) sibling page from scratch */ rbuf = XLogInitBufferForRedo(record, 1); datapos = XLogRecGetBlockData(record, 1, &datalen); - rpage = (Page) BufferGetPage(rbuf); + rpage = BufferGetPage(rbuf); _bt_pageinit(rpage, BufferGetPageSize(rbuf)); ropaque = BTPageGetOpaque(rpage); @@ -314,7 +314,7 @@ btree_xlog_split(bool newitemonleft, XLogReaderState *record) * checking possible. See also _bt_restore_page(), which does the * same for the right page. 
*/ - Page origpage = (Page) BufferGetPage(buf); + Page origpage = BufferGetPage(buf); BTPageOpaque oopaque = BTPageGetOpaque(origpage); OffsetNumber off; IndexTuple newitem = NULL, @@ -439,7 +439,7 @@ btree_xlog_split(bool newitemonleft, XLogReaderState *record) if (XLogReadBufferForRedo(record, 2, &sbuf) == BLK_NEEDS_REDO) { - Page spage = (Page) BufferGetPage(sbuf); + Page spage = BufferGetPage(sbuf); BTPageOpaque spageop = BTPageGetOpaque(spage); spageop->btpo_prev = rightpagenumber; @@ -470,7 +470,7 @@ btree_xlog_dedup(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO) { char *ptr = XLogRecGetBlockData(record, 0, NULL); - Page page = (Page) BufferGetPage(buf); + Page page = BufferGetPage(buf); BTPageOpaque opaque = BTPageGetOpaque(page); OffsetNumber offnum, minoff, @@ -614,7 +614,7 @@ btree_xlog_vacuum(XLogReaderState *record) { char *ptr = XLogRecGetBlockData(record, 0, NULL); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (xlrec->nupdated > 0) { @@ -680,7 +680,7 @@ btree_xlog_delete(XLogReaderState *record) { char *ptr = XLogRecGetBlockData(record, 0, NULL); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (xlrec->nupdated > 0) { @@ -740,7 +740,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) OffsetNumber nextoffset; BlockNumber rightsib; - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); pageop = BTPageGetOpaque(page); poffset = xlrec->poffset; @@ -769,7 +769,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) /* Rewrite the leaf page as a halfdead page */ buffer = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = BTPageGetOpaque(page); @@ -836,7 +836,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) { if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO) { - page = (Page) BufferGetPage(leftbuf); + page = BufferGetPage(leftbuf); pageop = BTPageGetOpaque(page); pageop->btpo_next = rightsib; @@ -849,7 +849,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) /* Rewrite target page as empty deleted page */ target = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(target); + page = BufferGetPage(target); _bt_pageinit(page, BufferGetPageSize(target)); pageop = BTPageGetOpaque(page); @@ -868,7 +868,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) /* Fix left-link of right sibling */ if (XLogReadBufferForRedo(record, 2, &rightbuf) == BLK_NEEDS_REDO) { - page = (Page) BufferGetPage(rightbuf); + page = BufferGetPage(rightbuf); pageop = BTPageGetOpaque(page); pageop->btpo_prev = leftsib; @@ -907,7 +907,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) Assert(!isleaf); leafbuf = XLogInitBufferForRedo(record, 3); - page = (Page) BufferGetPage(leafbuf); + page = BufferGetPage(leafbuf); _bt_pageinit(page, BufferGetPageSize(leafbuf)); pageop = BTPageGetOpaque(page); @@ -949,7 +949,7 @@ btree_xlog_newroot(XLogReaderState *record) Size len; buffer = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); _bt_pageinit(page, BufferGetPageSize(buffer)); pageop = BTPageGetOpaque(page); diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 2678f7ab7829a..8f8a1ad7796aa 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -626,7 
+626,7 @@ spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer) Page page; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (PageIsNew(page)) { @@ -707,7 +707,7 @@ spgprocesspending(spgBulkDeleteState *bds) buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, bds->info->strategy); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); if (PageIsNew(page) || SpGistPageIsDeleted(page)) { diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c index b7986e6f7131e..d4620c915d0cc 100644 --- a/src/backend/access/spgist/spgxlog.c +++ b/src/backend/access/spgist/spgxlog.c @@ -576,7 +576,7 @@ spgRedoPickSplit(XLogReaderState *record) { /* just re-init the source page */ srcBuffer = XLogInitBufferForRedo(record, 0); - srcPage = (Page) BufferGetPage(srcBuffer); + srcPage = BufferGetPage(srcBuffer); SpGistInitBuffer(srcBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); @@ -629,7 +629,7 @@ spgRedoPickSplit(XLogReaderState *record) { /* just re-init the dest page */ destBuffer = XLogInitBufferForRedo(record, 1); - destPage = (Page) BufferGetPage(destBuffer); + destPage = BufferGetPage(destBuffer); SpGistInitBuffer(destBuffer, SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0)); @@ -642,7 +642,7 @@ spgRedoPickSplit(XLogReaderState *record) * full-page-image case, but for safety let's hold it till later. */ if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO) - destPage = (Page) BufferGetPage(destBuffer); + destPage = BufferGetPage(destBuffer); else destPage = NULL; /* don't do any page updates */ } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 27ea52fdfee66..38176d9688e4c 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -523,7 +523,7 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, if (mode == RBM_NORMAL) { /* check that page has been initialized */ - Page page = (Page) BufferGetPage(buffer); + Page page = BufferGetPage(buffer); /* * We assume that PageIsNew is safe without a lock. During recovery, diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index a3c8cff97b03b..636d3c3ec737b 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -1920,7 +1920,7 @@ seq_redo(XLogReaderState *record) elog(PANIC, "seq_redo: unknown op code %u", info); buffer = XLogInitBufferForRedo(record, 0); - page = (Page) BufferGetPage(buffer); + page = BufferGetPage(buffer); /* * We always reinit the page. However, since this WAL record type is also From 991295f387a6a453fe061831bcc36294989fe77b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 29 Aug 2025 07:33:50 +0200 Subject: [PATCH 589/602] Mark ItemPointer arguments as const in tuple/table lock functions The functions LockTuple, ConditionalLockTuple, UnlockTuple, and XactLockTableWait take an ItemPointer argument that they do not modify, so the argument can be const-qualified to better convey intent and allow the compiler to enforce immutability. 
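Taking LockTuple() as a representative case, the signatures change as sketched here (copied from the lmgr.h hunk below):

    /* before: nothing tells the compiler that *tid is read-only */
    extern void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode);

    /* after: the pointed-to tuple identifier is const-qualified */
    extern void LockTuple(Relation relation, const ItemPointerData *tid,
                          LOCKMODE lockmode);

The qualifier is spelled const ItemPointerData * rather than const ItemPointer because ItemPointer is a typedef for ItemPointerData *; const ItemPointer would make the pointer variable itself const instead of the data it points to.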
Author: Chao Li Discussion: https://www.postgresql.org/message-id/flat/CAEoWx2m9e4rECHBwpRE4%2BGCH%2BpbYZXLh2f4rB1Du5hDfKug%2BOg%40mail.gmail.com --- src/backend/storage/lmgr/lmgr.c | 10 +++++----- src/include/storage/lmgr.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 3f6bf70bd3c24..4798eb7900379 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -55,7 +55,7 @@ typedef struct XactLockTableWaitInfo { XLTW_Oper oper; Relation rel; - ItemPointer ctid; + const ItemPointerData *ctid; } XactLockTableWaitInfo; static void XactLockTableWaitErrorCb(void *arg); @@ -559,7 +559,7 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode) * tuple. See heap_lock_tuple before using this! */ void -LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) +LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode) { LOCKTAG tag; @@ -579,7 +579,7 @@ LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) * Returns true iff the lock was acquired. */ bool -ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode, +ConditionalLockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode, bool logLockFailure) { LOCKTAG tag; @@ -598,7 +598,7 @@ ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode, * UnlockTuple */ void -UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) +UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode) { LOCKTAG tag; @@ -660,7 +660,7 @@ XactLockTableDelete(TransactionId xid) * and if so wait for its parent. */ void -XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, +XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper) { LOCKTAG tag; diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 58eee4e0d54c2..b7abd18397d62 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -71,16 +71,16 @@ extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE l extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); /* Lock a tuple (see heap_lock_tuple before assuming you understand this) */ -extern void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); -extern bool ConditionalLockTuple(Relation relation, ItemPointer tid, +extern void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode); +extern bool ConditionalLockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode, bool logLockFailure); -extern void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); +extern void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode); /* Lock an XID (used to wait for a transaction to finish) */ extern void XactLockTableInsert(TransactionId xid); extern void XactLockTableDelete(TransactionId xid); extern void XactLockTableWait(TransactionId xid, Relation rel, - ItemPointer ctid, XLTW_Oper oper); + const ItemPointerData *ctid, XLTW_Oper oper); extern bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure); From 664e0d678965719fe0d385aa3a8f2777058b119b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 29 Aug 2025 09:00:29 +0200 Subject: [PATCH 590/602] headerscheck: Use ICU_CFLAGS Otherwise, headerscheck will fail if the ICU headers are in a location not reached by the normal 
CFLAGS/CPPFLAGS: ../src/include/utils/pg_locale.h:21:10: fatal error: unicode/ucol.h: No such file or directory Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/1127775.1754417387%40sss.pgh.pa.us --- src/tools/pginclude/headerscheck | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck index 9e86d0493621e..30afa96730592 100755 --- a/src/tools/pginclude/headerscheck +++ b/src/tools/pginclude/headerscheck @@ -44,6 +44,7 @@ CXXFLAGS=${CXXFLAGS:- -fsyntax-only -Wall} MGLOB="$builddir/src/Makefile.global" CPPFLAGS=`sed -n 's/^CPPFLAGS[ ]*=[ ]*//p' "$MGLOB"` CFLAGS=`sed -n 's/^CFLAGS[ ]*=[ ]*//p' "$MGLOB"` +ICU_CFLAGS=`sed -n 's/^ICU_CFLAGS[ ]*=[ ]*//p' "$MGLOB"` CC=`sed -n 's/^CC[ ]*=[ ]*//p' "$MGLOB"` CXX=`sed -n 's/^CXX[ ]*=[ ]*//p' "$MGLOB"` PG_SYSROOT=`sed -n 's/^PG_SYSROOT[ ]*=[ ]*//p' "$MGLOB"` @@ -64,11 +65,11 @@ if $cplusplus; then -I*|-D*) CXXPPFLAGS="$CXXPPFLAGS $flag";; esac done - COMPILER_FLAGS="$CXXPPFLAGS $CXXFLAGS" + COMPILER_FLAGS="$CXXPPFLAGS $CXXFLAGS $ICU_CFLAGS" else ext=c COMPILER=${CC:-gcc} - COMPILER_FLAGS="$CPPFLAGS $CFLAGS" + COMPILER_FLAGS="$CPPFLAGS $CFLAGS $ICU_CFLAGS" fi # Create temp directory. From da0413373c092295d89d4523be6a956c34d39540 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 29 Aug 2025 09:00:29 +0200 Subject: [PATCH 591/602] headerscheck: Ignore Windows-specific header Ignore src/include/port/win32/sys/resource.h. At least on macOS, including this results in warnings and errors because of duplication with system headers: ../src/include/port/win32/sys/resource.h:10:9: warning: 'RUSAGE_CHILDREN' redefined ../src/include/port/win32/sys/resource.h:16:1: error: redefinition of struct or union 'struct rusage' Since we are also not checking similar system-replacement headers for Windows, it makes sense to exclude this one, too. Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/1127775.1754417387%40sss.pgh.pa.us --- src/tools/pginclude/headerscheck | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck index 30afa96730592..17138a7569e4f 100755 --- a/src/tools/pginclude/headerscheck +++ b/src/tools/pginclude/headerscheck @@ -98,7 +98,7 @@ do # Additional Windows-specific headers. test "$f" = src/include/port/win32_port.h && continue test "$f" = src/include/port/win32/netdb.h && continue - $cplusplus && test "$f" = src/include/port/win32/sys/resource.h && continue + test "$f" = src/include/port/win32/sys/resource.h && continue test "$f" = src/include/port/win32/sys/socket.h && continue test "$f" = src/include/port/win32_msvc/dirent.h && continue test "$f" = src/include/port/win32_msvc/utime.h && continue From f5d07085822a144afb169d0f422e25689cb6209f Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 29 Aug 2025 09:30:50 +0200 Subject: [PATCH 592/602] headerscheck: Document that --with-llvm is required We already documented that other --with-* options are required for a successful run. It turns out --with-llvm is also required. 
Suggested-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/1127775.1754417387%40sss.pgh.pa.us --- src/tools/pginclude/README | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/tools/pginclude/README b/src/tools/pginclude/README index 2f8fe6b78baa5..944bcb01c64dc 100644 --- a/src/tools/pginclude/README +++ b/src/tools/pginclude/README @@ -55,8 +55,12 @@ and are skipped by the headerscheck script. The easy way to run the script is to say "make -s headerscheck" in the top-level build directory after completing a build. You should -have included "--with-perl --with-python" in your configure options, -else you're likely to get errors about related headers not being found. +have included at least + + --with-llvm --with-perl --with-python + +in your configure options, else you're likely to get errors about +related headers not being found. A limitation of the current script is that it doesn't know exactly which headers are for frontend or backend; when in doubt it uses postgres.h as @@ -78,8 +82,12 @@ and are skipped by the script in the --cplusplus mode. The easy way to run the script is to say "make -s cpluspluscheck" in the top-level build directory after completing a build. You should -have included "--with-perl --with-python" in your configure options, -else you're likely to get errors about related headers not being found. +have included at least + + --with-llvm --with-perl --with-python + +in your configure options, else you're likely to get errors about +related headers not being found. If you are using a non-g++-compatible C++ compiler, you may need to override the script's CXXFLAGS setting by setting a suitable environment From 5d7f58848ce59d9e09b7214d2541ab3ec853f89c Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Fri, 29 Aug 2025 13:08:32 +0200 Subject: [PATCH 593/602] Fix typo in isolation test spec Replace 'committs' with 'commits'. 
Author: Chao Li Reviewed-by: Ashutosh Bapat Discussion: https://postgr.es/m/CAEoWx2=BESkfXsZ9jQW+1NcGTazKuj2wEXsPm1_EpgzHs0BHDQ@mail.gmail.com --- src/test/isolation/specs/eval-plan-qual-trigger.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/isolation/specs/eval-plan-qual-trigger.spec b/src/test/isolation/specs/eval-plan-qual-trigger.spec index b512edd28798a..232b3e27652a1 100644 --- a/src/test/isolation/specs/eval-plan-qual-trigger.spec +++ b/src/test/isolation/specs/eval-plan-qual-trigger.spec @@ -336,7 +336,7 @@ permutation s1_trig_rep_b_u s1_trig_rep_a_u s1_ins_a s1_ins_c s1_b_rc s2_b_rc s1_upd_a_tob s2_upd_all_data s1_c s2_c s0_rep -# s1 deletes, s2 updates, s1 committs, EPQ failure should lead to no update +# s1 deletes, s2 updates, s1 commits, EPQ failure should lead to no update permutation s1_trig_rep_b_d s1_trig_rep_b_u s1_trig_rep_a_d s1_trig_rep_a_u s1_ins_a s1_ins_c s1_b_rc s2_b_rc s1_del_a s2_upd_a_data s1_c s2_c @@ -346,7 +346,7 @@ permutation s1_trig_rep_b_d s1_trig_rep_b_u s1_trig_rep_a_d s1_trig_rep_a_u s1_ins_a s1_ins_c s1_b_rc s2_b_rc s1_del_a s2_upd_a_data s1_r s2_c s0_rep -# s1 deletes, s2 deletes, s1 committs, EPQ failure should lead to no delete +# s1 deletes, s2 deletes, s1 commits, EPQ failure should lead to no delete permutation s1_trig_rep_b_d s1_trig_rep_a_d s1_ins_a s1_ins_c s1_b_rc s2_b_rc s1_del_a s2_del_a s1_c s2_c From f225473cbae2cca5e1fde376aa4217a6b3a3f770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Fri, 29 Aug 2025 14:43:47 +0200 Subject: [PATCH 594/602] CREATE STATISTICS: improve misleading error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I think the error message for a different condition was inadvertently copied. This problem seems to have been introduced by commit a4d75c86bf15. 
Author: Álvaro Herrera Reported-by: jian he Reviewed-by: Tom Lane Backpatch-through: 14 Discussion: https://postgr.es/m/CACJufxEZ48toGH0Em_6vdsT57Y3L8pLF=DZCQ_gCii6=C3MeXw@mail.gmail.com --- src/backend/tcop/utility.c | 3 ++- src/test/regress/expected/stats_ext.out | 26 +++++++++++++++++++++++++ src/test/regress/sql/stats_ext.sql | 12 ++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 4f4191b0ea6b4..5f442bc3bd4e1 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1874,7 +1874,8 @@ ProcessUtilitySlow(ParseState *pstate, if (!IsA(rel, RangeVar)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("only a single relation is allowed in CREATE STATISTICS"))); + errmsg("cannot create statistics on the specified relation"), + errdetail("CREATE STATISTICS only supports tables, foreign tables and materialized views."))); /* * CREATE STATISTICS will influence future execution plans diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index c66e09f8b1651..a1f83b58b2398 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -54,6 +54,32 @@ CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test; ERROR: duplicate expression in statistics definition CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; ERROR: unrecognized statistics kind "unrecognized" +-- unsupported targets +CREATE STATISTICS tst ON a FROM (VALUES (x)) AS foo; +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. +CREATE STATISTICS tst ON a FROM foo NATURAL JOIN bar; +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. +CREATE STATISTICS tst ON a FROM (SELECT * FROM ext_stats_test) AS foo; +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. +CREATE STATISTICS tst ON a FROM ext_stats_test s TABLESAMPLE system (x); +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. +CREATE STATISTICS tst ON a FROM XMLTABLE('foo' PASSING 'bar' COLUMNS a text); +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. +CREATE STATISTICS tst ON a FROM JSON_TABLE(jsonb '123', '$' COLUMNS (item int)); +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. +CREATE FUNCTION tftest(int) returns table(a int, b int) as $$ +SELECT $1, $1+i FROM generate_series(1,5) g(i); +$$ LANGUAGE sql IMMUTABLE STRICT; +CREATE STATISTICS alt_stat2 ON a FROM tftest(1); +ERROR: cannot create statistics on the specified relation +DETAIL: CREATE STATISTICS only supports tables, foreign tables and materialized views. 
+DROP FUNCTION tftest; -- incorrect expressions CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference ERROR: extended statistics require at least 2 columns diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 9ce4c670ecbcd..823c7db9dab49 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -40,6 +40,18 @@ CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || ' CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test; CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test; CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; +-- unsupported targets +CREATE STATISTICS tst ON a FROM (VALUES (x)) AS foo; +CREATE STATISTICS tst ON a FROM foo NATURAL JOIN bar; +CREATE STATISTICS tst ON a FROM (SELECT * FROM ext_stats_test) AS foo; +CREATE STATISTICS tst ON a FROM ext_stats_test s TABLESAMPLE system (x); +CREATE STATISTICS tst ON a FROM XMLTABLE('foo' PASSING 'bar' COLUMNS a text); +CREATE STATISTICS tst ON a FROM JSON_TABLE(jsonb '123', '$' COLUMNS (item int)); +CREATE FUNCTION tftest(int) returns table(a int, b int) as $$ +SELECT $1, $1+i FROM generate_series(1,5) g(i); +$$ LANGUAGE sql IMMUTABLE STRICT; +CREATE STATISTICS alt_stat2 ON a FROM tftest(1); +DROP FUNCTION tftest; -- incorrect expressions CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses From da9f9f75e5ce27a45878ffa262156d18f0046188 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Sat, 30 Aug 2025 00:50:50 +1200 Subject: [PATCH 595/602] Fix possible use after free in expand_partitioned_rtentry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's possible that, if the only live partition is concurrently dropped and try_table_open() fails, the bms_del_member() will pfree the live_parts Bitmapset. Since the bms_del_member() call does not assign the result back to the live_parts local variable, the while loop could segfault as that variable would still reference the pfree'd Bitmapset. Backpatch to 15. 52f3de874 was backpatched to 14, but there's no bms_del_member() there due to live_parts not yet existing in RelOptInfo in that version. Technically there's no bug in version 15 as bms_del_member() didn't pfree when the set became empty prior to 00b41463c (from v16). Applied to v15 anyway to keep the code similar and to avoid the bad coding pattern. Author: Bernd Reiß Reviewed-by: David Rowley Discussion: https://postgr.es/m/6b88f27a-c45c-4826-8e37-d61a04d90182@gmx.at Backpatch-through: 15 --- src/backend/optimizer/util/inherit.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 30d158069e332..856d5959d1031 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -322,7 +322,6 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, PlanRowMark *top_parentrc, LOCKMODE lockmode) { PartitionDesc partdesc; - Bitmapset *live_parts; int num_live_parts; int i; @@ -356,10 +355,10 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, * that survive pruning. Below, we will initialize child objects for the * surviving partitions.
*/ - relinfo->live_parts = live_parts = prune_append_rel_partitions(relinfo); + relinfo->live_parts = prune_append_rel_partitions(relinfo); /* Expand simple_rel_array and friends to hold child objects. */ - num_live_parts = bms_num_members(live_parts); + num_live_parts = bms_num_members(relinfo->live_parts); if (num_live_parts > 0) expand_planner_arrays(root, num_live_parts); @@ -378,7 +377,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, * table itself, because it's not going to be scanned. */ i = -1; - while ((i = bms_next_member(live_parts, i)) >= 0) + while ((i = bms_next_member(relinfo->live_parts, i)) >= 0) { Oid childOID = partdesc->oids[i]; Relation childrel; From 8722e7965fd24bf94c278fb86cab99d0c5360532 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 29 Aug 2025 10:46:13 -0400 Subject: [PATCH 596/602] Silence -Wmissing-variable-declarations in headerscheck. Newer gcc versions will emit warnings about missing extern declarations if certain header files are compiled by themselves. Add the "extern" declarations needed to quiet that. Author: Tom Lane Reviewed-by: Peter Eisentraut Discussion: https://postgr.es/m/1127775.1754417387@sss.pgh.pa.us --- .../ecpg/test/expected/preproc-strings.c | 15 ++++++++++++++- src/interfaces/ecpg/test/preproc/strings.h | 10 ++++++++++ src/tools/gen_keywordlist.pl | 11 ++++++++++- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/interfaces/ecpg/test/expected/preproc-strings.c b/src/interfaces/ecpg/test/expected/preproc-strings.c index a26817968de79..06a86a2bfae4b 100644 --- a/src/interfaces/ecpg/test/expected/preproc-strings.c +++ b/src/interfaces/ecpg/test/expected/preproc-strings.c @@ -18,6 +18,16 @@ #line 3 "strings.pgc" /* exec sql begin declare section */ #line 1 "strings.h" +/* redundant declaration to silence -Wmissing-variable-declarations */ + + + + + + + + + @@ -29,7 +39,10 @@ #line 5 "strings.pgc" -#line 1 "strings.h" +#line 2 "strings.h" + extern char * s1 , * s2 , * s3 , * s4 , * s5 , * s6 , * s7 , * s8 ; + +#line 11 "strings.h" char * s1 , * s2 , * s3 , * s4 , * s5 , * s6 , * s7 , * s8 ; /* exec sql end declare section */ #line 5 "strings.pgc" diff --git a/src/interfaces/ecpg/test/preproc/strings.h b/src/interfaces/ecpg/test/preproc/strings.h index edb5be5339e6d..8932b4ea9ed11 100644 --- a/src/interfaces/ecpg/test/preproc/strings.h +++ b/src/interfaces/ecpg/test/preproc/strings.h @@ -1,3 +1,13 @@ +/* redundant declaration to silence -Wmissing-variable-declarations */ +extern char *s1, + *s2, + *s3, + *s4, + *s5, + *s6, + *s7, + *s8; + char *s1, *s2, *s3, diff --git a/src/tools/gen_keywordlist.pl b/src/tools/gen_keywordlist.pl index 6ec83ff33f9a9..bcb0d8027a04d 100644 --- a/src/tools/gen_keywordlist.pl +++ b/src/tools/gen_keywordlist.pl @@ -169,7 +169,16 @@ # Emit the struct that wraps all this lookup info into one variable. -printf $kwdef "static " if !$extern; +if ($extern) +{ + # redundant declaration to silence -Wmissing-variable-declarations + printf $kwdef "extern PGDLLIMPORT const ScanKeywordList %s;\n\n", + $varname; +} +else +{ + printf $kwdef "static "; +} printf $kwdef "const ScanKeywordList %s = {\n", $varname; printf $kwdef qq|\t%s_kw_string,\n|, $varname; printf $kwdef qq|\t%s_kw_offsets,\n|, $varname; From 6fbd7b93c615f34df63ee978e97c6eb0c85255de Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 29 Aug 2025 10:56:10 -0500 Subject: [PATCH 597/602] Remove unused parameter from ProcessSlotSyncInterrupts(). Oversight in commit 93db6cbda0. 
Author: ChangAo Chen Discussion: https://postgr.es/m/tencent_7B42BBE8D0A5C28DDAB91436192CBCCB8307%40qq.com --- src/backend/replication/logical/slotsync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index caa003cb7b3ac..f5501c106dcaf 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -1170,7 +1170,7 @@ slotsync_reread_config(void) * Interrupt handler for main loop of slot sync worker. */ static void -ProcessSlotSyncInterrupts(WalReceiverConn *wrconn) +ProcessSlotSyncInterrupts(void) { CHECK_FOR_INTERRUPTS(); @@ -1505,7 +1505,7 @@ ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len) { bool some_slot_updated = false; - ProcessSlotSyncInterrupts(wrconn); + ProcessSlotSyncInterrupts(); some_slot_updated = synchronize_slots(wrconn); From 66fa3b5eef6e4764a3beb097ecc9e04b6d956bc9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 29 Aug 2025 12:05:58 -0400 Subject: [PATCH 598/602] Fix .gitignore for src/interfaces/libpq-oauth. This missed files created when running the oauth tests. --- src/interfaces/libpq-oauth/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/interfaces/libpq-oauth/.gitignore b/src/interfaces/libpq-oauth/.gitignore index a4afe7c1c6858..eb5b98aea544c 100644 --- a/src/interfaces/libpq-oauth/.gitignore +++ b/src/interfaces/libpq-oauth/.gitignore @@ -1 +1,4 @@ /exports.list +/oauth_tests + +/tmp_check/ From 67fcf48c3bb3cc3cf51788332701ffb694fb108b Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 29 Aug 2025 12:13:37 -0500 Subject: [PATCH 599/602] Make LWLockCounter a global variable. Using the LWLockCounter requires first calculating its address in shared memory like this: LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int)); Commit 82e861fbe1 started this trend in order to fix EXEC_BACKEND builds, but it could also be fixed by adding it to the BackendParameters struct. The current approach is somewhat difficult to follow, so this commit switches to the latter. While at it, swap around the code in LWLockShmemSize() to match the order of assignments in CreateLWLocks() for added readability. 
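The effect on LWLockCounter consumers, condensed from the lwlock.c hunks below (a sketch, not the full diff):

    /* before: every consumer recomputed the address by hand */
    int *LWLockCounter;

    LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));

    /* after: LWLockCounter is a plain global, assigned once in
     * CreateLWLocks() and carried to EXEC_BACKEND children through
     * BackendParameters, so consumers just dereference it */
    SpinLockAcquire(ShmemLock);
    result = (*LWLockCounter)++;
    SpinLockRelease(ShmemLock);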
Reviewed-by: Tom Lane Reviewed-by: Bertrand Drouvot Discussion: https://postgr.es/m/aLDLnan9gNCS9fHx%40nathan --- src/backend/postmaster/launch_backend.c | 3 +++ src/backend/storage/lmgr/lwlock.c | 24 ++++++++---------------- src/include/storage/lwlock.h | 1 + 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index bf6b55ee83048..cd9547b03a32b 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -102,6 +102,7 @@ typedef struct #endif int NamedLWLockTrancheRequests; NamedLWLockTranche *NamedLWLockTrancheArray; + int *LWLockCounter; LWLockPadded *MainLWLockArray; slock_t *ProcStructLock; PROC_HDR *ProcGlobal; @@ -761,6 +762,7 @@ save_backend_variables(BackendParameters *param, param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; param->NamedLWLockTrancheArray = NamedLWLockTrancheArray; + param->LWLockCounter = LWLockCounter; param->MainLWLockArray = MainLWLockArray; param->ProcStructLock = ProcStructLock; param->ProcGlobal = ProcGlobal; @@ -1021,6 +1023,7 @@ restore_backend_variables(BackendParameters *param) NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; NamedLWLockTrancheArray = param->NamedLWLockTrancheArray; + LWLockCounter = param->LWLockCounter; MainLWLockArray = param->MainLWLockArray; ProcStructLock = param->ProcStructLock; ProcGlobal = param->ProcGlobal; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index c80b43f1f55cf..a4aecd1fbc34f 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -196,6 +196,7 @@ int NamedLWLockTrancheRequests = 0; /* points to data in shared memory: */ NamedLWLockTranche *NamedLWLockTrancheArray = NULL; +int *LWLockCounter = NULL; static void InitializeLWLocks(void); static inline void LWLockReportWaitStart(LWLock *lock); @@ -397,11 +398,11 @@ LWLockShmemSize(void) /* Calculate total number of locks needed in the main array. */ numLocks += NumLWLocksForNamedTranches(); - /* Space for the LWLock array. */ - size = mul_size(numLocks, sizeof(LWLockPadded)); - /* Space for dynamic allocation counter, plus room for alignment. */ - size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE); + size = sizeof(int) + LWLOCK_PADDED_SIZE; + + /* Space for the LWLock array. */ + size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded))); /* space for named tranches. */ size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche))); @@ -423,27 +424,20 @@ CreateLWLocks(void) if (!IsUnderPostmaster) { Size spaceLocks = LWLockShmemSize(); - int *LWLockCounter; char *ptr; /* Allocate space */ ptr = (char *) ShmemAlloc(spaceLocks); - /* Leave room for dynamic allocation of tranches */ + /* Initialize the dynamic-allocation counter for tranches */ + LWLockCounter = (int *) ptr; + *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED; ptr += sizeof(int); /* Ensure desired alignment of LWLock array */ ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE; - MainLWLockArray = (LWLockPadded *) ptr; - /* - * Initialize the dynamic-allocation counter for tranches, which is - * stored just before the first LWLock. 
-	 */
-	LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
-	*LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
-
 		/* Initialize all LWLocks */
 		InitializeLWLocks();
 	}
@@ -574,9 +568,7 @@ int
 LWLockNewTrancheId(void)
 {
 	int			result;
-	int		   *LWLockCounter;

-	LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
 	/* We use the ShmemLock spinlock to protect LWLockCounter */
 	SpinLockAcquire(ShmemLock);
 	result = (*LWLockCounter)++;
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 5e717765764f4..f9cf57f8d266d 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -82,6 +82,7 @@ typedef struct NamedLWLockTranche

 extern PGDLLIMPORT NamedLWLockTranche *NamedLWLockTrancheArray;
 extern PGDLLIMPORT int NamedLWLockTrancheRequests;
+extern PGDLLIMPORT int *LWLockCounter;

 /*
  * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS

From e686010c5b47f2e7f7e4e8d31ef69efadbbc4d72 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson
Date: Fri, 29 Aug 2025 19:28:46 +0200
Subject: [PATCH 600/602] pg_dump: Fix compression API error handling

Compression in pg_dump is abstracted using an API with multiple
implementations which can be selected at runtime by the user. The API
and its implementations have evolved over time; notable commits include
bf9aa490db, e9960732a9, 84adc8e20, and 0da243fed. The error handling
defined by the API was however problematic, and the implementations had
a few bugs and/or were not following the API specification. This commit
modifies the API to ensure that callers can perform error handling
efficiently, and fixes all the implementations such that they all
implement the API in the same way. A full list of the changes can be
seen below.

* write_func:
  - Make write_func throw an error on all error conditions. All callers
    of write_func were already checking for success and calling pg_fatal
    on all errors, so we might as well make the API support that case
    directly, with simpler error handling as a result.

* open_func:
  - zstd: Move stream initialization from the open function to the read
    and write functions, as they can have fatal errors. Also make sure
    to dup the file descriptor, like none and gzip.
  - lz4: Make sure to dup the file descriptor, like none and gzip.

* close_func:
  - zstd: Make sure to close the file descriptor even if closing down
    the compressor fails, and clean up state allocation on fclose
    failures. Make sure to capture errors set by fclose.
  - lz4: Make sure to close the file descriptor even if closing down the
    compressor fails, and instead of calling pg_fatal log the failures
    using pg_log_error. Make sure to capture errors set by fclose.
  - none: Make sure to catch errors set by fclose.

* read_func / gets_func:
  - Make read_func unconditionally return the number of read bytes,
    instead of making it optional per implementation.
  - lz4: Make sure to throw an error rather than return -1.
  - gzip: gzread returning zero cannot be assumed to indicate EOF, as it
    is documented to return zero for some types of errors.
  - lz4, zstd: Convert the _read_internal helper functions to not call
    pg_fatal on errors, to be able to handle gets_func returning NULL on
    error.

* getc_func:
  - zstd: Use an unsigned char rather than an int to read the char into.

* LZ4Stream_init:
  - Make sure not to switch to the inited state until we know that
    initialization succeeded, and reset errno just in case.

On top of these changes there are minor comment cleanups and
improvements, as well as an attempt to consistently reset errno in code
paths where it is inspected.
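To make the new write_func contract concrete, here is a sketch (it
mirrors the pg_backup_directory.c hunks below and adds nothing new): a
caller that previously had to check the result and guess at errno, like

	errno = 0;
	if (!CFH->write_func(buf, len, CFH))
	{
		/* if write didn't set errno, assume problem is no disk space */
		if (errno == 0)
			errno = ENOSPC;
		pg_fatal("could not write to output file: %s",
				 CFH->get_error_func(CFH));
	}

now reduces to a single call:

	CFH->write_func(buf, len, CFH);		/* exits via pg_fatal on error */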
This work was initiated by a report of API misuse, which turned into a larger body of work. As this is an internal API these changes can be backpatched into all affected branches. Author: Tom Lane Author: Daniel Gustafsson Reported-by: Evgeniy Gorbanev Discussion: https://postgr.es/m/517794.1750082166@sss.pgh.pa.us Backpatch-through: 16 --- src/bin/pg_dump/compress_gzip.c | 35 +++++-- src/bin/pg_dump/compress_io.c | 2 + src/bin/pg_dump/compress_io.h | 15 +-- src/bin/pg_dump/compress_lz4.c | 92 +++++++++------- src/bin/pg_dump/compress_none.c | 29 +++--- src/bin/pg_dump/compress_zstd.c | 144 ++++++++++++++++---------- src/bin/pg_dump/pg_backup_archiver.c | 4 +- src/bin/pg_dump/pg_backup_directory.c | 52 ++-------- 8 files changed, 208 insertions(+), 165 deletions(-) diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c index 5a30ebf9bf5b5..4a067e1402c3b 100644 --- a/src/bin/pg_dump/compress_gzip.c +++ b/src/bin/pg_dump/compress_gzip.c @@ -251,34 +251,49 @@ InitCompressorGzip(CompressorState *cs, *---------------------- */ -static bool -Gzip_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH) +static size_t +Gzip_read(void *ptr, size_t size, CompressFileHandle *CFH) { gzFile gzfp = (gzFile) CFH->private_data; int gzret; gzret = gzread(gzfp, ptr, size); - if (gzret <= 0 && !gzeof(gzfp)) + + /* + * gzread returns zero on EOF as well as some error conditions, and less + * than zero on other error conditions, so we need to inspect for EOF on + * zero. + */ + if (gzret <= 0) { int errnum; - const char *errmsg = gzerror(gzfp, &errnum); + const char *errmsg; + + if (gzret == 0 && gzeof(gzfp)) + return 0; + + errmsg = gzerror(gzfp, &errnum); pg_fatal("could not read from input file: %s", errnum == Z_ERRNO ? strerror(errno) : errmsg); } - if (rsize) - *rsize = (size_t) gzret; - - return true; + return (size_t) gzret; } -static bool +static void Gzip_write(const void *ptr, size_t size, CompressFileHandle *CFH) { gzFile gzfp = (gzFile) CFH->private_data; + int errnum; + const char *errmsg; - return gzwrite(gzfp, ptr, size) > 0; + if (gzwrite(gzfp, ptr, size) != size) + { + errmsg = gzerror(gzfp, &errnum); + pg_fatal("could not write to file: %s", + errnum == Z_ERRNO ? strerror(errno) : errmsg); + } } static int diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 8c3d9c911c47b..9cadc6f2a3f34 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -269,6 +269,7 @@ InitDiscoverCompressFileHandle(const char *path, const char *mode) } CFH = InitCompressFileHandle(compression_spec); + errno = 0; if (!CFH->open_func(fname, -1, mode, CFH)) { free_keep_errno(CFH); @@ -289,6 +290,7 @@ EndCompressFileHandle(CompressFileHandle *CFH) { bool ret = false; + errno = 0; if (CFH->private_data) ret = CFH->close_func(CFH); diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h index db9b38744c8e2..25a7bf0904d2e 100644 --- a/src/bin/pg_dump/compress_io.h +++ b/src/bin/pg_dump/compress_io.h @@ -123,21 +123,22 @@ struct CompressFileHandle CompressFileHandle *CFH); /* - * Read 'size' bytes of data from the file and store them into 'ptr'. - * Optionally it will store the number of bytes read in 'rsize'. + * Read up to 'size' bytes of data from the file and store them into + * 'ptr'. * - * Returns true on success and throws an internal error otherwise. + * Returns number of bytes read (this might be less than 'size' if EOF was + * reached). Exits via pg_fatal for all error conditions. 
*/ - bool (*read_func) (void *ptr, size_t size, size_t *rsize, + size_t (*read_func) (void *ptr, size_t size, CompressFileHandle *CFH); /* * Write 'size' bytes of data into the file from 'ptr'. * - * Returns true on success and false on error. + * Returns nothing, exits via pg_fatal for all error conditions. */ - bool (*write_func) (const void *ptr, size_t size, - struct CompressFileHandle *CFH); + void (*write_func) (const void *ptr, size_t size, + CompressFileHandle *CFH); /* * Read at most size - 1 characters from the compress file handle into diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c index e99f0cad71fcb..e2f7c46829308 100644 --- a/src/bin/pg_dump/compress_lz4.c +++ b/src/bin/pg_dump/compress_lz4.c @@ -12,6 +12,7 @@ *------------------------------------------------------------------------- */ #include "postgres_fe.h" +#include #include "compress_lz4.h" #include "pg_backup_utils.h" @@ -358,7 +359,6 @@ LZ4Stream_init(LZ4State *state, int size, bool compressing) return true; state->compressing = compressing; - state->inited = true; /* When compressing, write LZ4 header to the output stream. */ if (state->compressing) @@ -367,6 +367,7 @@ LZ4Stream_init(LZ4State *state, int size, bool compressing) if (!LZ4State_compression_init(state)) return false; + errno = 0; if (fwrite(state->buffer, 1, state->compressedlen, state->fp) != state->compressedlen) { errno = (errno) ? errno : ENOSPC; @@ -390,6 +391,7 @@ LZ4Stream_init(LZ4State *state, int size, bool compressing) state->overflowlen = 0; } + state->inited = true; return true; } @@ -457,7 +459,11 @@ LZ4Stream_read_internal(LZ4State *state, void *ptr, int ptrsize, bool eol_flag) /* Lazy init */ if (!LZ4Stream_init(state, size, false /* decompressing */ )) + { + pg_log_error("unable to initialize LZ4 library: %s", + LZ4F_getErrorName(state->errcode)); return -1; + } /* No work needs to be done for a zero-sized output buffer */ if (size <= 0) @@ -484,7 +490,10 @@ LZ4Stream_read_internal(LZ4State *state, void *ptr, int ptrsize, bool eol_flag) rsize = fread(readbuf, 1, size, state->fp); if (rsize < size && !feof(state->fp)) + { + pg_log_error("could not read from input file: %m"); return -1; + } rp = (char *) readbuf; rend = (char *) readbuf + rsize; @@ -501,6 +510,8 @@ LZ4Stream_read_internal(LZ4State *state, void *ptr, int ptrsize, bool eol_flag) if (LZ4F_isError(status)) { state->errcode = status; + pg_log_error("could not read from input file: %s", + LZ4F_getErrorName(state->errcode)); return -1; } @@ -558,7 +569,7 @@ LZ4Stream_read_internal(LZ4State *state, void *ptr, int ptrsize, bool eol_flag) /* * Compress size bytes from ptr and write them to the stream. */ -static bool +static void LZ4Stream_write(const void *ptr, size_t size, CompressFileHandle *CFH) { LZ4State *state = (LZ4State *) CFH->private_data; @@ -567,7 +578,8 @@ LZ4Stream_write(const void *ptr, size_t size, CompressFileHandle *CFH) /* Lazy init */ if (!LZ4Stream_init(state, size, true)) - return false; + pg_fatal("unable to initialize LZ4 library: %s", + LZ4F_getErrorName(state->errcode)); while (remaining > 0) { @@ -578,28 +590,24 @@ LZ4Stream_write(const void *ptr, size_t size, CompressFileHandle *CFH) status = LZ4F_compressUpdate(state->ctx, state->buffer, state->buflen, ptr, chunk, NULL); if (LZ4F_isError(status)) - { - state->errcode = status; - return false; - } + pg_fatal("error during writing: %s", LZ4F_getErrorName(status)); + errno = 0; if (fwrite(state->buffer, 1, status, state->fp) != status) { errno = (errno) ? 
errno : ENOSPC; - return false; + pg_fatal("error during writing: %m"); } ptr = ((const char *) ptr) + chunk; } - - return true; } /* * fread() equivalent implementation for LZ4 compressed files. */ -static bool -LZ4Stream_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH) +static size_t +LZ4Stream_read(void *ptr, size_t size, CompressFileHandle *CFH) { LZ4State *state = (LZ4State *) CFH->private_data; int ret; @@ -607,10 +615,7 @@ LZ4Stream_read(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH) if ((ret = LZ4Stream_read_internal(state, ptr, size, false)) < 0) pg_fatal("could not read from input file: %s", LZ4Stream_get_error(CFH)); - if (rsize) - *rsize = (size_t) ret; - - return true; + return (size_t) ret; } /* @@ -643,11 +648,13 @@ LZ4Stream_gets(char *ptr, int size, CompressFileHandle *CFH) int ret; ret = LZ4Stream_read_internal(state, ptr, size - 1, true); - if (ret < 0 || (ret == 0 && !LZ4Stream_eof(CFH))) - pg_fatal("could not read from input file: %s", LZ4Stream_get_error(CFH)); - /* Done reading */ - if (ret == 0) + /* + * LZ4Stream_read_internal returning 0 or -1 means that it was either an + * EOF or an error, but gets_func is defined to return NULL in either case + * so we can treat both the same here. + */ + if (ret <= 0) return NULL; /* @@ -669,6 +676,7 @@ LZ4Stream_close(CompressFileHandle *CFH) FILE *fp; LZ4State *state = (LZ4State *) CFH->private_data; size_t status; + int ret; fp = state->fp; if (state->inited) @@ -677,25 +685,31 @@ LZ4Stream_close(CompressFileHandle *CFH) { status = LZ4F_compressEnd(state->ctx, state->buffer, state->buflen, NULL); if (LZ4F_isError(status)) - pg_fatal("could not end compression: %s", - LZ4F_getErrorName(status)); - else if (fwrite(state->buffer, 1, status, state->fp) != status) { - errno = (errno) ? errno : ENOSPC; - WRITE_ERROR_EXIT; + pg_log_error("could not end compression: %s", + LZ4F_getErrorName(status)); + } + else + { + errno = 0; + if (fwrite(state->buffer, 1, status, state->fp) != status) + { + errno = (errno) ? 
errno : ENOSPC; + pg_log_error("could not write to output file: %m"); + } } status = LZ4F_freeCompressionContext(state->ctx); if (LZ4F_isError(status)) - pg_fatal("could not end compression: %s", - LZ4F_getErrorName(status)); + pg_log_error("could not end compression: %s", + LZ4F_getErrorName(status)); } else { status = LZ4F_freeDecompressionContext(state->dtx); if (LZ4F_isError(status)) - pg_fatal("could not end decompression: %s", - LZ4F_getErrorName(status)); + pg_log_error("could not end decompression: %s", + LZ4F_getErrorName(status)); pg_free(state->overflowbuf); } @@ -703,29 +717,35 @@ LZ4Stream_close(CompressFileHandle *CFH) } pg_free(state); + CFH->private_data = NULL; - return fclose(fp) == 0; + errno = 0; + ret = fclose(fp); + if (ret != 0) + { + pg_log_error("could not close file: %m"); + return false; + } + + return true; } static bool LZ4Stream_open(const char *path, int fd, const char *mode, CompressFileHandle *CFH) { - FILE *fp; LZ4State *state = (LZ4State *) CFH->private_data; if (fd >= 0) - fp = fdopen(fd, mode); + state->fp = fdopen(dup(fd), mode); else - fp = fopen(path, mode); - if (fp == NULL) + state->fp = fopen(path, mode); + if (state->fp == NULL) { state->errcode = errno; return false; } - state->fp = fp; - return true; } diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c index 3fc89c9985461..4abb2e95abc88 100644 --- a/src/bin/pg_dump/compress_none.c +++ b/src/bin/pg_dump/compress_none.c @@ -83,35 +83,31 @@ InitCompressorNone(CompressorState *cs, * Private routines */ -static bool -read_none(void *ptr, size_t size, size_t *rsize, CompressFileHandle *CFH) +static size_t +read_none(void *ptr, size_t size, CompressFileHandle *CFH) { FILE *fp = (FILE *) CFH->private_data; size_t ret; - if (size == 0) - return true; - ret = fread(ptr, 1, size, fp); - if (ret != size && !feof(fp)) + if (ferror(fp)) pg_fatal("could not read from input file: %m"); - if (rsize) - *rsize = ret; - - return true; + return ret; } -static bool +static void write_none(const void *ptr, size_t size, CompressFileHandle *CFH) { size_t ret; + errno = 0; ret = fwrite(ptr, 1, size, (FILE *) CFH->private_data); if (ret != size) - return false; - - return true; + { + errno = (errno) ? errno : ENOSPC; + pg_fatal("could not write to file: %m"); + } } static const char * @@ -153,7 +149,12 @@ close_none(CompressFileHandle *CFH) CFH->private_data = NULL; if (fp) + { + errno = 0; ret = fclose(fp); + if (ret != 0) + pg_log_error("could not close file: %m"); + } return ret == 0; } diff --git a/src/bin/pg_dump/compress_zstd.c b/src/bin/pg_dump/compress_zstd.c index cb595b10c2d32..e24d45e1bbe07 100644 --- a/src/bin/pg_dump/compress_zstd.c +++ b/src/bin/pg_dump/compress_zstd.c @@ -13,6 +13,7 @@ */ #include "postgres_fe.h" +#include #include "compress_zstd.h" #include "pg_backup_utils.h" @@ -258,8 +259,8 @@ InitCompressorZstd(CompressorState *cs, * Compressed stream API */ -static bool -Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH) +static size_t +Zstd_read_internal(void *ptr, size_t size, CompressFileHandle *CFH, bool exit_on_error) { ZstdCompressorState *zstdcs = (ZstdCompressorState *) CFH->private_data; ZSTD_inBuffer *input = &zstdcs->input; @@ -268,6 +269,22 @@ Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH) size_t res, cnt; + /* + * If this is the first call to the reading function, initialize the + * required datastructures. 
+ */ + if (zstdcs->dstream == NULL) + { + zstdcs->input.src = pg_malloc0(input_allocated_size); + zstdcs->dstream = ZSTD_createDStream(); + if (zstdcs->dstream == NULL) + { + if (exit_on_error) + pg_fatal("could not initialize compression library"); + return -1; + } + } + output->size = size; output->dst = ptr; output->pos = 0; @@ -292,6 +309,13 @@ Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH) if (input->pos == input->size) { cnt = fread(unconstify(void *, input->src), 1, input_allocated_size, zstdcs->fp); + if (ferror(zstdcs->fp)) + { + if (exit_on_error) + pg_fatal("could not read from input file: %m"); + return -1; + } + input->size = cnt; Assert(cnt <= input_allocated_size); @@ -307,7 +331,11 @@ Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH) res = ZSTD_decompressStream(zstdcs->dstream, output, input); if (ZSTD_isError(res)) - pg_fatal("could not decompress data: %s", ZSTD_getErrorName(res)); + { + if (exit_on_error) + pg_fatal("could not decompress data: %s", ZSTD_getErrorName(res)); + return -1; + } if (output->pos == output->size) break; /* No more room for output */ @@ -320,13 +348,10 @@ Zstd_read(void *ptr, size_t size, size_t *rdsize, CompressFileHandle *CFH) break; /* We read all the data that fits */ } - if (rdsize != NULL) - *rdsize = output->pos; - - return true; + return output->pos; } -static bool +static void Zstd_write(const void *ptr, size_t size, CompressFileHandle *CFH) { ZstdCompressorState *zstdcs = (ZstdCompressorState *) CFH->private_data; @@ -339,41 +364,40 @@ Zstd_write(const void *ptr, size_t size, CompressFileHandle *CFH) input->size = size; input->pos = 0; + if (zstdcs->cstream == NULL) + { + zstdcs->output.size = ZSTD_CStreamOutSize(); + zstdcs->output.dst = pg_malloc0(zstdcs->output.size); + zstdcs->cstream = _ZstdCStreamParams(CFH->compression_spec); + if (zstdcs->cstream == NULL) + pg_fatal("could not initialize compression library"); + } + /* Consume all input, to be flushed later */ while (input->pos != input->size) { output->pos = 0; res = ZSTD_compressStream2(zstdcs->cstream, output, input, ZSTD_e_continue); if (ZSTD_isError(res)) - { - zstdcs->zstderror = ZSTD_getErrorName(res); - return false; - } + pg_fatal("could not write to file: %s", ZSTD_getErrorName(res)); + errno = 0; cnt = fwrite(output->dst, 1, output->pos, zstdcs->fp); if (cnt != output->pos) { - zstdcs->zstderror = strerror(errno); - return false; + errno = (errno) ? errno : ENOSPC; + pg_fatal("could not write to file: %m"); } } - - return size; } static int Zstd_getc(CompressFileHandle *CFH) { - ZstdCompressorState *zstdcs = (ZstdCompressorState *) CFH->private_data; - int ret; + unsigned char ret; - if (CFH->read_func(&ret, 1, NULL, CFH) != 1) - { - if (feof(zstdcs->fp)) - pg_fatal("could not read from input file: end of file"); - else - pg_fatal("could not read from input file: %m"); - } + if (CFH->read_func(&ret, 1, CFH) != 1) + pg_fatal("could not read from input file: end of file"); return ret; } @@ -390,11 +414,7 @@ Zstd_gets(char *buf, int len, CompressFileHandle *CFH) */ for (i = 0; i < len - 1; ++i) { - size_t readsz; - - if (!CFH->read_func(&buf[i], 1, &readsz, CFH)) - break; - if (readsz != 1) + if (Zstd_read_internal(&buf[i], 1, CFH, false) != 1) break; if (buf[i] == '\n') { @@ -406,10 +426,17 @@ Zstd_gets(char *buf, int len, CompressFileHandle *CFH) return i > 0 ? 
buf : NULL; } +static size_t +Zstd_read(void *ptr, size_t size, CompressFileHandle *CFH) +{ + return Zstd_read_internal(ptr, size, CFH, true); +} + static bool Zstd_close(CompressFileHandle *CFH) { ZstdCompressorState *zstdcs = (ZstdCompressorState *) CFH->private_data; + bool success = true; if (zstdcs->cstream) { @@ -426,14 +453,18 @@ Zstd_close(CompressFileHandle *CFH) if (ZSTD_isError(res)) { zstdcs->zstderror = ZSTD_getErrorName(res); - return false; + success = false; + break; } + errno = 0; cnt = fwrite(output->dst, 1, output->pos, zstdcs->fp); if (cnt != output->pos) { + errno = (errno) ? errno : ENOSPC; zstdcs->zstderror = strerror(errno); - return false; + success = false; + break; } if (res == 0) @@ -450,11 +481,16 @@ Zstd_close(CompressFileHandle *CFH) pg_free(unconstify(void *, zstdcs->input.src)); } + errno = 0; if (fclose(zstdcs->fp) != 0) - return false; + { + zstdcs->zstderror = strerror(errno); + success = false; + } pg_free(zstdcs); - return true; + CFH->private_data = NULL; + return success; } static bool @@ -472,35 +508,33 @@ Zstd_open(const char *path, int fd, const char *mode, FILE *fp; ZstdCompressorState *zstdcs; + /* + * Clear state storage to avoid having the fd point to non-NULL memory on + * error return. + */ + CFH->private_data = NULL; + + zstdcs = (ZstdCompressorState *) pg_malloc_extended(sizeof(*zstdcs), + MCXT_ALLOC_NO_OOM | MCXT_ALLOC_ZERO); + if (!zstdcs) + { + errno = ENOMEM; + return false; + } + if (fd >= 0) - fp = fdopen(fd, mode); + fp = fdopen(dup(fd), mode); else fp = fopen(path, mode); if (fp == NULL) + { + pg_free(zstdcs); return false; + } - zstdcs = (ZstdCompressorState *) pg_malloc0(sizeof(*zstdcs)); - CFH->private_data = zstdcs; zstdcs->fp = fp; - - if (mode[0] == 'r') - { - zstdcs->input.src = pg_malloc0(ZSTD_DStreamInSize()); - zstdcs->dstream = ZSTD_createDStream(); - if (zstdcs->dstream == NULL) - pg_fatal("could not initialize compression library"); - } - else if (mode[0] == 'w' || mode[0] == 'a') - { - zstdcs->output.size = ZSTD_CStreamOutSize(); - zstdcs->output.dst = pg_malloc0(zstdcs->output.size); - zstdcs->cstream = _ZstdCStreamParams(CFH->compression_spec); - if (zstdcs->cstream == NULL) - pg_fatal("could not initialize compression library"); - } - else - pg_fatal("unhandled mode \"%s\"", mode); + CFH->private_data = zstdcs; return true; } diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 3c3acbaccdb51..058b5d659bacf 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -1883,8 +1883,8 @@ ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH) { CompressFileHandle *CFH = (CompressFileHandle *) AH->OF; - if (CFH->write_func(ptr, size * nmemb, CFH)) - bytes_written = size * nmemb; + CFH->write_func(ptr, size * nmemb, CFH); + bytes_written = size * nmemb; } if (bytes_written != size * nmemb) diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index bc2a2fb479741..94d401d8a4e5a 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -316,15 +316,9 @@ _WriteData(ArchiveHandle *AH, const void *data, size_t dLen) lclContext *ctx = (lclContext *) AH->formatData; CompressFileHandle *CFH = ctx->dataFH; - errno = 0; - if (dLen > 0 && !CFH->write_func(data, dLen, CFH)) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - pg_fatal("could not write to output file: %s", - CFH->get_error_func(CFH)); - } + if 
(dLen <= 0) + return; + CFH->write_func(data, dLen, CFH); } /* @@ -351,7 +345,7 @@ _EndData(ArchiveHandle *AH, TocEntry *te) static void _PrintFileData(ArchiveHandle *AH, char *filename) { - size_t cnt = 0; + size_t cnt; char *buf; size_t buflen; CompressFileHandle *CFH; @@ -366,7 +360,7 @@ _PrintFileData(ArchiveHandle *AH, char *filename) buflen = DEFAULT_IO_BUFFER_SIZE; buf = pg_malloc(buflen); - while (CFH->read_func(buf, buflen, &cnt, CFH) && cnt > 0) + while ((cnt = CFH->read_func(buf, buflen, CFH)) > 0) { ahwrite(buf, 1, cnt, AH); } @@ -470,16 +464,7 @@ _WriteByte(ArchiveHandle *AH, const int i) lclContext *ctx = (lclContext *) AH->formatData; CompressFileHandle *CFH = ctx->dataFH; - errno = 0; - if (!CFH->write_func(&c, 1, CFH)) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - pg_fatal("could not write to output file: %s", - CFH->get_error_func(CFH)); - } - + CFH->write_func(&c, 1, CFH); return 1; } @@ -508,15 +493,7 @@ _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len) lclContext *ctx = (lclContext *) AH->formatData; CompressFileHandle *CFH = ctx->dataFH; - errno = 0; - if (!CFH->write_func(buf, len, CFH)) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - pg_fatal("could not write to output file: %s", - CFH->get_error_func(CFH)); - } + CFH->write_func(buf, len, CFH); } /* @@ -531,10 +508,10 @@ _ReadBuf(ArchiveHandle *AH, void *buf, size_t len) CompressFileHandle *CFH = ctx->dataFH; /* - * If there was an I/O error, we already exited in readF(), so here we - * exit on short reads. + * We do not expect a short read, so fail if we get one. The read_func + * already dealt with any outright I/O error. */ - if (!CFH->read_func(buf, len, NULL, CFH)) + if (CFH->read_func(buf, len, CFH) != len) pg_fatal("could not read from input file: end of file"); } @@ -677,14 +654,7 @@ _EndLO(ArchiveHandle *AH, TocEntry *te, Oid oid) /* register the LO in blobs_NNN.toc */ len = snprintf(buf, sizeof(buf), "%u blob_%u.dat\n", oid, oid); - if (!CFH->write_func(buf, len, CFH)) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - pg_fatal("could not write to LOs TOC file: %s", - CFH->get_error_func(CFH)); - } + CFH->write_func(buf, len, CFH); } /* From f727b63e810724c7187f38b2580b2915bdbc3c9c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 29 Aug 2025 15:43:34 -0400 Subject: [PATCH 601/602] Provide error context when an error is thrown within WaitOnLock(). Show the requested lock level and the object being waited on, in the same format we use for deadlock reports and similar errors. This is particularly helpful for debugging lock-timeout errors, since otherwise the user has very little to go on about which lock timed out. The performance cost of setting up the callback should be negligible compared to the other tracing support already present in WaitOnLock. As in the deadlock-report case, we just show numeric object OIDs, because it seems too scary to try to perform catalog lookups in this context. 
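For example, a lock_timeout failure (hypothetical output, shown only to
illustrate the format of the new context line) might now report:

	ERROR:  canceling statement due to lock timeout
	CONTEXT:  waiting for ShareLock on transaction 12345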
Reported-by: Steve Baldwin Author: Tom Lane Discussion: https://postgr.es/m/1602369.1752167154@sss.pgh.pa.us --- src/backend/storage/lmgr/lock.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 233b85b623d5d..4cc7f645c3171 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -415,6 +415,7 @@ static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner); static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode); static void FinishStrongLockAcquire(void); static ProcWaitStatus WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner); +static void waitonlock_error_callback(void *arg); static void ReleaseLockIfHeld(LOCALLOCK *locallock, bool sessionLock); static void LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent); static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode, @@ -1931,6 +1932,7 @@ static ProcWaitStatus WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) { ProcWaitStatus result; + ErrorContextCallback waiterrcontext; TRACE_POSTGRESQL_LOCK_WAIT_START(locallock->tag.lock.locktag_field1, locallock->tag.lock.locktag_field2, @@ -1939,6 +1941,12 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) locallock->tag.lock.locktag_type, locallock->tag.mode); + /* Setup error traceback support for ereport() */ + waiterrcontext.callback = waitonlock_error_callback; + waiterrcontext.arg = (void *) locallock; + waiterrcontext.previous = error_context_stack; + error_context_stack = &waiterrcontext; + /* adjust the process title to indicate that it's waiting */ set_ps_display_suffix("waiting"); @@ -1990,6 +1998,8 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) /* reset ps display to remove the suffix */ set_ps_display_remove_suffix(); + error_context_stack = waiterrcontext.previous; + TRACE_POSTGRESQL_LOCK_WAIT_DONE(locallock->tag.lock.locktag_field1, locallock->tag.lock.locktag_field2, locallock->tag.lock.locktag_field3, @@ -2000,6 +2010,28 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) return result; } +/* + * error context callback for failures in WaitOnLock + * + * We report which lock was being waited on, in the same style used in + * deadlock reports. This helps with lock timeout errors in particular. + */ +static void +waitonlock_error_callback(void *arg) +{ + LOCALLOCK *locallock = (LOCALLOCK *) arg; + const LOCKTAG *tag = &locallock->tag.lock; + LOCKMODE mode = locallock->tag.mode; + StringInfoData locktagbuf; + + initStringInfo(&locktagbuf); + DescribeLockTag(&locktagbuf, tag); + + errcontext("waiting for %s on %s", + GetLockmodeName(tag->locktag_lockmethodid, mode), + locktagbuf.data); +} + /* * Remove a proc from the wait-queue it is on (caller must know it is on one). * This is only used when the proc has failed to get the lock, so we set its From 5487058b56e0a27b6b8f6bb7e819f107587c1e54 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 29 Aug 2025 20:34:53 -0500 Subject: [PATCH 602/602] Prepare DSM registry for upcoming changes to LWLock tranche names. A proposed patch would place a limit of NAMEDATALEN-1 (i.e., 63) bytes on the names of dynamically-allocated LWLock tranches, but GetNamedDSA() and GetNamedDSHash() may register tranches with longer names. This commit lowers the maximum DSM registry entry name length to NAMEDATALEN-1 bytes and modifies GetNamedDSHash() to create only one tranche, thereby allowing us to keep the DSM registry's tranche names below NAMEDATALEN bytes. 
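For illustration, a hypothetical extension (the names here are assumed,
not part of this patch) would now attach to a registry entry as below,
keeping the name under NAMEDATALEN-1 bytes since it also serves as the
LWLock tranche name:

	bool		found;
	dshash_table *htab;

	/*
	 * Assumes a dshash_parameters "params" defined elsewhere; its
	 * tranche_id member is ignored by GetNamedDSHash().
	 */
	htab = GetNamedDSHash("my_extension_hash", &params, &found);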
Author: Sami Imseih Discussion: https://postgr.es/m/aKzIg1JryN1qhNuy%40nathan --- src/backend/storage/ipc/dsm_registry.c | 57 +++++++++----------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c index 1682cc6d34c7f..ca12815f4a854 100644 --- a/src/backend/storage/ipc/dsm_registry.c +++ b/src/backend/storage/ipc/dsm_registry.c @@ -48,12 +48,6 @@ #include "utils/builtins.h" #include "utils/memutils.h" -#define DSMR_NAME_LEN 128 - -#define DSMR_DSA_TRANCHE_SUFFIX " DSA" -#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1) -#define DSMR_DSA_TRANCHE_NAME_LEN (DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN) - typedef struct DSMRegistryCtxStruct { dsa_handle dsah; @@ -72,15 +66,13 @@ typedef struct NamedDSAState { dsa_handle handle; int tranche; - char tranche_name[DSMR_DSA_TRANCHE_NAME_LEN]; } NamedDSAState; typedef struct NamedDSHState { - NamedDSAState dsa; - dshash_table_handle handle; + dsa_handle dsa_handle; + dshash_table_handle dsh_handle; int tranche; - char tranche_name[DSMR_NAME_LEN]; } NamedDSHState; typedef enum DSMREntryType @@ -99,7 +91,7 @@ static const char *const DSMREntryTypeNames[] = typedef struct DSMRegistryEntry { - char name[DSMR_NAME_LEN]; + char name[NAMEDATALEN]; DSMREntryType type; union { @@ -308,8 +300,7 @@ GetNamedDSA(const char *name, bool *found) /* Initialize the LWLock tranche for the DSA. */ state->tranche = LWLockNewTrancheId(); - strcpy(state->tranche_name, name); - LWLockRegisterTranche(state->tranche, state->tranche_name); + LWLockRegisterTranche(state->tranche, name); /* Initialize the DSA. */ ret = dsa_create(state->tranche); @@ -331,7 +322,7 @@ GetNamedDSA(const char *name, bool *found) (errmsg("requested DSA already attached to current process"))); /* Initialize existing LWLock tranche for the DSA. */ - LWLockRegisterTranche(state->tranche, state->tranche_name); + LWLockRegisterTranche(state->tranche, name); /* Attach to existing DSA. */ ret = dsa_attach(state->handle); @@ -348,11 +339,10 @@ GetNamedDSA(const char *name, bool *found) * Initialize or attach a named dshash table. * * This routine returns the address of the table. The tranche_id member of - * params is ignored; new tranche IDs will be generated if needed. Note that - * the DSA lock tranche will be registered with the provided name with " DSA" - * appended. The dshash lock tranche will be registered with the provided - * name. Also note that this should be called at most once for a given table - * in each backend. + * params is ignored; a new LWLock tranche ID will be generated if needed. + * Note that the lock tranche will be registered with the provided name. Also + * note that this should be called at most once for a given table in each + * backend. */ dshash_table * GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) @@ -381,25 +371,18 @@ GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) entry = dshash_find_or_insert(dsm_registry_table, name, found); if (!(*found)) { - NamedDSAState *dsa_state = &entry->data.dsh.dsa; NamedDSHState *dsh_state = &entry->data.dsh; dshash_parameters params_copy; dsa_area *dsa; entry->type = DSMR_ENTRY_TYPE_DSH; - /* Initialize the LWLock tranche for the DSA. 
*/ - dsa_state->tranche = LWLockNewTrancheId(); - sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX); - LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name); - - /* Initialize the LWLock tranche for the dshash table. */ + /* Initialize the LWLock tranche for the hash table. */ dsh_state->tranche = LWLockNewTrancheId(); - strcpy(dsh_state->tranche_name, name); - LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name); + LWLockRegisterTranche(dsh_state->tranche, name); /* Initialize the DSA for the hash table. */ - dsa = dsa_create(dsa_state->tranche); + dsa = dsa_create(dsh_state->tranche); dsa_pin(dsa); dsa_pin_mapping(dsa); @@ -409,34 +392,32 @@ GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) ret = dshash_create(dsa, ¶ms_copy, NULL); /* Store handles for other backends to use. */ - dsa_state->handle = dsa_get_handle(dsa); - dsh_state->handle = dshash_get_hash_table_handle(ret); + dsh_state->dsa_handle = dsa_get_handle(dsa); + dsh_state->dsh_handle = dshash_get_hash_table_handle(ret); } else if (entry->type != DSMR_ENTRY_TYPE_DSH) ereport(ERROR, (errmsg("requested DSHash does not match type of existing entry"))); else { - NamedDSAState *dsa_state = &entry->data.dsh.dsa; NamedDSHState *dsh_state = &entry->data.dsh; dsa_area *dsa; /* XXX: Should we verify params matches what table was created with? */ - if (dsa_is_attached(dsa_state->handle)) + if (dsa_is_attached(dsh_state->dsa_handle)) ereport(ERROR, (errmsg("requested DSHash already attached to current process"))); - /* Initialize existing LWLock tranches for the DSA and dshash table. */ - LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name); - LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name); + /* Initialize existing LWLock tranche for the hash table. */ + LWLockRegisterTranche(dsh_state->tranche, name); /* Attach to existing DSA for the hash table. */ - dsa = dsa_attach(dsa_state->handle); + dsa = dsa_attach(dsh_state->dsa_handle); dsa_pin_mapping(dsa); /* Attach to existing dshash table. */ - ret = dshash_attach(dsa, params, dsh_state->handle, NULL); + ret = dshash_attach(dsa, params, dsh_state->dsh_handle, NULL); } dshash_release_lock(dsm_registry_table, entry);