Skip to content

Commit 054701a

Browse files
committed
Disallow partitionwise join when collations don't match
If the collation of any join key column doesn’t match the collation of the corresponding partition key, partitionwise joins can yield incorrect results. For example, rows that would match under the join key collation might be located in different partitions due to the partitioning collation. In such cases, a partitionwise join would yield different results from a non-partitionwise join, so disallow it in such cases. Reported-by: Tender Wang <tndrwang@gmail.com> Author: Jian He <jian.universality@gmail.com> Reviewed-by: Tender Wang <tndrwang@gmail.com> Reviewed-by: Junwang Zhao <zhjwpku@gmail.com> Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com Backpatch-through: 12
1 parent ff65f69 commit 054701a

File tree

3 files changed

+144
-0
lines changed

3 files changed

+144
-0
lines changed

src/backend/optimizer/util/relnode.c

+4
Original file line numberDiff line numberDiff line change
@@ -1792,6 +1792,10 @@ have_partkey_equi_join(RelOptInfo *joinrel,
17921792
if (ipk1 != ipk2)
17931793
continue;
17941794

1795+
/* Reject if the partition key collation differs from the clause's. */
1796+
if (rel1->part_scheme->partcollation[ipk1] != opexpr->inputcollid)
1797+
return false;
1798+
17951799
/*
17961800
* The clause allows partitionwise join only if it uses the same
17971801
* operator family as that specified by the partition key.

src/test/regress/expected/collate.icu.utf8.out

+118
Original file line numberDiff line numberDiff line change
@@ -2036,6 +2036,124 @@ SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
20362036
b | 5
20372037
(4 rows)
20382038

2039+
-- Partitionwise join should not be allowed too when the collation used by the
2040+
-- join keys doesn't match the partition key collation.
2041+
SET enable_partitionwise_join TO false;
2042+
EXPLAIN (COSTS OFF)
2043+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2044+
QUERY PLAN
2045+
-------------------------------------------------------------
2046+
Sort
2047+
Sort Key: t1.c COLLATE "C"
2048+
-> HashAggregate
2049+
Group Key: t1.c
2050+
-> Hash Join
2051+
Hash Cond: (t1.c = t2.c)
2052+
-> Append
2053+
-> Seq Scan on pagg_tab3_p2 t1_1
2054+
-> Seq Scan on pagg_tab3_p1 t1_2
2055+
-> Hash
2056+
-> Append
2057+
-> Seq Scan on pagg_tab3_p2 t2_1
2058+
-> Seq Scan on pagg_tab3_p1 t2_2
2059+
(13 rows)
2060+
2061+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2062+
c | count
2063+
---+-------
2064+
A | 100
2065+
B | 100
2066+
(2 rows)
2067+
2068+
SET enable_partitionwise_join TO true;
2069+
EXPLAIN (COSTS OFF)
2070+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2071+
QUERY PLAN
2072+
-------------------------------------------------------------
2073+
Sort
2074+
Sort Key: t1.c COLLATE "C"
2075+
-> HashAggregate
2076+
Group Key: t1.c
2077+
-> Hash Join
2078+
Hash Cond: (t1.c = t2.c)
2079+
-> Append
2080+
-> Seq Scan on pagg_tab3_p2 t1_1
2081+
-> Seq Scan on pagg_tab3_p1 t1_2
2082+
-> Hash
2083+
-> Append
2084+
-> Seq Scan on pagg_tab3_p2 t2_1
2085+
-> Seq Scan on pagg_tab3_p1 t2_2
2086+
(13 rows)
2087+
2088+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2089+
c | count
2090+
---+-------
2091+
A | 100
2092+
B | 100
2093+
(2 rows)
2094+
2095+
-- OK when the join clause uses the same collation as the partition key.
2096+
EXPLAIN (COSTS OFF)
2097+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2098+
QUERY PLAN
2099+
------------------------------------------------------------------
2100+
Sort
2101+
Sort Key: ((t1.c)::text) COLLATE "C"
2102+
-> Append
2103+
-> HashAggregate
2104+
Group Key: (t1.c)::text
2105+
-> Hash Join
2106+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2107+
-> Seq Scan on pagg_tab3_p2 t1
2108+
-> Hash
2109+
-> Seq Scan on pagg_tab3_p2 t2
2110+
-> HashAggregate
2111+
Group Key: (t1_1.c)::text
2112+
-> Hash Join
2113+
Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
2114+
-> Seq Scan on pagg_tab3_p1 t1_1
2115+
-> Hash
2116+
-> Seq Scan on pagg_tab3_p1 t2_1
2117+
(17 rows)
2118+
2119+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2120+
c | count
2121+
---+-------
2122+
A | 25
2123+
B | 25
2124+
a | 25
2125+
b | 25
2126+
(4 rows)
2127+
2128+
SET enable_partitionwise_join TO false;
2129+
EXPLAIN (COSTS OFF)
2130+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2131+
QUERY PLAN
2132+
-------------------------------------------------------------
2133+
Sort
2134+
Sort Key: ((t1.c)::text) COLLATE "C"
2135+
-> HashAggregate
2136+
Group Key: (t1.c)::text
2137+
-> Hash Join
2138+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2139+
-> Append
2140+
-> Seq Scan on pagg_tab3_p2 t1_1
2141+
-> Seq Scan on pagg_tab3_p1 t1_2
2142+
-> Hash
2143+
-> Append
2144+
-> Seq Scan on pagg_tab3_p2 t2_1
2145+
-> Seq Scan on pagg_tab3_p1 t2_2
2146+
(13 rows)
2147+
2148+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2149+
c | count
2150+
---+-------
2151+
A | 25
2152+
B | 25
2153+
a | 25
2154+
b | 25
2155+
(4 rows)
2156+
20392157
DROP TABLE pagg_tab3;
20402158
RESET enable_partitionwise_aggregate;
20412159
RESET max_parallel_workers_per_gather;

src/test/regress/sql/collate.icu.utf8.sql

+22
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,28 @@ EXPLAIN (COSTS OFF)
779779
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
780780
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
781781

782+
-- Partitionwise join should not be allowed too when the collation used by the
783+
-- join keys doesn't match the partition key collation.
784+
SET enable_partitionwise_join TO false;
785+
EXPLAIN (COSTS OFF)
786+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
787+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
788+
789+
SET enable_partitionwise_join TO true;
790+
EXPLAIN (COSTS OFF)
791+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
792+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
793+
794+
-- OK when the join clause uses the same collation as the partition key.
795+
EXPLAIN (COSTS OFF)
796+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
797+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
798+
799+
SET enable_partitionwise_join TO false;
800+
EXPLAIN (COSTS OFF)
801+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
802+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
803+
782804
DROP TABLE pagg_tab3;
783805

784806
RESET enable_partitionwise_aggregate;

0 commit comments

Comments
 (0)