Skip to content

Commit a0cdfc8

Browse files
committed
Disallow partitionwise join when collations don't match
If the collation of any join key column doesn’t match the collation of the corresponding partition key, partitionwise joins can yield incorrect results. For example, rows that would match under the join key collation might be located in different partitions due to the partitioning collation. In such cases, a partitionwise join would yield different results from a non-partitionwise join, so disallow it in such cases. Reported-by: Tender Wang <tndrwang@gmail.com> Author: Jian He <jian.universality@gmail.com> Reviewed-by: Tender Wang <tndrwang@gmail.com> Reviewed-by: Junwang Zhao <zhjwpku@gmail.com> Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com Backpatch-through: 12
1 parent b6484ca commit a0cdfc8

File tree

3 files changed

+144
-0
lines changed

3 files changed

+144
-0
lines changed

src/backend/optimizer/util/relnode.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2188,6 +2188,10 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
21882188
if (ipk1 != ipk2)
21892189
continue;
21902190

2191+
/* Reject if the partition key collation differs from the clause's. */
2192+
if (rel1->part_scheme->partcollation[ipk1] != opexpr->inputcollid)
2193+
return false;
2194+
21912195
/*
21922196
* The clause allows partitionwise join only if it uses the same
21932197
* operator family as that specified by the partition key.

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2138,6 +2138,124 @@ SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
21382138
b | 5
21392139
(4 rows)
21402140

2141+
-- Partitionwise join should not be allowed too when the collation used by the
2142+
-- join keys doesn't match the partition key collation.
2143+
SET enable_partitionwise_join TO false;
2144+
EXPLAIN (COSTS OFF)
2145+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2146+
QUERY PLAN
2147+
-------------------------------------------------------------
2148+
Sort
2149+
Sort Key: t1.c COLLATE "C"
2150+
-> HashAggregate
2151+
Group Key: t1.c
2152+
-> Hash Join
2153+
Hash Cond: (t1.c = t2.c)
2154+
-> Append
2155+
-> Seq Scan on pagg_tab3_p2 t1_1
2156+
-> Seq Scan on pagg_tab3_p1 t1_2
2157+
-> Hash
2158+
-> Append
2159+
-> Seq Scan on pagg_tab3_p2 t2_1
2160+
-> Seq Scan on pagg_tab3_p1 t2_2
2161+
(13 rows)
2162+
2163+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2164+
c | count
2165+
---+-------
2166+
A | 100
2167+
B | 100
2168+
(2 rows)
2169+
2170+
SET enable_partitionwise_join TO true;
2171+
EXPLAIN (COSTS OFF)
2172+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2173+
QUERY PLAN
2174+
-------------------------------------------------------------
2175+
Sort
2176+
Sort Key: t1.c COLLATE "C"
2177+
-> HashAggregate
2178+
Group Key: t1.c
2179+
-> Hash Join
2180+
Hash Cond: (t1.c = t2.c)
2181+
-> Append
2182+
-> Seq Scan on pagg_tab3_p2 t1_1
2183+
-> Seq Scan on pagg_tab3_p1 t1_2
2184+
-> Hash
2185+
-> Append
2186+
-> Seq Scan on pagg_tab3_p2 t2_1
2187+
-> Seq Scan on pagg_tab3_p1 t2_2
2188+
(13 rows)
2189+
2190+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2191+
c | count
2192+
---+-------
2193+
A | 100
2194+
B | 100
2195+
(2 rows)
2196+
2197+
-- OK when the join clause uses the same collation as the partition key.
2198+
EXPLAIN (COSTS OFF)
2199+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2200+
QUERY PLAN
2201+
------------------------------------------------------------------
2202+
Sort
2203+
Sort Key: ((t1.c)::text) COLLATE "C"
2204+
-> Append
2205+
-> HashAggregate
2206+
Group Key: (t1.c)::text
2207+
-> Hash Join
2208+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2209+
-> Seq Scan on pagg_tab3_p2 t1
2210+
-> Hash
2211+
-> Seq Scan on pagg_tab3_p2 t2
2212+
-> HashAggregate
2213+
Group Key: (t1_1.c)::text
2214+
-> Hash Join
2215+
Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
2216+
-> Seq Scan on pagg_tab3_p1 t1_1
2217+
-> Hash
2218+
-> Seq Scan on pagg_tab3_p1 t2_1
2219+
(17 rows)
2220+
2221+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2222+
c | count
2223+
---+-------
2224+
A | 25
2225+
B | 25
2226+
a | 25
2227+
b | 25
2228+
(4 rows)
2229+
2230+
SET enable_partitionwise_join TO false;
2231+
EXPLAIN (COSTS OFF)
2232+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2233+
QUERY PLAN
2234+
-------------------------------------------------------------
2235+
Sort
2236+
Sort Key: ((t1.c)::text) COLLATE "C"
2237+
-> HashAggregate
2238+
Group Key: (t1.c)::text
2239+
-> Hash Join
2240+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2241+
-> Append
2242+
-> Seq Scan on pagg_tab3_p2 t1_1
2243+
-> Seq Scan on pagg_tab3_p1 t1_2
2244+
-> Hash
2245+
-> Append
2246+
-> Seq Scan on pagg_tab3_p2 t2_1
2247+
-> Seq Scan on pagg_tab3_p1 t2_2
2248+
(13 rows)
2249+
2250+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2251+
c | count
2252+
---+-------
2253+
A | 25
2254+
B | 25
2255+
a | 25
2256+
b | 25
2257+
(4 rows)
2258+
21412259
DROP TABLE pagg_tab3;
21422260
RESET enable_partitionwise_aggregate;
21432261
RESET max_parallel_workers_per_gather;

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,28 @@ EXPLAIN (COSTS OFF)
828828
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
829829
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
830830

831+
-- Partitionwise join should not be allowed too when the collation used by the
832+
-- join keys doesn't match the partition key collation.
833+
SET enable_partitionwise_join TO false;
834+
EXPLAIN (COSTS OFF)
835+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
836+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
837+
838+
SET enable_partitionwise_join TO true;
839+
EXPLAIN (COSTS OFF)
840+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
841+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
842+
843+
-- OK when the join clause uses the same collation as the partition key.
844+
EXPLAIN (COSTS OFF)
845+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
846+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
847+
848+
SET enable_partitionwise_join TO false;
849+
EXPLAIN (COSTS OFF)
850+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
851+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
852+
831853
DROP TABLE pagg_tab3;
832854

833855
RESET enable_partitionwise_aggregate;

0 commit comments

Comments
 (0)