Skip to content

Commit 075acdd

Browse files
committed
Disallow partitionwise join when collations don't match
If the collation of any join key column doesn’t match the collation of the corresponding partition key, partitionwise joins can yield incorrect results. For example, rows that would match under the join key collation might be located in different partitions due to the partitioning collation. In such cases, a partitionwise join would yield different results from a non-partitionwise join, so disallow it in such cases. Reported-by: Tender Wang <tndrwang@gmail.com> Author: Jian He <jian.universality@gmail.com> Reviewed-by: Tender Wang <tndrwang@gmail.com> Reviewed-by: Junwang Zhao <zhjwpku@gmail.com> Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com Backpatch-through: 12
1 parent 90fe625 commit 075acdd

File tree

3 files changed

+324
-2
lines changed

3 files changed

+324
-2
lines changed

src/backend/optimizer/util/relnode.c

+26-2
Original file line numberDiff line numberDiff line change
@@ -2185,6 +2185,10 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
21852185
if (pk_known_equal[ipk1])
21862186
continue;
21872187

2188+
/* Reject if the partition key collation differs from the clause's. */
2189+
if (rel1->part_scheme->partcollation[ipk1] != opexpr->inputcollid)
2190+
return false;
2191+
21882192
/*
21892193
* The clause allows partitionwise join only if it uses the same
21902194
* operator family as that specified by the partition key.
@@ -2258,15 +2262,35 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
22582262
{
22592263
Node *expr1 = (Node *) lfirst(lc);
22602264
ListCell *lc2;
2265+
Oid partcoll1 = rel1->part_scheme->partcollation[ipk];
2266+
Oid exprcoll1 = exprCollation(expr1);
22612267

22622268
foreach(lc2, rel2->partexprs[ipk])
22632269
{
22642270
Node *expr2 = (Node *) lfirst(lc2);
22652271

22662272
if (exprs_known_equal(root, expr1, expr2, btree_opfamily))
22672273
{
2268-
pk_known_equal[ipk] = true;
2269-
break;
2274+
/*
2275+
* Ensure that the collation of the expression matches
2276+
* that of the partition key. Checking just one collation
2277+
* (partcoll1 and exprcoll1) suffices because partcoll1
2278+
* and partcoll2, as well as exprcoll1 and exprcoll2,
2279+
* should be identical. This holds because both rel1 and
2280+
* rel2 use the same PartitionScheme and expr1 and expr2
2281+
* are equal.
2282+
*/
2283+
if (partcoll1 == exprcoll1)
2284+
{
2285+
Oid partcoll2 PG_USED_FOR_ASSERTS_ONLY =
2286+
rel2->part_scheme->partcollation[ipk];
2287+
Oid exprcoll2 PG_USED_FOR_ASSERTS_ONLY =
2288+
exprCollation(expr2);
2289+
2290+
Assert(partcoll2 == exprcoll2);
2291+
pk_known_equal[ipk] = true;
2292+
break;
2293+
}
22702294
}
22712295
}
22722296
if (pk_known_equal[ipk])

src/test/regress/expected/collate.icu.utf8.out

+234
Original file line numberDiff line numberDiff line change
@@ -2140,7 +2140,241 @@ SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
21402140
b | 5
21412141
(4 rows)
21422142

2143+
-- Partitionwise join should not be allowed too when the collation used by the
2144+
-- join keys doesn't match the partition key collation.
2145+
SET enable_partitionwise_join TO false;
2146+
EXPLAIN (COSTS OFF)
2147+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2148+
QUERY PLAN
2149+
-------------------------------------------------------------
2150+
Sort
2151+
Sort Key: t1.c COLLATE "C"
2152+
-> HashAggregate
2153+
Group Key: t1.c
2154+
-> Hash Join
2155+
Hash Cond: (t1.c = t2.c)
2156+
-> Append
2157+
-> Seq Scan on pagg_tab3_p2 t1_1
2158+
-> Seq Scan on pagg_tab3_p1 t1_2
2159+
-> Hash
2160+
-> Append
2161+
-> Seq Scan on pagg_tab3_p2 t2_1
2162+
-> Seq Scan on pagg_tab3_p1 t2_2
2163+
(13 rows)
2164+
2165+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2166+
c | count
2167+
---+-------
2168+
A | 100
2169+
B | 100
2170+
(2 rows)
2171+
2172+
SET enable_partitionwise_join TO true;
2173+
EXPLAIN (COSTS OFF)
2174+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2175+
QUERY PLAN
2176+
-------------------------------------------------------------
2177+
Sort
2178+
Sort Key: t1.c COLLATE "C"
2179+
-> HashAggregate
2180+
Group Key: t1.c
2181+
-> Hash Join
2182+
Hash Cond: (t1.c = t2.c)
2183+
-> Append
2184+
-> Seq Scan on pagg_tab3_p2 t1_1
2185+
-> Seq Scan on pagg_tab3_p1 t1_2
2186+
-> Hash
2187+
-> Append
2188+
-> Seq Scan on pagg_tab3_p2 t2_1
2189+
-> Seq Scan on pagg_tab3_p1 t2_2
2190+
(13 rows)
2191+
2192+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2193+
c | count
2194+
---+-------
2195+
A | 100
2196+
B | 100
2197+
(2 rows)
2198+
2199+
-- OK when the join clause uses the same collation as the partition key.
2200+
EXPLAIN (COSTS OFF)
2201+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2202+
QUERY PLAN
2203+
------------------------------------------------------------------
2204+
Sort
2205+
Sort Key: ((t1.c)::text) COLLATE "C"
2206+
-> Append
2207+
-> HashAggregate
2208+
Group Key: (t1.c)::text
2209+
-> Hash Join
2210+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2211+
-> Seq Scan on pagg_tab3_p2 t1
2212+
-> Hash
2213+
-> Seq Scan on pagg_tab3_p2 t2
2214+
-> HashAggregate
2215+
Group Key: (t1_1.c)::text
2216+
-> Hash Join
2217+
Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
2218+
-> Seq Scan on pagg_tab3_p1 t1_1
2219+
-> Hash
2220+
-> Seq Scan on pagg_tab3_p1 t2_1
2221+
(17 rows)
2222+
2223+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2224+
c | count
2225+
---+-------
2226+
A | 25
2227+
B | 25
2228+
a | 25
2229+
b | 25
2230+
(4 rows)
2231+
2232+
SET enable_partitionwise_join TO false;
2233+
EXPLAIN (COSTS OFF)
2234+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2235+
QUERY PLAN
2236+
-------------------------------------------------------------
2237+
Sort
2238+
Sort Key: ((t1.c)::text) COLLATE "C"
2239+
-> HashAggregate
2240+
Group Key: (t1.c)::text
2241+
-> Hash Join
2242+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2243+
-> Append
2244+
-> Seq Scan on pagg_tab3_p2 t1_1
2245+
-> Seq Scan on pagg_tab3_p1 t1_2
2246+
-> Hash
2247+
-> Append
2248+
-> Seq Scan on pagg_tab3_p2 t2_1
2249+
-> Seq Scan on pagg_tab3_p1 t2_2
2250+
(13 rows)
2251+
2252+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2253+
c | count
2254+
---+-------
2255+
A | 25
2256+
B | 25
2257+
a | 25
2258+
b | 25
2259+
(4 rows)
2260+
2261+
-- Few other cases where the joined partition keys are matched via equivalence
2262+
-- class, not a join restriction clause.
2263+
-- Collations of joined columns match, but the partition keys collation is different
2264+
SET enable_partitionwise_join TO true;
2265+
CREATE TABLE pagg_tab4 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST (b collate "C");
2266+
CREATE TABLE pagg_tab4_p1 PARTITION OF pagg_tab4 FOR VALUES IN ('a', 'b');
2267+
CREATE TABLE pagg_tab4_p2 PARTITION OF pagg_tab4 FOR VALUES IN ('B', 'A');
2268+
INSERT INTO pagg_tab4 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 11) i;
2269+
ANALYZE pagg_tab4;
2270+
EXPLAIN (COSTS OFF)
2271+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
2272+
QUERY PLAN
2273+
-------------------------------------------------------------
2274+
Sort
2275+
Sort Key: t1.c COLLATE "C"
2276+
-> HashAggregate
2277+
Group Key: t1.c
2278+
-> Hash Join
2279+
Hash Cond: (t1.c = t2.c)
2280+
-> Append
2281+
-> Seq Scan on pagg_tab3_p2 t1_1
2282+
-> Seq Scan on pagg_tab3_p1 t1_2
2283+
-> Hash
2284+
-> Append
2285+
-> Seq Scan on pagg_tab4_p2 t2_1
2286+
Filter: (c = b)
2287+
-> Seq Scan on pagg_tab4_p1 t2_2
2288+
Filter: (c = b)
2289+
(15 rows)
2290+
2291+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
2292+
c | count
2293+
---+-------
2294+
A | 60
2295+
B | 60
2296+
(2 rows)
2297+
2298+
-- OK when the partition key collation is same as that of the join columns
2299+
CREATE TABLE pagg_tab5 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST (c collate case_insensitive);
2300+
CREATE TABLE pagg_tab5_p1 PARTITION OF pagg_tab5 FOR VALUES IN ('a', 'b');
2301+
CREATE TABLE pagg_tab5_p2 PARTITION OF pagg_tab5 FOR VALUES IN ('c', 'd');
2302+
INSERT INTO pagg_tab5 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
2303+
INSERT INTO pagg_tab5 (b, c) SELECT substr('cdCD', (i % 4) + 1 , 1), substr('cdCD', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
2304+
ANALYZE pagg_tab5;
2305+
CREATE TABLE pagg_tab6 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST (b collate case_insensitive);
2306+
CREATE TABLE pagg_tab6_p1 PARTITION OF pagg_tab6 FOR VALUES IN ('a', 'b');
2307+
CREATE TABLE pagg_tab6_p2 PARTITION OF pagg_tab6 FOR VALUES IN ('c', 'd');
2308+
INSERT INTO pagg_tab6 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
2309+
INSERT INTO pagg_tab6 (b, c) SELECT substr('cdCD', (i % 4) + 1 , 1), substr('cdCD', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
2310+
ANALYZE pagg_tab6;
2311+
EXPLAIN (COSTS OFF)
2312+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
2313+
QUERY PLAN
2314+
-------------------------------------------------------
2315+
Sort
2316+
Sort Key: t1.c COLLATE "C"
2317+
-> Append
2318+
-> HashAggregate
2319+
Group Key: t1.c
2320+
-> Nested Loop
2321+
Join Filter: (t1.c = t2.c)
2322+
-> Seq Scan on pagg_tab6_p1 t2
2323+
Filter: (c = b)
2324+
-> Seq Scan on pagg_tab5_p1 t1
2325+
-> HashAggregate
2326+
Group Key: t1_1.c
2327+
-> Nested Loop
2328+
Join Filter: (t1_1.c = t2_1.c)
2329+
-> Seq Scan on pagg_tab6_p2 t2_1
2330+
Filter: (c = b)
2331+
-> Seq Scan on pagg_tab5_p2 t1_1
2332+
(17 rows)
2333+
2334+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
2335+
c | count
2336+
---+-------
2337+
a | 9
2338+
b | 9
2339+
c | 9
2340+
d | 9
2341+
(4 rows)
2342+
2343+
SET enable_partitionwise_join TO false;
2344+
EXPLAIN (COSTS OFF)
2345+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
2346+
QUERY PLAN
2347+
-------------------------------------------------------------
2348+
Sort
2349+
Sort Key: t1.c COLLATE "C"
2350+
-> HashAggregate
2351+
Group Key: t1.c
2352+
-> Hash Join
2353+
Hash Cond: (t1.c = t2.c)
2354+
-> Append
2355+
-> Seq Scan on pagg_tab5_p1 t1_1
2356+
-> Seq Scan on pagg_tab5_p2 t1_2
2357+
-> Hash
2358+
-> Append
2359+
-> Seq Scan on pagg_tab6_p1 t2_1
2360+
Filter: (c = b)
2361+
-> Seq Scan on pagg_tab6_p2 t2_2
2362+
Filter: (c = b)
2363+
(15 rows)
2364+
2365+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
2366+
c | count
2367+
---+-------
2368+
a | 9
2369+
b | 9
2370+
c | 9
2371+
d | 9
2372+
(4 rows)
2373+
21432374
DROP TABLE pagg_tab3;
2375+
DROP TABLE pagg_tab4;
2376+
DROP TABLE pagg_tab5;
2377+
DROP TABLE pagg_tab6;
21442378
RESET enable_partitionwise_aggregate;
21452379
RESET max_parallel_workers_per_gather;
21462380
RESET enable_incremental_sort;

src/test/regress/sql/collate.icu.utf8.sql

+64
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,71 @@ EXPLAIN (COSTS OFF)
828828
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
829829
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
830830

831+
-- Partitionwise join should not be allowed too when the collation used by the
832+
-- join keys doesn't match the partition key collation.
833+
SET enable_partitionwise_join TO false;
834+
EXPLAIN (COSTS OFF)
835+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
836+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
837+
838+
SET enable_partitionwise_join TO true;
839+
EXPLAIN (COSTS OFF)
840+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
841+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
842+
843+
-- OK when the join clause uses the same collation as the partition key.
844+
EXPLAIN (COSTS OFF)
845+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
846+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
847+
848+
SET enable_partitionwise_join TO false;
849+
EXPLAIN (COSTS OFF)
850+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
851+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
852+
853+
-- Few other cases where the joined partition keys are matched via equivalence
854+
-- class, not a join restriction clause.
855+
856+
-- Collations of joined columns match, but the partition keys collation is different
857+
SET enable_partitionwise_join TO true;
858+
CREATE TABLE pagg_tab4 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST (b collate "C");
859+
CREATE TABLE pagg_tab4_p1 PARTITION OF pagg_tab4 FOR VALUES IN ('a', 'b');
860+
CREATE TABLE pagg_tab4_p2 PARTITION OF pagg_tab4 FOR VALUES IN ('B', 'A');
861+
INSERT INTO pagg_tab4 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 11) i;
862+
ANALYZE pagg_tab4;
863+
864+
EXPLAIN (COSTS OFF)
865+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
866+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab4 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
867+
868+
-- OK when the partition key collation is same as that of the join columns
869+
CREATE TABLE pagg_tab5 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST (c collate case_insensitive);
870+
CREATE TABLE pagg_tab5_p1 PARTITION OF pagg_tab5 FOR VALUES IN ('a', 'b');
871+
CREATE TABLE pagg_tab5_p2 PARTITION OF pagg_tab5 FOR VALUES IN ('c', 'd');
872+
INSERT INTO pagg_tab5 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
873+
INSERT INTO pagg_tab5 (b, c) SELECT substr('cdCD', (i % 4) + 1 , 1), substr('cdCD', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
874+
ANALYZE pagg_tab5;
875+
876+
CREATE TABLE pagg_tab6 (c text collate case_insensitive, b text collate case_insensitive) PARTITION BY LIST (b collate case_insensitive);
877+
CREATE TABLE pagg_tab6_p1 PARTITION OF pagg_tab6 FOR VALUES IN ('a', 'b');
878+
CREATE TABLE pagg_tab6_p2 PARTITION OF pagg_tab6 FOR VALUES IN ('c', 'd');
879+
INSERT INTO pagg_tab6 (b, c) SELECT substr('abAB', (i % 4) + 1 , 1), substr('abAB', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
880+
INSERT INTO pagg_tab6 (b, c) SELECT substr('cdCD', (i % 4) + 1 , 1), substr('cdCD', (i % 2) + 1 , 1) FROM generate_series(0, 5) i;
881+
ANALYZE pagg_tab6;
882+
883+
EXPLAIN (COSTS OFF)
884+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
885+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
886+
887+
SET enable_partitionwise_join TO false;
888+
EXPLAIN (COSTS OFF)
889+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
890+
SELECT t1.c, count(t2.c) FROM pagg_tab5 t1 JOIN pagg_tab6 t2 ON t1.c = t2.c AND t1.c = t2.b GROUP BY 1 ORDER BY t1.c COLLATE "C";
891+
831892
DROP TABLE pagg_tab3;
893+
DROP TABLE pagg_tab4;
894+
DROP TABLE pagg_tab5;
895+
DROP TABLE pagg_tab6;
832896

833897
RESET enable_partitionwise_aggregate;
834898
RESET max_parallel_workers_per_gather;

0 commit comments

Comments
 (0)