Skip to content

Commit aa86129

Browse files
author
Richard Guo
committed
Support "Right Semi Join" plan shapes
Hash joins can support semijoin with the LHS input on the right, using the existing logic for inner join, combined with the assurance that only the first match for each inner tuple is considered, which can be achieved by leveraging the HEAP_TUPLE_HAS_MATCH flag. This can be very useful in some cases since we may now have the option to hash the smaller table instead of the larger. Merge join could likely support "Right Semi Join" too. However, the benefit of swapping inputs tends to be small here, so we do not address that in this patch. Note that this patch also modifies a test query in join.sql to ensure it continues testing as intended. With this patch the original query would result in a right-semi-join rather than semi-join, compromising its original purpose of testing the fix for neqjoinsel's behavior for semi-joins. Author: Richard Guo Reviewed-by: wenhui qiu, Alena Rybakina, Japin Li Discussion: https://postgr.es/m/CAMbWs4_X1mN=ic+SxcyymUqFx9bB8pqSLTGJ-F=MHy4PW3eRXw@mail.gmail.com
1 parent 5a519ab commit aa86129

File tree

14 files changed

+220
-171
lines changed

14 files changed

+220
-171
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4127,13 +4127,16 @@ RESET enable_sort;
41274127
-- subquery using immutable function (can be sent to remote)
41284128
PREPARE st3(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c5) = '1970-01-17'::date) ORDER BY c1;
41294129
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st3(10, 20);
4130-
QUERY PLAN
4131-
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
4132-
Foreign Scan
4130+
QUERY PLAN
4131+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
4132+
Sort
41334133
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
4134-
Relations: (public.ft1 t1) SEMI JOIN (public.ft2 t2)
4135-
Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8 FROM "S 1"."T 1" r1 WHERE ((r1."C 1" < 20)) AND EXISTS (SELECT NULL FROM "S 1"."T 1" r3 WHERE ((r3."C 1" > 10)) AND ((date(r3.c5) = '1970-01-17'::date)) AND ((r3.c3 = r1.c3))) ORDER BY r1."C 1" ASC NULLS LAST
4136-
(4 rows)
4134+
Sort Key: t1.c1
4135+
-> Foreign Scan
4136+
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
4137+
Relations: (public.ft1 t1) SEMI JOIN (public.ft2 t2)
4138+
Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8 FROM "S 1"."T 1" r1 WHERE ((r1."C 1" < 20)) AND EXISTS (SELECT NULL FROM "S 1"."T 1" r3 WHERE ((r3."C 1" > 10)) AND ((date(r3.c5) = '1970-01-17'::date)) AND ((r3.c3 = r1.c3)))
4139+
(7 rows)
41374140

41384141
EXECUTE st3(10, 20);
41394142
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8

src/backend/commands/explain.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
17491749
case JOIN_ANTI:
17501750
jointype = "Anti";
17511751
break;
1752+
case JOIN_RIGHT_SEMI:
1753+
jointype = "Right Semi";
1754+
break;
17521755
case JOIN_RIGHT_ANTI:
17531756
jointype = "Right Anti";
17541757
break;

src/backend/executor/nodeHashjoin.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,14 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
533533
}
534534
}
535535

536+
/*
537+
* In a right-semijoin, we only need the first match for each
538+
* inner tuple.
539+
*/
540+
if (node->js.jointype == JOIN_RIGHT_SEMI &&
541+
HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
542+
continue;
543+
536544
/*
537545
* We've got a match, but still need to test non-hashed quals.
538546
* ExecScanHashBucket already set up all the state needed to
@@ -549,10 +557,10 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
549557
{
550558
node->hj_MatchedOuter = true;
551559

552-
553560
/*
554-
* This is really only needed if HJ_FILL_INNER(node), but
555-
* we'll avoid the branch and just set it always.
561+
* This is really only needed if HJ_FILL_INNER(node) or if
562+
* we are in a right-semijoin, but we'll avoid the branch
563+
* and just set it always.
556564
*/
557565
if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple)))
558566
HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
@@ -779,6 +787,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
779787
{
780788
case JOIN_INNER:
781789
case JOIN_SEMI:
790+
case JOIN_RIGHT_SEMI:
782791
break;
783792
case JOIN_LEFT:
784793
case JOIN_ANTI:

src/backend/optimizer/path/joinpath.c

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,8 @@ add_paths_to_joinrel(PlannerInfo *root,
288288
* sorted. This includes both nestloops and mergejoins where the outer
289289
* path is already ordered. Again, skip this if we can't mergejoin.
290290
* (That's okay because we know that nestloop can't handle
291-
* right/right-anti/full joins at all, so it wouldn't work in the
292-
* prohibited cases either.)
291+
* right/right-anti/right-semi/full joins at all, so it wouldn't work in
292+
* the prohibited cases either.)
293293
*/
294294
if (mergejoin_allowed)
295295
match_unsorted_outer(root, joinrel, outerrel, innerrel,
@@ -1728,6 +1728,13 @@ match_unsorted_outer(PlannerInfo *root,
17281728
Path *matpath = NULL;
17291729
ListCell *lc1;
17301730

1731+
/*
1732+
* For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1733+
* join.
1734+
*/
1735+
if (jointype == JOIN_RIGHT_SEMI)
1736+
return;
1737+
17311738
/*
17321739
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
17331740
* are doing a right, right-anti or full mergejoin, we must use *all* the
@@ -2297,12 +2304,13 @@ hash_inner_and_outer(PlannerInfo *root,
22972304
* total inner path will also be parallel-safe, but if not, we'll
22982305
* have to search for the cheapest safe, unparameterized inner
22992306
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2300-
* inner path. If full, right, or right-anti join, we can't use
2301-
* parallelism (building the hash table in each backend) because
2302-
* no one process has all the match bits.
2307+
* inner path. If full, right, right-semi or right-anti join, we
2308+
* can't use parallelism (building the hash table in each backend)
2309+
* because no one process has all the match bits.
23032310
*/
23042311
if (save_jointype == JOIN_FULL ||
23052312
save_jointype == JOIN_RIGHT ||
2313+
save_jointype == JOIN_RIGHT_SEMI ||
23062314
save_jointype == JOIN_RIGHT_ANTI)
23072315
cheapest_safe_inner = NULL;
23082316
else if (cheapest_total_inner->parallel_safe)
@@ -2327,13 +2335,13 @@ hash_inner_and_outer(PlannerInfo *root,
23272335
* Returns a list of RestrictInfo nodes for those clauses.
23282336
*
23292337
* *mergejoin_allowed is normally set to true, but it is set to false if
2330-
* this is a right/right-anti/full join and there are nonmergejoinable join
2331-
* clauses. The executor's mergejoin machinery cannot handle such cases, so
2332-
* we have to avoid generating a mergejoin plan. (Note that this flag does
2333-
* NOT consider whether there are actually any mergejoinable clauses. This is
2334-
* correct because in some cases we need to build a clauseless mergejoin.
2335-
* Simply returning NIL is therefore not enough to distinguish safe from
2336-
* unsafe cases.)
2338+
* this is a right-semi join, or this is a right/right-anti/full join and
2339+
* there are nonmergejoinable join clauses. The executor's mergejoin
2340+
* machinery cannot handle such cases, so we have to avoid generating a
2341+
* mergejoin plan. (Note that this flag does NOT consider whether there are
2342+
* actually any mergejoinable clauses. This is correct because in some
2343+
* cases we need to build a clauseless mergejoin. Simply returning NIL is
2344+
* therefore not enough to distinguish safe from unsafe cases.)
23372345
*
23382346
* We also mark each selected RestrictInfo to show which side is currently
23392347
* being considered as outer. These are transient markings that are only
@@ -2357,6 +2365,16 @@ select_mergejoin_clauses(PlannerInfo *root,
23572365
bool have_nonmergeable_joinclause = false;
23582366
ListCell *l;
23592367

2368+
/*
2369+
* For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2370+
* swapping inputs tends to be small here.
2371+
*/
2372+
if (jointype == JOIN_RIGHT_SEMI)
2373+
{
2374+
*mergejoin_allowed = false;
2375+
return NIL;
2376+
}
2377+
23602378
foreach(l, restrictlist)
23612379
{
23622380
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

src/backend/optimizer/path/joinrels.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
991991
add_paths_to_joinrel(root, joinrel, rel1, rel2,
992992
JOIN_SEMI, sjinfo,
993993
restrictlist);
994+
add_paths_to_joinrel(root, joinrel, rel2, rel1,
995+
JOIN_RIGHT_SEMI, sjinfo,
996+
restrictlist);
994997
}
995998

996999
/*

src/backend/optimizer/path/pathkeys.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,9 @@ build_join_pathkeys(PlannerInfo *root,
12941294
JoinType jointype,
12951295
List *outer_pathkeys)
12961296
{
1297+
/* RIGHT_SEMI should not come here */
1298+
Assert(jointype != JOIN_RIGHT_SEMI);
1299+
12971300
if (jointype == JOIN_FULL ||
12981301
jointype == JOIN_RIGHT ||
12991302
jointype == JOIN_RIGHT_ANTI)

src/backend/optimizer/prep/prepjointree.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
455455
* point of the available_rels machinations is to ensure that we only
456456
* pull up quals for which that's okay.
457457
*
458-
* We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI, or
459-
* JOIN_RIGHT_ANTI jointypes here.
458+
* We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI,
459+
* JOIN_RIGHT_SEMI, or JOIN_RIGHT_ANTI jointypes here.
460460
*/
461461
switch (j->jointype)
462462
{
@@ -2950,7 +2950,7 @@ reduce_outer_joins_pass2(Node *jtnode,
29502950
* These could only have been introduced by pull_up_sublinks,
29512951
* so there's no way that upper quals could refer to their
29522952
* righthand sides, and no point in checking. We don't expect
2953-
* to see JOIN_RIGHT_ANTI yet.
2953+
* to see JOIN_RIGHT_SEMI or JOIN_RIGHT_ANTI yet.
29542954
*/
29552955
break;
29562956
default:

src/include/nodes/nodes.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ typedef enum JoinType
306306
*/
307307
JOIN_SEMI, /* 1 copy of each LHS row that has match(es) */
308308
JOIN_ANTI, /* 1 copy of each LHS row that has no match */
309+
JOIN_RIGHT_SEMI, /* 1 copy of each RHS row that has match(es) */
309310
JOIN_RIGHT_ANTI, /* 1 copy of each RHS row that has no match */
310311

311312
/*
@@ -322,10 +323,10 @@ typedef enum JoinType
322323

323324
/*
324325
* OUTER joins are those for which pushed-down quals must behave differently
325-
* from the join's own quals. This is in fact everything except INNER and
326-
* SEMI joins. However, this macro must also exclude the JOIN_UNIQUE symbols
327-
* since those are temporary proxies for what will eventually be an INNER
328-
* join.
326+
* from the join's own quals. This is in fact everything except INNER, SEMI
327+
* and RIGHT_SEMI joins. However, this macro must also exclude the
328+
* JOIN_UNIQUE symbols since those are temporary proxies for what will
329+
* eventually be an INNER join.
329330
*
330331
* Note: semijoins are a hybrid case, but we choose to treat them as not
331332
* being outer joins. This is okay principally because the SQL syntax makes

src/include/nodes/pathnodes.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2823,9 +2823,9 @@ typedef struct PlaceHolderVar
28232823
* min_lefthand and min_righthand for higher joins.)
28242824
*
28252825
* jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching
2826-
* the inputs to make it a LEFT JOIN. It's never JOIN_RIGHT_ANTI either.
2827-
* So the allowed values of jointype in a join_info_list member are only
2828-
* LEFT, FULL, SEMI, or ANTI.
2826+
* the inputs to make it a LEFT JOIN. It's never JOIN_RIGHT_SEMI or
2827+
* JOIN_RIGHT_ANTI either. So the allowed values of jointype in a
2828+
* join_info_list member are only LEFT, FULL, SEMI, or ANTI.
28292829
*
28302830
* ojrelid is the RT index of the join RTE representing this outer join,
28312831
* if there is one. It is zero when jointype is INNER or SEMI, and can be

src/test/regress/expected/join.out

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,23 +1905,24 @@ SELECT *
19051905
-- semijoin selectivity for <>
19061906
--
19071907
explain (costs off)
1908-
select * from int4_tbl i4, tenk1 a
1909-
where exists(select * from tenk1 b
1910-
where a.twothousand = b.twothousand and a.fivethous <> b.fivethous)
1911-
and i4.f1 = a.tenthous;
1912-
QUERY PLAN
1913-
----------------------------------------------
1908+
select * from tenk1 a, tenk1 b
1909+
where exists(select * from tenk1 c
1910+
where b.twothousand = c.twothousand and b.fivethous <> c.fivethous)
1911+
and a.tenthous = b.tenthous and a.tenthous < 5000;
1912+
QUERY PLAN
1913+
-----------------------------------------------
19141914
Hash Semi Join
1915-
Hash Cond: (a.twothousand = b.twothousand)
1916-
Join Filter: (a.fivethous <> b.fivethous)
1915+
Hash Cond: (b.twothousand = c.twothousand)
1916+
Join Filter: (b.fivethous <> c.fivethous)
19171917
-> Hash Join
1918-
Hash Cond: (a.tenthous = i4.f1)
1919-
-> Seq Scan on tenk1 a
1918+
Hash Cond: (b.tenthous = a.tenthous)
1919+
-> Seq Scan on tenk1 b
19201920
-> Hash
1921-
-> Seq Scan on int4_tbl i4
1921+
-> Seq Scan on tenk1 a
1922+
Filter: (tenthous < 5000)
19221923
-> Hash
1923-
-> Seq Scan on tenk1 b
1924-
(10 rows)
1924+
-> Seq Scan on tenk1 c
1925+
(11 rows)
19251926

19261927
--
19271928
-- More complicated constructs

0 commit comments

Comments
 (0)