Skip to content

Commit 2cca95e

Browse files
committed
Improve NestLoopParam generation for lateral subqueries
It was possible in cases where we had a LATERAL joined subquery that when the same Var is mentioned in both the lateral references and in the outer Vars of the scan clauses that the given Var wouldn't be assigned to the same NestLoopParam. This could cause issues in Memoize as the cache key would reference the Var for the scan clauses but when the parameter for the lateral references changed some code in Memoize would see that some other parameter had changed that's not part of the cache key and end up purging the entire cache as a result, thinking the cache had become stale. This could result in a Nested Loop -> Memoize plan being quite inefficient as, in the worst case, the cache purging could result in never getting a cache hit. In no cases could this problem lead to incorrect query results. Here we switch the order of operations so that we create NestLoopParam for the lateral references first before doing replace_nestloop_params(). replace_nestloop_params() will find and reuse the existing NestLoopParam in cases where the Var exists in both locations. Author: Richard Guo Reviewed-by: Tom Lane, David Rowley Discussion: https://postgr.es/m/CAMbWs48XHJEK1Q1CzAQ7L9sTANTs9W1cepXu8%3DKc0quUL%2Btg4Q%40mail.gmail.com
1 parent f2743a7 commit 2cca95e

File tree

3 files changed

+66
-3
lines changed

3 files changed

+66
-3
lines changed

src/backend/optimizer/plan/createplan.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3720,13 +3720,22 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path,
37203720
/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
37213721
scan_clauses = extract_actual_clauses(scan_clauses, false);
37223722

3723-
/* Replace any outer-relation variables with nestloop params */
3723+
/*
3724+
* Replace any outer-relation variables with nestloop params.
3725+
*
3726+
* We must provide nestloop params for both lateral references of the
3727+
* subquery and outer vars in the scan_clauses. It's better to assign the
3728+
* former first, because that code path requires specific param IDs, while
3729+
* replace_nestloop_params can adapt to the IDs assigned by
3730+
* process_subquery_nestloop_params. This avoids possibly duplicating
3731+
* nestloop params when the same Var is needed for both reasons.
3732+
*/
37243733
if (best_path->path.param_info)
37253734
{
3726-
scan_clauses = (List *)
3727-
replace_nestloop_params(root, (Node *) scan_clauses);
37283735
process_subquery_nestloop_params(root,
37293736
rel->subplan_params);
3737+
scan_clauses = (List *)
3738+
replace_nestloop_params(root, (Node *) scan_clauses);
37303739
}
37313740

37323741
scan_plan = make_subqueryscan(tlist,

src/test/regress/expected/memoize.out

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,43 @@ WHERE t1.unique1 < 1000;
9292
1000 | 9.5000000000000000
9393
(1 row)
9494

95+
-- Try with LATERAL joins
96+
SELECT explain_memoize('
97+
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
98+
LATERAL (
99+
SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
100+
) t2
101+
ON t1.two = t2.two
102+
WHERE t1.unique1 < 10;', false);
103+
explain_memoize
104+
----------------------------------------------------------------------------------------------
105+
Aggregate (actual rows=1 loops=N)
106+
-> Nested Loop Left Join (actual rows=25 loops=N)
107+
-> Index Scan using tenk1_unique1 on tenk1 t1 (actual rows=10 loops=N)
108+
Index Cond: (unique1 < 10)
109+
-> Memoize (actual rows=2 loops=N)
110+
Cache Key: t1.two, t1.two
111+
Cache Mode: binary
112+
Hits: 8 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB
113+
-> Subquery Scan on t2 (actual rows=2 loops=N)
114+
Filter: (t1.two = t2.two)
115+
Rows Removed by Filter: 2
116+
-> Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=5 loops=N)
117+
Index Cond: (unique1 < 5)
118+
(13 rows)
119+
120+
-- And check we get the expected results.
121+
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
122+
LATERAL (
123+
SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
124+
) t2
125+
ON t1.two = t2.two
126+
WHERE t1.unique1 < 10;
127+
count | avg
128+
-------+------------------------
129+
25 | 0.40000000000000000000
130+
(1 row)
131+
95132
-- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
96133
SET work_mem TO '64kB';
97134
SET hash_mem_multiplier TO 1.0;

src/test/regress/sql/memoize.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,23 @@ LATERAL (SELECT t2.unique1 FROM tenk1 t2
5757
WHERE t1.twenty = t2.unique1 OFFSET 0) t2
5858
WHERE t1.unique1 < 1000;
5959

60+
-- Try with LATERAL joins
61+
SELECT explain_memoize('
62+
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
63+
LATERAL (
64+
SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
65+
) t2
66+
ON t1.two = t2.two
67+
WHERE t1.unique1 < 10;', false);
68+
69+
-- And check we get the expected results.
70+
SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
71+
LATERAL (
72+
SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
73+
) t2
74+
ON t1.two = t2.two
75+
WHERE t1.unique1 < 10;
76+
6077
-- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
6178
SET work_mem TO '64kB';
6279
SET hash_mem_multiplier TO 1.0;

0 commit comments

Comments
 (0)