Skip to content

Commit e629846

Browse files
committed
Fix incorrect accessing of pfree'd memory in Memoize
For pass-by-reference types, the code added in 0b053e7, which aimed to resolve a memory leak, was overly aggressive in resetting the per-tuple memory context which could result in pfree'd memory being accessed resulting in failing to find previously cached results in the hash table. What was happening was prepare_probe_slot() was switching to the per-tuple memory context and calling ExecEvalExpr(). ExecEvalExpr() may have required a memory allocation. Both MemoizeHash_hash() and MemoizeHash_equal() were aggressively resetting the per-tuple context and after determining the hash value, the context would have gotten reset before MemoizeHash_equal() was called. This could have resulted in MemoizeHash_equal() looking at pfree'd memory. This is less likely to have caused issues on a production build as some other allocation would have had to have reused the pfree'd memory to overwrite it. Otherwise, the original contents would have been intact. However, this clearly caused issues on MEMORY_CONTEXT_CHECKING builds. Author: Tender Wang, Andrei Lepikhov Reported-by: Tender Wang (using SQLancer) Reviewed-by: Andrei Lepikhov, Richard Guo, David Rowley Discussion: https://postgr.es/m/CAHewXNnT6N6UJkya0z-jLFzVxcwGfeRQSfhiwA+NyLg-x8iGew@mail.gmail.com Backpatch-through: 14, where Memoize was added
1 parent 21e3a8b commit e629846

File tree

3 files changed

+63
-6
lines changed

3 files changed

+63
-6
lines changed

src/backend/executor/nodeMemoize.c

+11-4
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Memoize nodes are intended to sit above parameterized nodes in the plan
1414
* tree in order to cache results from them. The intention here is that a
1515
* repeat scan with a parameter value that has already been seen by the node
16-
* can fetch tuples from the cache rather than having to re-scan the outer
16+
* can fetch tuples from the cache rather than having to re-scan the inner
1717
* node all over again. The query planner may choose to make use of one of
1818
* these when it thinks rescans for previously seen values are likely enough
1919
* to warrant adding the additional node.
@@ -207,7 +207,6 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
207207
}
208208
}
209209

210-
ResetExprContext(econtext);
211210
MemoryContextSwitchTo(oldcontext);
212211
return murmurhash32(hashkey);
213212
}
@@ -265,15 +264,14 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
265264
}
266265
}
267266

268-
ResetExprContext(econtext);
269267
MemoryContextSwitchTo(oldcontext);
270268
return match;
271269
}
272270
else
273271
{
274272
econtext->ecxt_innertuple = tslot;
275273
econtext->ecxt_outertuple = pslot;
276-
return ExecQualAndReset(mstate->cache_eq_expr, econtext);
274+
return ExecQual(mstate->cache_eq_expr, econtext);
277275
}
278276
}
279277

@@ -699,9 +697,18 @@ static TupleTableSlot *
699697
ExecMemoize(PlanState *pstate)
700698
{
701699
MemoizeState *node = castNode(MemoizeState, pstate);
700+
ExprContext *econtext = node->ss.ps.ps_ExprContext;
702701
PlanState *outerNode;
703702
TupleTableSlot *slot;
704703

704+
CHECK_FOR_INTERRUPTS();
705+
706+
/*
707+
* Reset per-tuple memory context to free any expression evaluation
708+
* storage allocated in the previous tuple cycle.
709+
*/
710+
ResetExprContext(econtext);
711+
705712
switch (node->mstatus)
706713
{
707714
case MEMO_CACHE_LOOKUP:

src/test/regress/expected/memoize.out

+30-1
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,39 @@ WHERE t1.unique1 < 10;
129129
20 | 0.50000000000000000000
130130
(1 row)
131131

132+
SET enable_mergejoin TO off;
133+
-- Test for varlena datatype with expr evaluation
134+
CREATE TABLE expr_key (x numeric, t text);
135+
INSERT INTO expr_key (x, t)
136+
SELECT d1::numeric, d1::text FROM (
137+
SELECT round((d / pi())::numeric, 7) AS d1 FROM generate_series(1, 20) AS d
138+
) t;
139+
-- duplicate rows so we get some cache hits
140+
INSERT INTO expr_key SELECT * FROM expr_key;
141+
CREATE INDEX expr_key_idx_x_t ON expr_key (x, t);
142+
VACUUM ANALYZE expr_key;
143+
-- Ensure we get we get a cache miss and hit for each of the 20 distinct values
144+
SELECT explain_memoize('
145+
SELECT * FROM expr_key t1 INNER JOIN expr_key t2
146+
ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false);
147+
explain_memoize
148+
-------------------------------------------------------------------------------------------
149+
Nested Loop (actual rows=80 loops=N)
150+
-> Seq Scan on expr_key t1 (actual rows=40 loops=N)
151+
-> Memoize (actual rows=2 loops=N)
152+
Cache Key: t1.x, (t1.t)::numeric
153+
Cache Mode: logical
154+
Hits: 20 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB
155+
-> Index Only Scan using expr_key_idx_x_t on expr_key t2 (actual rows=2 loops=N)
156+
Index Cond: (x = (t1.t)::numeric)
157+
Filter: (t1.x = (t)::numeric)
158+
Heap Fetches: N
159+
(10 rows)
160+
161+
DROP TABLE expr_key;
132162
-- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
133163
SET work_mem TO '64kB';
134164
SET hash_mem_multiplier TO 1.0;
135-
SET enable_mergejoin TO off;
136165
-- Ensure we get some evictions. We're unable to validate the hits and misses
137166
-- here as the number of entries that fit in the cache at once will vary
138167
-- between different machines.

src/test/regress/sql/memoize.sql

+22-1
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,31 @@ LATERAL (
7474
ON t1.two = t2.two
7575
WHERE t1.unique1 < 10;
7676

77+
SET enable_mergejoin TO off;
78+
79+
-- Test for varlena datatype with expr evaluation
80+
CREATE TABLE expr_key (x numeric, t text);
81+
INSERT INTO expr_key (x, t)
82+
SELECT d1::numeric, d1::text FROM (
83+
SELECT round((d / pi())::numeric, 7) AS d1 FROM generate_series(1, 20) AS d
84+
) t;
85+
86+
-- duplicate rows so we get some cache hits
87+
INSERT INTO expr_key SELECT * FROM expr_key;
88+
89+
CREATE INDEX expr_key_idx_x_t ON expr_key (x, t);
90+
VACUUM ANALYZE expr_key;
91+
92+
-- Ensure we get we get a cache miss and hit for each of the 20 distinct values
93+
SELECT explain_memoize('
94+
SELECT * FROM expr_key t1 INNER JOIN expr_key t2
95+
ON t1.x = t2.t::numeric AND t1.t::numeric = t2.x;', false);
96+
97+
DROP TABLE expr_key;
98+
7799
-- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
78100
SET work_mem TO '64kB';
79101
SET hash_mem_multiplier TO 1.0;
80-
SET enable_mergejoin TO off;
81102
-- Ensure we get some evictions. We're unable to validate the hits and misses
82103
-- here as the number of entries that fit in the cache at once will vary
83104
-- between different machines.

0 commit comments

Comments
 (0)