Skip to content

Commit 6c32c09

Browse files
committed
Allow Memoize to operate in binary comparison mode
Memoize would always use the hash equality operator for the cache key types to determine if the current set of parameters were the same as some previously cached set. Certain types such as floating points where -0.0 and +0.0 differ in their binary representation but are classed as equal by the hash equality operator may cause problems as unless the join uses the same operator it's possible that whichever join operator is being used would be able to distinguish the two values. In which case we may accidentally return in the incorrect rows out of the cache. To fix this here we add a binary mode to Memoize to allow it to the current set of parameters to previously cached values by comparing bit-by-bit rather than logically using the hash equality operator. This binary mode is always used for LATERAL joins and it's used for normal joins when any of the join operators are not hashable. Reported-by: Tom Lane Author: David Rowley Discussion: https://postgr.es/m/3004308.1632952496@sss.pgh.pa.us Backpatch-through: 14, where Memoize was added
1 parent 0fdf674 commit 6c32c09

File tree

19 files changed

+347
-41
lines changed

19 files changed

+347
-41
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2151,6 +2151,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
21512151
Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
21522152
-> Memoize
21532153
Cache Key: t1.c2
2154+
Cache Mode: binary
21542155
-> Subquery Scan on q
21552156
-> HashAggregate
21562157
Output: t2.c1, t3.c1
@@ -2159,7 +2160,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
21592160
Output: t2.c1, t3.c1
21602161
Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
21612162
Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
2162-
(16 rows)
2163+
(17 rows)
21632164

21642165
SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
21652166
C 1

src/backend/commands/explain.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3127,11 +3127,14 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
31273127
if (es->format != EXPLAIN_FORMAT_TEXT)
31283128
{
31293129
ExplainPropertyText("Cache Key", keystr.data, es);
3130+
ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
31303131
}
31313132
else
31323133
{
31333134
ExplainIndentText(es);
31343135
appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
3136+
ExplainIndentText(es);
3137+
appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
31353138
}
31363139

31373140
pfree(keystr.data);

src/backend/executor/nodeMemoize.c

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#include "executor/nodeMemoize.h"
7272
#include "lib/ilist.h"
7373
#include "miscadmin.h"
74+
#include "utils/datum.h"
7475
#include "utils/lsyscache.h"
7576

7677
/* States of the ExecMemoize state machine */
@@ -131,16 +132,16 @@ typedef struct MemoizeEntry
131132

132133
static uint32 MemoizeHash_hash(struct memoize_hash *tb,
133134
const MemoizeKey *key);
134-
static int MemoizeHash_equal(struct memoize_hash *tb,
135-
const MemoizeKey *params1,
136-
const MemoizeKey *params2);
135+
static bool MemoizeHash_equal(struct memoize_hash *tb,
136+
const MemoizeKey *params1,
137+
const MemoizeKey *params2);
137138

138139
#define SH_PREFIX memoize
139140
#define SH_ELEMENT_TYPE MemoizeEntry
140141
#define SH_KEY_TYPE MemoizeKey *
141142
#define SH_KEY key
142143
#define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key)
143-
#define SH_EQUAL(tb, a, b) (MemoizeHash_equal(tb, a, b) == 0)
144+
#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b)
144145
#define SH_SCOPE static inline
145146
#define SH_STORE_HASH
146147
#define SH_GET_HASH(tb, a) a->hash
@@ -160,21 +161,45 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
160161
TupleTableSlot *pslot = mstate->probeslot;
161162
uint32 hashkey = 0;
162163
int numkeys = mstate->nkeys;
163-
FmgrInfo *hashfunctions = mstate->hashfunctions;
164-
Oid *collations = mstate->collations;
165164

166-
for (int i = 0; i < numkeys; i++)
165+
if (mstate->binary_mode)
167166
{
168-
/* rotate hashkey left 1 bit at each step */
169-
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
167+
for (int i = 0; i < numkeys; i++)
168+
{
169+
/* rotate hashkey left 1 bit at each step */
170+
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
171+
172+
if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */
173+
{
174+
FormData_pg_attribute *attr;
175+
uint32 hkey;
176+
177+
attr = &pslot->tts_tupleDescriptor->attrs[i];
178+
179+
hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen);
180+
181+
hashkey ^= hkey;
182+
}
183+
}
184+
}
185+
else
186+
{
187+
FmgrInfo *hashfunctions = mstate->hashfunctions;
188+
Oid *collations = mstate->collations;
170189

171-
if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */
190+
for (int i = 0; i < numkeys; i++)
172191
{
173-
uint32 hkey;
192+
/* rotate hashkey left 1 bit at each step */
193+
hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
194+
195+
if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */
196+
{
197+
uint32 hkey;
174198

175-
hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
176-
collations[i], pslot->tts_values[i]));
177-
hashkey ^= hkey;
199+
hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
200+
collations[i], pslot->tts_values[i]));
201+
hashkey ^= hkey;
202+
}
178203
}
179204
}
180205

@@ -187,7 +212,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
187212
* table lookup. 'key2' is never used. Instead the MemoizeState's
188213
* probeslot is always populated with details of what's being looked up.
189214
*/
190-
static int
215+
static bool
191216
MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
192217
const MemoizeKey *key2)
193218
{
@@ -199,9 +224,38 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
199224
/* probeslot should have already been prepared by prepare_probe_slot() */
200225
ExecStoreMinimalTuple(key1->params, tslot, false);
201226

202-
econtext->ecxt_innertuple = tslot;
203-
econtext->ecxt_outertuple = pslot;
204-
return !ExecQualAndReset(mstate->cache_eq_expr, econtext);
227+
if (mstate->binary_mode)
228+
{
229+
int numkeys = mstate->nkeys;
230+
231+
slot_getallattrs(tslot);
232+
slot_getallattrs(pslot);
233+
234+
for (int i = 0; i < numkeys; i++)
235+
{
236+
FormData_pg_attribute *attr;
237+
238+
if (tslot->tts_isnull[i] != pslot->tts_isnull[i])
239+
return false;
240+
241+
/* both NULL? they're equal */
242+
if (tslot->tts_isnull[i])
243+
continue;
244+
245+
/* perform binary comparison on the two datums */
246+
attr = &tslot->tts_tupleDescriptor->attrs[i];
247+
if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i],
248+
attr->attbyval, attr->attlen))
249+
return false;
250+
}
251+
return true;
252+
}
253+
else
254+
{
255+
econtext->ecxt_innertuple = tslot;
256+
econtext->ecxt_outertuple = pslot;
257+
return ExecQualAndReset(mstate->cache_eq_expr, econtext);
258+
}
205259
}
206260

207261
/*
@@ -926,6 +980,12 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags)
926980
*/
927981
mstate->singlerow = node->singlerow;
928982

983+
/*
984+
* Record if the cache keys should be compared bit by bit, or logically
985+
* using the type's hash equality operator
986+
*/
987+
mstate->binary_mode = node->binary_mode;
988+
929989
/* Zero the statistics counters */
930990
memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation));
931991

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,7 @@ _copyMemoize(const Memoize *from)
970970
COPY_POINTER_FIELD(collations, sizeof(Oid) * from->numKeys);
971971
COPY_NODE_FIELD(param_exprs);
972972
COPY_SCALAR_FIELD(singlerow);
973+
COPY_SCALAR_FIELD(binary_mode);
973974
COPY_SCALAR_FIELD(est_entries);
974975

975976
return newnode;

src/backend/nodes/outfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,7 @@ _outMemoize(StringInfo str, const Memoize *node)
859859
WRITE_OID_ARRAY(collations, node->numKeys);
860860
WRITE_NODE_FIELD(param_exprs);
861861
WRITE_BOOL_FIELD(singlerow);
862+
WRITE_BOOL_FIELD(binary_mode);
862863
WRITE_UINT_FIELD(est_entries);
863864
}
864865

@@ -1958,6 +1959,7 @@ _outMemoizePath(StringInfo str, const MemoizePath *node)
19581959
WRITE_NODE_FIELD(hash_operators);
19591960
WRITE_NODE_FIELD(param_exprs);
19601961
WRITE_BOOL_FIELD(singlerow);
1962+
WRITE_BOOL_FIELD(binary_mode);
19611963
WRITE_FLOAT_FIELD(calls, "%.0f");
19621964
WRITE_UINT_FIELD(est_entries);
19631965
}

src/backend/nodes/readfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,6 +2229,7 @@ _readMemoize(void)
22292229
READ_OID_ARRAY(collations, local_node->numKeys);
22302230
READ_NODE_FIELD(param_exprs);
22312231
READ_BOOL_FIELD(singlerow);
2232+
READ_BOOL_FIELD(binary_mode);
22322233
READ_UINT_FIELD(est_entries);
22332234

22342235
READ_DONE();

src/backend/optimizer/path/joinpath.c

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,19 +371,21 @@ allow_star_schema_join(PlannerInfo *root,
371371
* Returns true the hashing is possible, otherwise return false.
372372
*
373373
* Additionally we also collect the outer exprs and the hash operators for
374-
* each parameter to innerrel. These set in 'param_exprs' and 'operators'
375-
* when we return true.
374+
* each parameter to innerrel. These set in 'param_exprs', 'operators' and
375+
* 'binary_mode' when we return true.
376376
*/
377377
static bool
378378
paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
379379
RelOptInfo *outerrel, RelOptInfo *innerrel,
380-
List **param_exprs, List **operators)
380+
List **param_exprs, List **operators,
381+
bool *binary_mode)
381382

382383
{
383384
ListCell *lc;
384385

385386
*param_exprs = NIL;
386387
*operators = NIL;
388+
*binary_mode = false;
387389

388390
if (param_info != NULL)
389391
{
@@ -416,6 +418,20 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
416418

417419
*operators = lappend_oid(*operators, rinfo->hasheqoperator);
418420
*param_exprs = lappend(*param_exprs, expr);
421+
422+
/*
423+
* When the join operator is not hashable then it's possible that
424+
* the operator will be able to distinguish something that the
425+
* hash equality operator could not. For example with floating
426+
* point types -0.0 and +0.0 are classed as equal by the hash
427+
* function and equality function, but some other operator may be
428+
* able to tell those values apart. This means that we must put
429+
* memoize into binary comparison mode so that it does bit-by-bit
430+
* comparisons rather than a "logical" comparison as it would
431+
* using the hash equality operator.
432+
*/
433+
if (!OidIsValid(rinfo->hashjoinoperator))
434+
*binary_mode = true;
419435
}
420436
}
421437

@@ -446,6 +462,17 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
446462

447463
*operators = lappend_oid(*operators, typentry->eq_opr);
448464
*param_exprs = lappend(*param_exprs, expr);
465+
466+
/*
467+
* We must go into binary mode as we don't have too much of an idea of
468+
* how these lateral Vars are being used. See comment above when we
469+
* set *binary_mode for the non-lateral Var case. This could be
470+
* relaxed a bit if we had the RestrictInfos and knew the operators
471+
* being used, however for cases like Vars that are arguments to
472+
* functions we must operate in binary mode as we don't have
473+
* visibility into what the function is doing with the Vars.
474+
*/
475+
*binary_mode = true;
449476
}
450477

451478
/* We're okay to use memoize */
@@ -466,6 +493,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
466493
List *param_exprs;
467494
List *hash_operators;
468495
ListCell *lc;
496+
bool binary_mode;
469497

470498
/* Obviously not if it's disabled */
471499
if (!enable_memoize)
@@ -557,14 +585,16 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
557585
outerrel,
558586
innerrel,
559587
&param_exprs,
560-
&hash_operators))
588+
&hash_operators,
589+
&binary_mode))
561590
{
562591
return (Path *) create_memoize_path(root,
563592
innerrel,
564593
inner_path,
565594
param_exprs,
566595
hash_operators,
567596
extra->inner_unique,
597+
binary_mode,
568598
outer_path->parent->rows);
569599
}
570600

src/backend/optimizer/plan/createplan.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,8 @@ static Sort *make_sort_from_groupcols(List *groupcls,
279279
static Material *make_material(Plan *lefttree);
280280
static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
281281
Oid *collations, List *param_exprs,
282-
bool singlerow, uint32 est_entries);
282+
bool singlerow, bool binary_mode,
283+
uint32 est_entries);
283284
static WindowAgg *make_windowagg(List *tlist, Index winref,
284285
int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
285286
int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1617,7 +1618,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
16171618
}
16181619

16191620
plan = make_memoize(subplan, operators, collations, param_exprs,
1620-
best_path->singlerow, best_path->est_entries);
1621+
best_path->singlerow, best_path->binary_mode,
1622+
best_path->est_entries);
16211623

16221624
copy_generic_path_info(&plan->plan, (Path *) best_path);
16231625

@@ -6416,7 +6418,8 @@ materialize_finished_plan(Plan *subplan)
64166418

64176419
static Memoize *
64186420
make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
6419-
List *param_exprs, bool singlerow, uint32 est_entries)
6421+
List *param_exprs, bool singlerow, bool binary_mode,
6422+
uint32 est_entries)
64206423
{
64216424
Memoize *node = makeNode(Memoize);
64226425
Plan *plan = &node->plan;
@@ -6431,6 +6434,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
64316434
node->collations = collations;
64326435
node->param_exprs = param_exprs;
64336436
node->singlerow = singlerow;
6437+
node->binary_mode = binary_mode;
64346438
node->est_entries = est_entries;
64356439

64366440
return node;

src/backend/optimizer/util/pathnode.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1583,7 +1583,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
15831583
MemoizePath *
15841584
create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
15851585
List *param_exprs, List *hash_operators,
1586-
bool singlerow, double calls)
1586+
bool singlerow, bool binary_mode, double calls)
15871587
{
15881588
MemoizePath *pathnode = makeNode(MemoizePath);
15891589

@@ -1603,6 +1603,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
16031603
pathnode->hash_operators = hash_operators;
16041604
pathnode->param_exprs = param_exprs;
16051605
pathnode->singlerow = singlerow;
1606+
pathnode->binary_mode = binary_mode;
16061607
pathnode->calls = calls;
16071608

16081609
/*
@@ -3942,6 +3943,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
39423943
mpath->param_exprs,
39433944
mpath->hash_operators,
39443945
mpath->singlerow,
3946+
mpath->binary_mode,
39453947
mpath->calls);
39463948
}
39473949
default:

0 commit comments

Comments
 (0)