Skip to content

Commit adf97c1

Browse files
committed
Speed up Hash Join by making ExprStates support hashing
Here we add ExprState support for obtaining a 32-bit hash value from a list of expressions. This allows both faster hashing and also JIT compilation of these expressions. This is especially useful when hash joins have multiple join keys as the previous code called ExecEvalExpr on each hash join key individually and that was inefficient as tuple deformation would have only taken into account one key at a time, which could lead to walking the tuple once for each join key. With the new code, we'll determine the maximum attribute required and deform the tuple to that point only once. Some performance tests done with this change have shown up to a 20% performance increase of a query containing a Hash Join without JIT compilation and up to a 26% performance increase when JIT is enabled and optimization and inlining were performed by the JIT compiler. The performance increase with 1 join column was less with a 14% increase with and without JIT. This test was done using a fairly small hash table and a large number of hash probes. The increase will likely be less with large tables, especially ones larger than L3 cache as memory pressure is more likely to be the limiting factor there. This commit only addresses Hash Joins, but lays expression evaluation and JIT compilation infrastructure for other hashing needs such as Hash Aggregate. Author: David Rowley Reviewed-by: Alexey Dvoichenkov <alexey@hyperplane.net> Reviewed-by: Tels <nospam-pg-abuse@bloodgate.com> Discussion: https://postgr.es/m/CAApHDvoexAxgQFNQD_GRkr2O_eJUD1-wUGm%3Dm0L%2BGc%3DT%3DkEa4g%40mail.gmail.com
1 parent 9380e5f commit adf97c1

File tree

10 files changed

+650
-202
lines changed

10 files changed

+650
-202
lines changed

src/backend/executor/execExpr.c

+141
Original file line numberDiff line numberDiff line change
@@ -3969,6 +3969,147 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
39693969
}
39703970
}
39713971

3972+
/*
3973+
* Build an ExprState that calls the given hash function(s) on the given
3974+
* 'hash_exprs'. When multiple expressions are present, the hash values
3975+
* returned by each hash function are combined to produce a single hash value.
3976+
*
3977+
* desc: tuple descriptor for the to-be-hashed expressions
3978+
* ops: TupleTableSlotOps for the TupleDesc
3979+
* hashfunc_oids: Oid for each hash function to call, one for each 'hash_expr'
3980+
* collations: collation to use when calling the hash function.
3981+
* hash_expr: list of expressions to hash the value of
3982+
* opstrict: array corresponding to the 'hashfunc_oids' to store op_strict()
3983+
* parent: PlanState node that the 'hash_exprs' will be evaluated at
3984+
* init_value: Normally 0, but can be set to other values to seed the hash
3985+
* with some other value. Using non-zero is slightly less efficient but can
3986+
* be useful.
3987+
* keep_nulls: if true, evaluation of the returned ExprState will abort early
3988+
* returning NULL if the given hash function is strict and the Datum to hash
3989+
* is null. When set to false, any NULL input Datums are skipped.
3990+
*/
3991+
ExprState *
3992+
ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
3993+
const Oid *hashfunc_oids, const List *collations,
3994+
const List *hash_exprs, const bool *opstrict,
3995+
PlanState *parent, uint32 init_value, bool keep_nulls)
3996+
{
3997+
ExprState *state = makeNode(ExprState);
3998+
ExprEvalStep scratch = {0};
3999+
List *adjust_jumps = NIL;
4000+
ListCell *lc;
4001+
ListCell *lc2;
4002+
intptr_t strict_opcode;
4003+
intptr_t opcode;
4004+
4005+
Assert(list_length(hash_exprs) == list_length(collations));
4006+
4007+
state->parent = parent;
4008+
4009+
/* Insert setup steps as needed. */
4010+
ExecCreateExprSetupSteps(state, (Node *) hash_exprs);
4011+
4012+
if (init_value == 0)
4013+
{
4014+
/*
4015+
* No initial value, so we can assign the result of the hash function
4016+
* for the first hash_expr without having to concern ourselves with
4017+
* combining the result with any initial value.
4018+
*/
4019+
strict_opcode = EEOP_HASHDATUM_FIRST_STRICT;
4020+
opcode = EEOP_HASHDATUM_FIRST;
4021+
}
4022+
else
4023+
{
4024+
/* Set up operation to set the initial value. */
4025+
scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
4026+
scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
4027+
scratch.resvalue = &state->resvalue;
4028+
scratch.resnull = &state->resnull;
4029+
4030+
ExprEvalPushStep(state, &scratch);
4031+
4032+
/*
4033+
* When using an initial value use the NEXT32/NEXT32_STRICT ops as the
4034+
* FIRST/FIRST_STRICT ops would overwrite the stored initial value.
4035+
*/
4036+
strict_opcode = EEOP_HASHDATUM_NEXT32_STRICT;
4037+
opcode = EEOP_HASHDATUM_NEXT32;
4038+
}
4039+
4040+
forboth(lc, hash_exprs, lc2, collations)
4041+
{
4042+
Expr *expr = (Expr *) lfirst(lc);
4043+
FmgrInfo *finfo;
4044+
FunctionCallInfo fcinfo;
4045+
int i = foreach_current_index(lc);
4046+
Oid funcid;
4047+
Oid inputcollid = lfirst_oid(lc2);
4048+
4049+
funcid = hashfunc_oids[i];
4050+
4051+
/* Allocate hash function lookup data. */
4052+
finfo = palloc0(sizeof(FmgrInfo));
4053+
fcinfo = palloc0(SizeForFunctionCallInfo(1));
4054+
4055+
fmgr_info(funcid, finfo);
4056+
4057+
/*
4058+
* Build the steps to evaluate the hash function's argument have it so
4059+
* the value of that is stored in the 0th argument of the hash func.
4060+
*/
4061+
ExecInitExprRec(expr,
4062+
state,
4063+
&fcinfo->args[0].value,
4064+
&fcinfo->args[0].isnull);
4065+
4066+
scratch.resvalue = &state->resvalue;
4067+
scratch.resnull = &state->resnull;
4068+
4069+
/* Initialize function call parameter structure too */
4070+
InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
4071+
4072+
scratch.d.hashdatum.finfo = finfo;
4073+
scratch.d.hashdatum.fcinfo_data = fcinfo;
4074+
scratch.d.hashdatum.fn_addr = finfo->fn_addr;
4075+
4076+
scratch.opcode = opstrict[i] && !keep_nulls ? strict_opcode : opcode;
4077+
scratch.d.hashdatum.jumpdone = -1;
4078+
4079+
ExprEvalPushStep(state, &scratch);
4080+
adjust_jumps = lappend_int(adjust_jumps, state->steps_len - 1);
4081+
4082+
/*
4083+
* For subsequent keys we must combine the hash value with the
4084+
* previous hashes.
4085+
*/
4086+
strict_opcode = EEOP_HASHDATUM_NEXT32_STRICT;
4087+
opcode = EEOP_HASHDATUM_NEXT32;
4088+
}
4089+
4090+
/* adjust jump targets */
4091+
foreach(lc, adjust_jumps)
4092+
{
4093+
ExprEvalStep *as = &state->steps[lfirst_int(lc)];
4094+
4095+
Assert(as->opcode == EEOP_HASHDATUM_FIRST ||
4096+
as->opcode == EEOP_HASHDATUM_FIRST_STRICT ||
4097+
as->opcode == EEOP_HASHDATUM_NEXT32 ||
4098+
as->opcode == EEOP_HASHDATUM_NEXT32_STRICT);
4099+
Assert(as->d.hashdatum.jumpdone == -1);
4100+
as->d.hashdatum.jumpdone = state->steps_len;
4101+
}
4102+
4103+
scratch.resvalue = NULL;
4104+
scratch.resnull = NULL;
4105+
scratch.opcode = EEOP_DONE;
4106+
ExprEvalPushStep(state, &scratch);
4107+
4108+
ExecReadyExpr(state);
4109+
4110+
return state;
4111+
}
4112+
39724113
/*
39734114
* Build equality expression that can be evaluated using ExecQual(), returning
39744115
* true if the expression context's inner/outer tuple are NOT DISTINCT. I.e

src/backend/executor/execExprInterp.c

+110
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,11 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
477477
&&CASE_EEOP_DOMAIN_TESTVAL,
478478
&&CASE_EEOP_DOMAIN_NOTNULL,
479479
&&CASE_EEOP_DOMAIN_CHECK,
480+
&&CASE_EEOP_HASHDATUM_SET_INITVAL,
481+
&&CASE_EEOP_HASHDATUM_FIRST,
482+
&&CASE_EEOP_HASHDATUM_FIRST_STRICT,
483+
&&CASE_EEOP_HASHDATUM_NEXT32,
484+
&&CASE_EEOP_HASHDATUM_NEXT32_STRICT,
480485
&&CASE_EEOP_CONVERT_ROWTYPE,
481486
&&CASE_EEOP_SCALARARRAYOP,
482487
&&CASE_EEOP_HASHED_SCALARARRAYOP,
@@ -1543,6 +1548,111 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
15431548
EEO_NEXT();
15441549
}
15451550

1551+
EEO_CASE(EEOP_HASHDATUM_SET_INITVAL)
1552+
{
1553+
*op->resvalue = op->d.hashdatum_initvalue.init_value;
1554+
*op->resnull = false;
1555+
1556+
EEO_NEXT();
1557+
}
1558+
1559+
EEO_CASE(EEOP_HASHDATUM_FIRST)
1560+
{
1561+
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
1562+
1563+
/*
1564+
* Save the Datum on non-null inputs, otherwise store 0 so that
1565+
* subsequent NEXT32 operations combine with an initialized value.
1566+
*/
1567+
if (!fcinfo->args[0].isnull)
1568+
*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
1569+
else
1570+
*op->resvalue = (Datum) 0;
1571+
1572+
*op->resnull = false;
1573+
1574+
EEO_NEXT();
1575+
}
1576+
1577+
EEO_CASE(EEOP_HASHDATUM_FIRST_STRICT)
1578+
{
1579+
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
1580+
1581+
if (fcinfo->args[0].isnull)
1582+
{
1583+
/*
1584+
* With strict we have the expression return NULL instead of
1585+
* ignoring NULL input values. We've nothing more to do after
1586+
* finding a NULL.
1587+
*/
1588+
*op->resnull = true;
1589+
*op->resvalue = (Datum) 0;
1590+
EEO_JUMP(op->d.hashdatum.jumpdone);
1591+
}
1592+
1593+
/* execute the hash function and save the resulting value */
1594+
*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
1595+
*op->resnull = false;
1596+
1597+
EEO_NEXT();
1598+
}
1599+
1600+
EEO_CASE(EEOP_HASHDATUM_NEXT32)
1601+
{
1602+
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
1603+
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
1604+
1605+
/* combine successive hash values by rotating */
1606+
existing_hash = pg_rotate_left32(existing_hash, 1);
1607+
1608+
/* leave the hash value alone on NULL inputs */
1609+
if (!fcinfo->args[0].isnull)
1610+
{
1611+
uint32 hashvalue;
1612+
1613+
/* execute hash func and combine with previous hash value */
1614+
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
1615+
existing_hash = existing_hash ^ hashvalue;
1616+
}
1617+
1618+
*op->resvalue = UInt32GetDatum(existing_hash);
1619+
*op->resnull = false;
1620+
1621+
EEO_NEXT();
1622+
}
1623+
1624+
EEO_CASE(EEOP_HASHDATUM_NEXT32_STRICT)
1625+
{
1626+
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
1627+
1628+
if (fcinfo->args[0].isnull)
1629+
{
1630+
/*
1631+
* With strict we have the expression return NULL instead of
1632+
* ignoring NULL input values. We've nothing more to do after
1633+
* finding a NULL.
1634+
*/
1635+
*op->resnull = true;
1636+
*op->resvalue = (Datum) 0;
1637+
EEO_JUMP(op->d.hashdatum.jumpdone);
1638+
}
1639+
else
1640+
{
1641+
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
1642+
uint32 hashvalue;
1643+
1644+
/* combine successive hash values by rotating */
1645+
existing_hash = pg_rotate_left32(existing_hash, 1);
1646+
1647+
/* execute hash func and combine with previous hash value */
1648+
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
1649+
*op->resvalue = UInt32GetDatum(existing_hash ^ hashvalue);
1650+
*op->resnull = false;
1651+
}
1652+
1653+
EEO_NEXT();
1654+
}
1655+
15461656
EEO_CASE(EEOP_XMLEXPR)
15471657
{
15481658
/* too complex for an inline implementation */

0 commit comments

Comments
 (0)