Skip to content

Commit a3d2b1b

Browse files
committed
Disable anonymous record hash support except in special cases
Commit 01e658f added hash support for row types. This also added support for hashing anonymous record types, using the same approach that the type cache uses for comparison support for record types: It just reports that it works, but it might fail at run time if a component type doesn't actually support the operation. We get away with that for comparison because most types support that. But some types don't support hashing, so the current state can result in failures at run time where the planner chooses hashing over sorting, whereas that previously worked if only sorting was an option. We do, however, want the record hashing support for path tracking in recursive unions, and the SEARCH and CYCLE clauses built on that. In that case, hashing is the only plan option. So enable that, this commit implements the following approach: The type cache does not report that hashing is available for the record type. This undoes that part of 01e658f. Instead, callers that require hashing no matter what can override that result themselves. This patch only touches the callers to make the aforementioned recursive query cases work, namely the parse analysis of unions, as well as the hash_array() function. Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com> Bug: #17158 Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
1 parent 98dbef9 commit a3d2b1b

File tree

7 files changed

+117
-55
lines changed

7 files changed

+117
-55
lines changed

src/backend/parser/analyze.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,9 +1852,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
18521852

18531853
/*
18541854
* Make a SortGroupClause node for a SetOperationStmt's groupClauses
1855+
*
1856+
* If require_hash is true, the caller is indicating that they need hash
1857+
* support or they will fail. So look extra hard for hash support.
18551858
*/
18561859
SortGroupClause *
1857-
makeSortGroupClauseForSetOp(Oid rescoltype)
1860+
makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash)
18581861
{
18591862
SortGroupClause *grpcl = makeNode(SortGroupClause);
18601863
Oid sortop;
@@ -1867,6 +1870,15 @@ makeSortGroupClauseForSetOp(Oid rescoltype)
18671870
&sortop, &eqop, NULL,
18681871
&hashable);
18691872

1873+
/*
1874+
* The type cache doesn't believe that record is hashable (see
1875+
* cache_record_field_properties()), but if the caller really needs hash
1876+
* support, we can assume it does. Worst case, if any components of the
1877+
* record don't support hashing, we will fail at execution.
1878+
*/
1879+
if (require_hash && (rescoltype == RECORDOID || rescoltype == RECORDARRAYOID))
1880+
hashable = true;
1881+
18701882
/* we don't have a tlist yet, so can't assign sortgrouprefs */
18711883
grpcl->tleSortGroupRef = 0;
18721884
grpcl->eqop = eqop;
@@ -2027,6 +2039,8 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
20272039
ListCell *ltl;
20282040
ListCell *rtl;
20292041
const char *context;
2042+
bool recursive = (pstate->p_parent_cte &&
2043+
pstate->p_parent_cte->cterecursive);
20302044

20312045
context = (stmt->op == SETOP_UNION ? "UNION" :
20322046
(stmt->op == SETOP_INTERSECT ? "INTERSECT" :
@@ -2048,9 +2062,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
20482062
* containing CTE as having those result columns. We should do this
20492063
* only at the topmost setop of the CTE, of course.
20502064
*/
2051-
if (isTopLevel &&
2052-
pstate->p_parent_cte &&
2053-
pstate->p_parent_cte->cterecursive)
2065+
if (isTopLevel && recursive)
20542066
determineRecursiveColTypes(pstate, op->larg, ltargetlist);
20552067

20562068
/*
@@ -2182,8 +2194,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
21822194
setup_parser_errposition_callback(&pcbstate, pstate,
21832195
bestlocation);
21842196

2197+
/* If it's a recursive union, we need to require hashing support. */
21852198
op->groupClauses = lappend(op->groupClauses,
2186-
makeSortGroupClauseForSetOp(rescoltype));
2199+
makeSortGroupClauseForSetOp(rescoltype, recursive));
21872200

21882201
cancel_parser_errposition_callback(&pcbstate);
21892202
}

src/backend/rewrite/rewriteSearchCycle.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
594594
sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
595595
if (!sos->all)
596596
sos->groupClauses = lappend(sos->groupClauses,
597-
makeSortGroupClauseForSetOp(search_seq_type));
597+
makeSortGroupClauseForSetOp(search_seq_type, true));
598598
}
599599
if (cte->cycle_clause)
600600
{
@@ -603,14 +603,14 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
603603
sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation);
604604
if (!sos->all)
605605
sos->groupClauses = lappend(sos->groupClauses,
606-
makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type));
606+
makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type, true));
607607

608608
sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID);
609609
sos->colTypmods = lappend_int(sos->colTypmods, -1);
610610
sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
611611
if (!sos->all)
612612
sos->groupClauses = lappend(sos->groupClauses,
613-
makeSortGroupClauseForSetOp(RECORDARRAYOID));
613+
makeSortGroupClauseForSetOp(RECORDARRAYOID, true));
614614
}
615615

616616
/*

src/backend/utils/adt/arrayfuncs.c

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "utils/arrayaccess.h"
3030
#include "utils/builtins.h"
3131
#include "utils/datum.h"
32+
#include "utils/fmgroids.h"
3233
#include "utils/lsyscache.h"
3334
#include "utils/memutils.h"
3435
#include "utils/selfuncs.h"
@@ -3973,13 +3974,46 @@ hash_array(PG_FUNCTION_ARGS)
39733974
{
39743975
typentry = lookup_type_cache(element_type,
39753976
TYPECACHE_HASH_PROC_FINFO);
3976-
if (!OidIsValid(typentry->hash_proc_finfo.fn_oid))
3977+
if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID)
39773978
ereport(ERROR,
39783979
(errcode(ERRCODE_UNDEFINED_FUNCTION),
39793980
errmsg("could not identify a hash function for type %s",
39803981
format_type_be(element_type))));
3982+
3983+
/*
3984+
* The type cache doesn't believe that record is hashable (see
3985+
* cache_record_field_properties()), but since we're here, we're
3986+
* committed to hashing, so we can assume it does. Worst case, if any
3987+
* components of the record don't support hashing, we will fail at
3988+
* execution.
3989+
*/
3990+
if (element_type == RECORDOID)
3991+
{
3992+
MemoryContext oldcontext;
3993+
TypeCacheEntry *record_typentry;
3994+
3995+
oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
3996+
3997+
/*
3998+
* Make fake type cache entry structure. Note that we can't just
3999+
* modify typentry, since that points directly into the type cache.
4000+
*/
4001+
record_typentry = palloc(sizeof(*record_typentry));
4002+
4003+
/* fill in what we need below */
4004+
record_typentry->typlen = typentry->typlen;
4005+
record_typentry->typbyval = typentry->typbyval;
4006+
record_typentry->typalign = typentry->typalign;
4007+
fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo);
4008+
4009+
MemoryContextSwitchTo(oldcontext);
4010+
4011+
typentry = record_typentry;
4012+
}
4013+
39814014
fcinfo->flinfo->fn_extra = (void *) typentry;
39824015
}
4016+
39834017
typlen = typentry->typlen;
39844018
typbyval = typentry->typbyval;
39854019
typalign = typentry->typalign;

src/backend/utils/cache/typcache.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,14 +1515,17 @@ cache_record_field_properties(TypeCacheEntry *typentry)
15151515
/*
15161516
* For type RECORD, we can't really tell what will work, since we don't
15171517
* have access here to the specific anonymous type. Just assume that
1518-
* everything will (we may get a failure at runtime ...)
1518+
* equality and comparison will (we may get a failure at runtime). We
1519+
* could also claim that hashing works, but then if code that has the
1520+
* option between a comparison-based (sort-based) and a hash-based plan
1521+
* chooses hashing, stuff could fail that would otherwise work if it chose
1522+
* a comparison-based plan. In practice more types support comparison
1523+
* than hashing.
15191524
*/
15201525
if (typentry->type_id == RECORDOID)
15211526
{
15221527
typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY |
1523-
TCFLAGS_HAVE_FIELD_COMPARE |
1524-
TCFLAGS_HAVE_FIELD_HASHING |
1525-
TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
1528+
TCFLAGS_HAVE_FIELD_COMPARE);
15261529
}
15271530
else if (typentry->typtype == TYPTYPE_COMPOSITE)
15281531
{

src/include/parser/analyze.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,6 @@ extern void applyLockingClause(Query *qry, Index rtindex,
4848
extern List *BuildOnConflictExcludedTargetlist(Relation targetrel,
4949
Index exclRelIndex);
5050

51-
extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype);
51+
extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash);
5252

5353
#endif /* ANALYZE_H */

src/test/regress/expected/union.out

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -648,34 +648,37 @@ reset enable_hashagg;
648648
set enable_hashagg to on;
649649
explain (costs off)
650650
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
651-
QUERY PLAN
652-
-----------------------------------------
653-
HashAggregate
654-
Group Key: "*VALUES*".column1
655-
-> Append
656-
-> Values Scan on "*VALUES*"
657-
-> Values Scan on "*VALUES*_1"
658-
(5 rows)
651+
QUERY PLAN
652+
-----------------------------------------------
653+
Unique
654+
-> Sort
655+
Sort Key: "*VALUES*".column1
656+
-> Append
657+
-> Values Scan on "*VALUES*"
658+
-> Values Scan on "*VALUES*_1"
659+
(6 rows)
659660

660661
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
661662
x
662663
-------
663-
(1,4)
664-
(1,3)
665664
(1,2)
665+
(1,3)
666+
(1,4)
666667
(3 rows)
667668

668669
explain (costs off)
669670
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
670-
QUERY PLAN
671-
-----------------------------------------------
672-
HashSetOp Intersect
673-
-> Append
674-
-> Subquery Scan on "*SELECT* 1"
675-
-> Values Scan on "*VALUES*"
676-
-> Subquery Scan on "*SELECT* 2"
677-
-> Values Scan on "*VALUES*_1"
678-
(6 rows)
671+
QUERY PLAN
672+
-----------------------------------------------------
673+
SetOp Intersect
674+
-> Sort
675+
Sort Key: "*SELECT* 1".x
676+
-> Append
677+
-> Subquery Scan on "*SELECT* 1"
678+
-> Values Scan on "*VALUES*"
679+
-> Subquery Scan on "*SELECT* 2"
680+
-> Values Scan on "*VALUES*_1"
681+
(8 rows)
679682

680683
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
681684
x
@@ -685,15 +688,17 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
685688

686689
explain (costs off)
687690
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
688-
QUERY PLAN
689-
-----------------------------------------------
690-
HashSetOp Except
691-
-> Append
692-
-> Subquery Scan on "*SELECT* 1"
693-
-> Values Scan on "*VALUES*"
694-
-> Subquery Scan on "*SELECT* 2"
695-
-> Values Scan on "*VALUES*_1"
696-
(6 rows)
691+
QUERY PLAN
692+
-----------------------------------------------------
693+
SetOp Except
694+
-> Sort
695+
Sort Key: "*SELECT* 1".x
696+
-> Append
697+
-> Subquery Scan on "*SELECT* 1"
698+
-> Values Scan on "*VALUES*"
699+
-> Subquery Scan on "*SELECT* 2"
700+
-> Values Scan on "*VALUES*_1"
701+
(8 rows)
697702

698703
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
699704
x
@@ -702,21 +707,28 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
702707
(1 row)
703708

704709
-- non-hashable type
705-
-- With an anonymous row type, the typcache reports that the type is
706-
-- hashable, but then it will fail at run time.
710+
-- With an anonymous row type, the typcache does not report that the
711+
-- type is hashable. (Otherwise, this would fail at execution time.)
707712
explain (costs off)
708713
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
709-
QUERY PLAN
710-
-----------------------------------------
711-
HashAggregate
712-
Group Key: "*VALUES*".column1
713-
-> Append
714-
-> Values Scan on "*VALUES*"
715-
-> Values Scan on "*VALUES*_1"
716-
(5 rows)
714+
QUERY PLAN
715+
-----------------------------------------------
716+
Unique
717+
-> Sort
718+
Sort Key: "*VALUES*".column1
719+
-> Append
720+
-> Values Scan on "*VALUES*"
721+
-> Values Scan on "*VALUES*_1"
722+
(6 rows)
717723

718724
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
719-
ERROR: could not identify a hash function for type money
725+
x
726+
-----------
727+
($100.00)
728+
($200.00)
729+
($300.00)
730+
(3 rows)
731+
720732
-- With a defined row type, the typcache can inspect the type's fields
721733
-- for hashability.
722734
create type ct1 as (f1 money);

src/test/regress/sql/union.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
218218

219219
-- non-hashable type
220220

221-
-- With an anonymous row type, the typcache reports that the type is
222-
-- hashable, but then it will fail at run time.
221+
-- With an anonymous row type, the typcache does not report that the
222+
-- type is hashable. (Otherwise, this would fail at execution time.)
223223
explain (costs off)
224224
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
225225
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);

0 commit comments

Comments
 (0)