Skip to content

Commit 247dea8

Browse files
author
Richard Guo
committed
Introduce an RTE for the grouping step
If there are subqueries in the grouping expressions, each of these subqueries in the targetlist and HAVING clause is expanded into distinct SubPlan nodes. As a result, only one of these SubPlan nodes would be converted to reference to the grouping key column output by the Agg node; others would have to get evaluated afresh. This is not efficient, and with grouping sets this can cause wrong results issues in cases where they should go to NULL because they are from the wrong grouping set. Furthermore, during re-evaluation, these SubPlan nodes might use nulled column values from grouping sets, which is not correct. This issue is not limited to subqueries. For other types of expressions that are part of grouping items, if they are transformed into another form during preprocessing, they may fail to match lower target items. This can also lead to wrong results with grouping sets. To fix this issue, we introduce a new kind of RTE representing the output of the grouping step, with columns that are the Vars or expressions being grouped on. In the parser, we replace the grouping expressions in the targetlist and HAVING clause with Vars referencing this new RTE, so that the output of the parser directly expresses the semantic requirement that the grouping expressions be gotten from the grouping output rather than computed some other way. In the planner, we first preprocess all the columns of this new RTE and then replace any Vars in the targetlist and HAVING clause that reference this new RTE with the underlying grouping expressions, so that we will have only one instance of a SubPlan node for each subquery contained in the grouping expressions. Bump catversion because this changes the querytree produced by the parser. Thanks to Tom Lane for the idea to invent a new kind of RTE. Per reports from Geoff Winkless, Tobias Wendorff, Richard Guo from various threads. Author: Richard Guo Reviewed-by: Ashutosh Bapat, Sutou Kouhei Discussion: https://postgr.es/m/CAMbWs4_dp7e7oTwaiZeBX8+P1rXw4ThkZxh1QG81rhu9Z47VsQ@mail.gmail.com
1 parent fba49d5 commit 247dea8

File tree

25 files changed

+731
-86
lines changed

25 files changed

+731
-86
lines changed

src/backend/commands/explain.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
879879
{
880880
Bitmapset *rels_used = NULL;
881881
PlanState *ps;
882+
ListCell *lc;
882883

883884
/* Set up ExplainState fields associated with this plan tree */
884885
Assert(queryDesc->plannedstmt != NULL);
@@ -889,6 +890,17 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
889890
es->deparse_cxt = deparse_context_for_plan_tree(queryDesc->plannedstmt,
890891
es->rtable_names);
891892
es->printed_subplans = NULL;
893+
es->rtable_size = list_length(es->rtable);
894+
foreach(lc, es->rtable)
895+
{
896+
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
897+
898+
if (rte->rtekind == RTE_GROUP)
899+
{
900+
es->rtable_size--;
901+
break;
902+
}
903+
}
892904

893905
/*
894906
* Sometimes we mark a Gather node as "invisible", which means that it's
@@ -2474,7 +2486,7 @@ show_plan_tlist(PlanState *planstate, List *ancestors, ExplainState *es)
24742486
context = set_deparse_context_plan(es->deparse_cxt,
24752487
plan,
24762488
ancestors);
2477-
useprefix = list_length(es->rtable) > 1;
2489+
useprefix = es->rtable_size > 1;
24782490

24792491
/* Deparse each result column (we now include resjunk ones) */
24802492
foreach(lc, plan->targetlist)
@@ -2558,7 +2570,7 @@ show_upper_qual(List *qual, const char *qlabel,
25582570
{
25592571
bool useprefix;
25602572

2561-
useprefix = (list_length(es->rtable) > 1 || es->verbose);
2573+
useprefix = (es->rtable_size > 1 || es->verbose);
25622574
show_qual(qual, qlabel, planstate, ancestors, useprefix, es);
25632575
}
25642576

@@ -2648,7 +2660,7 @@ show_grouping_sets(PlanState *planstate, Agg *agg,
26482660
context = set_deparse_context_plan(es->deparse_cxt,
26492661
planstate->plan,
26502662
ancestors);
2651-
useprefix = (list_length(es->rtable) > 1 || es->verbose);
2663+
useprefix = (es->rtable_size > 1 || es->verbose);
26522664

26532665
ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es);
26542666

@@ -2788,7 +2800,7 @@ show_sort_group_keys(PlanState *planstate, const char *qlabel,
27882800
context = set_deparse_context_plan(es->deparse_cxt,
27892801
plan,
27902802
ancestors);
2791-
useprefix = (list_length(es->rtable) > 1 || es->verbose);
2803+
useprefix = (es->rtable_size > 1 || es->verbose);
27922804

27932805
for (keyno = 0; keyno < nkeys; keyno++)
27942806
{
@@ -2900,7 +2912,7 @@ show_tablesample(TableSampleClause *tsc, PlanState *planstate,
29002912
context = set_deparse_context_plan(es->deparse_cxt,
29012913
planstate->plan,
29022914
ancestors);
2903-
useprefix = list_length(es->rtable) > 1;
2915+
useprefix = es->rtable_size > 1;
29042916

29052917
/* Get the tablesample method name */
29062918
method_name = get_func_name(tsc->tsmhandler);
@@ -3386,7 +3398,7 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
33863398
* It's hard to imagine having a memoize node with fewer than 2 RTEs, but
33873399
* let's just keep the same useprefix logic as elsewhere in this file.
33883400
*/
3389-
useprefix = list_length(es->rtable) > 1 || es->verbose;
3401+
useprefix = es->rtable_size > 1 || es->verbose;
33903402

33913403
/* Set up deparsing context */
33923404
context = set_deparse_context_plan(es->deparse_cxt,

src/backend/nodes/nodeFuncs.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2854,6 +2854,11 @@ range_table_entry_walker_impl(RangeTblEntry *rte,
28542854
case RTE_RESULT:
28552855
/* nothing to do */
28562856
break;
2857+
case RTE_GROUP:
2858+
if (!(flags & QTW_IGNORE_GROUPEXPRS))
2859+
if (WALK(rte->groupexprs))
2860+
return true;
2861+
break;
28572862
}
28582863

28592864
if (WALK(rte->securityQuals))
@@ -3891,6 +3896,15 @@ range_table_mutator_impl(List *rtable,
38913896
case RTE_RESULT:
38923897
/* nothing to do */
38933898
break;
3899+
case RTE_GROUP:
3900+
if (!(flags & QTW_IGNORE_GROUPEXPRS))
3901+
MUTATE(newrte->groupexprs, rte->groupexprs, List *);
3902+
else
3903+
{
3904+
/* else, copy grouping exprs as-is */
3905+
newrte->groupexprs = copyObject(rte->groupexprs);
3906+
}
3907+
break;
38943908
}
38953909
MUTATE(newrte->securityQuals, rte->securityQuals, List *);
38963910
newrt = lappend(newrt, newrte);

src/backend/nodes/outfuncs.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,9 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
562562
case RTE_RESULT:
563563
/* no extra fields */
564564
break;
565+
case RTE_GROUP:
566+
WRITE_NODE_FIELD(groupexprs);
567+
break;
565568
default:
566569
elog(ERROR, "unrecognized RTE kind: %d", (int) node->rtekind);
567570
break;

src/backend/nodes/print.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,10 @@ print_rt(const List *rtable)
300300
printf("%d\t%s\t[result]",
301301
i, rte->eref->aliasname);
302302
break;
303+
case RTE_GROUP:
304+
printf("%d\t%s\t[group]",
305+
i, rte->eref->aliasname);
306+
break;
303307
default:
304308
printf("%d\t%s\t[unknown rtekind]",
305309
i, rte->eref->aliasname);

src/backend/nodes/readfuncs.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,9 @@ _readRangeTblEntry(void)
422422
case RTE_RESULT:
423423
/* no extra fields */
424424
break;
425+
case RTE_GROUP:
426+
READ_NODE_FIELD(groupexprs);
427+
break;
425428
default:
426429
elog(ERROR, "unrecognized RTE kind: %d",
427430
(int) local_node->rtekind);

src/backend/optimizer/path/allpaths.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,10 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
731731
case RTE_RESULT:
732732
/* RESULT RTEs, in themselves, are no problem. */
733733
break;
734+
case RTE_GROUP:
735+
/* Shouldn't happen; we're only considering baserels here. */
736+
Assert(false);
737+
return;
734738
}
735739

736740
/*

src/backend/optimizer/plan/planner.c

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
8888
#define EXPRKIND_ARBITER_ELEM 10
8989
#define EXPRKIND_TABLEFUNC 11
9090
#define EXPRKIND_TABLEFUNC_LATERAL 12
91+
#define EXPRKIND_GROUPEXPR 13
9192

9293
/*
9394
* Data specific to grouping sets
@@ -748,6 +749,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
748749
*/
749750
root->hasJoinRTEs = false;
750751
root->hasLateralRTEs = false;
752+
root->group_rtindex = 0;
751753
hasOuterJoins = false;
752754
hasResultRTEs = false;
753755
foreach(l, parse->rtable)
@@ -781,6 +783,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
781783
case RTE_RESULT:
782784
hasResultRTEs = true;
783785
break;
786+
case RTE_GROUP:
787+
Assert(parse->hasGroupRTE);
788+
root->group_rtindex = list_cell_number(parse->rtable, l) + 1;
789+
break;
784790
default:
785791
/* No work here for other RTE types */
786792
break;
@@ -836,10 +842,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
836842
preprocess_expression(root, (Node *) parse->targetList,
837843
EXPRKIND_TARGET);
838844

839-
/* Constant-folding might have removed all set-returning functions */
840-
if (parse->hasTargetSRFs)
841-
parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
842-
843845
newWithCheckOptions = NIL;
844846
foreach(l, parse->withCheckOptions)
845847
{
@@ -969,6 +971,13 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
969971
rte->values_lists = (List *)
970972
preprocess_expression(root, (Node *) rte->values_lists, kind);
971973
}
974+
else if (rte->rtekind == RTE_GROUP)
975+
{
976+
/* Preprocess the groupexprs list fully */
977+
rte->groupexprs = (List *)
978+
preprocess_expression(root, (Node *) rte->groupexprs,
979+
EXPRKIND_GROUPEXPR);
980+
}
972981

973982
/*
974983
* Process each element of the securityQuals list as if it were a
@@ -1005,6 +1014,27 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
10051014
}
10061015
}
10071016

1017+
/*
1018+
* Replace any Vars in the subquery's targetlist and havingQual that
1019+
* reference GROUP outputs with the underlying grouping expressions.
1020+
*
1021+
* Note that we need to perform this replacement after we've preprocessed
1022+
* the grouping expressions. This is to ensure that there is only one
1023+
* instance of SubPlan for each SubLink contained within the grouping
1024+
* expressions.
1025+
*/
1026+
if (parse->hasGroupRTE)
1027+
{
1028+
parse->targetList = (List *)
1029+
flatten_group_exprs(root, root->parse, (Node *) parse->targetList);
1030+
parse->havingQual =
1031+
flatten_group_exprs(root, root->parse, parse->havingQual);
1032+
}
1033+
1034+
/* Constant-folding might have removed all set-returning functions */
1035+
if (parse->hasTargetSRFs)
1036+
parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
1037+
10081038
/*
10091039
* In some cases we may want to transfer a HAVING clause into WHERE. We
10101040
* cannot do so if the HAVING clause contains aggregates (obviously) or
@@ -1032,6 +1062,16 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
10321062
* don't emit a bogus aggregated row. (This could be done better, but it
10331063
* seems not worth optimizing.)
10341064
*
1065+
* Note that a HAVING clause may contain expressions that are not fully
1066+
* preprocessed. This can happen if these expressions are part of
1067+
* grouping items. In such cases, they are replaced with GROUP Vars in
1068+
* the parser and then replaced back after we've done with expression
1069+
* preprocessing on havingQual. This is not an issue if the clause
1070+
* remains in HAVING, because these expressions will be matched to lower
1071+
* target items in setrefs.c. However, if the clause is moved or copied
1072+
* into WHERE, we need to ensure that these expressions are fully
1073+
* preprocessed.
1074+
*
10351075
* Note that both havingQual and parse->jointree->quals are in
10361076
* implicitly-ANDed-list form at this point, even though they are declared
10371077
* as Node *.
@@ -1051,16 +1091,28 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
10511091
}
10521092
else if (parse->groupClause && !parse->groupingSets)
10531093
{
1054-
/* move it to WHERE */
1094+
Node *whereclause;
1095+
1096+
/* Preprocess the HAVING clause fully */
1097+
whereclause = preprocess_expression(root, havingclause,
1098+
EXPRKIND_QUAL);
1099+
/* ... and move it to WHERE */
10551100
parse->jointree->quals = (Node *)
1056-
lappend((List *) parse->jointree->quals, havingclause);
1101+
list_concat((List *) parse->jointree->quals,
1102+
(List *) whereclause);
10571103
}
10581104
else
10591105
{
1060-
/* put a copy in WHERE, keep it in HAVING */
1106+
Node *whereclause;
1107+
1108+
/* Preprocess the HAVING clause fully */
1109+
whereclause = preprocess_expression(root, copyObject(havingclause),
1110+
EXPRKIND_QUAL);
1111+
/* ... and put a copy in WHERE */
10611112
parse->jointree->quals = (Node *)
1062-
lappend((List *) parse->jointree->quals,
1063-
copyObject(havingclause));
1113+
list_concat((List *) parse->jointree->quals,
1114+
(List *) whereclause);
1115+
/* ... and also keep it in HAVING */
10641116
newHaving = lappend(newHaving, havingclause);
10651117
}
10661118
}

src/backend/optimizer/plan/setrefs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, List *rteperminfos,
557557
newrte->coltypes = NIL;
558558
newrte->coltypmods = NIL;
559559
newrte->colcollations = NIL;
560+
newrte->groupexprs = NIL;
560561
newrte->securityQuals = NIL;
561562

562563
glob->finalrtable = lappend(glob->finalrtable, newrte);

src/backend/optimizer/prep/prepjointree.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
12351235
case RTE_CTE:
12361236
case RTE_NAMEDTUPLESTORE:
12371237
case RTE_RESULT:
1238+
case RTE_GROUP:
12381239
/* these can't contain any lateral references */
12391240
break;
12401241
}
@@ -2218,7 +2219,8 @@ perform_pullup_replace_vars(PlannerInfo *root,
22182219
}
22192220

22202221
/*
2221-
* Replace references in the joinaliasvars lists of join RTEs.
2222+
* Replace references in the joinaliasvars lists of join RTEs and the
2223+
* groupexprs list of group RTE.
22222224
*/
22232225
foreach(lc, parse->rtable)
22242226
{
@@ -2228,6 +2230,10 @@ perform_pullup_replace_vars(PlannerInfo *root,
22282230
otherrte->joinaliasvars = (List *)
22292231
pullup_replace_vars((Node *) otherrte->joinaliasvars,
22302232
rvcontext);
2233+
else if (otherrte->rtekind == RTE_GROUP)
2234+
otherrte->groupexprs = (List *)
2235+
pullup_replace_vars((Node *) otherrte->groupexprs,
2236+
rvcontext);
22312237
}
22322238
}
22332239

@@ -2293,6 +2299,7 @@ replace_vars_in_jointree(Node *jtnode,
22932299
case RTE_CTE:
22942300
case RTE_NAMEDTUPLESTORE:
22952301
case RTE_RESULT:
2302+
case RTE_GROUP:
22962303
/* these shouldn't be marked LATERAL */
22972304
Assert(false);
22982305
break;

0 commit comments

Comments
 (0)