Skip to content

Commit f5050f7

Browse files
author
Richard Guo
committed
Mark expressions nullable by grouping sets
When generating window_pathkeys, distinct_pathkeys, or sort_pathkeys, we failed to realize that the grouping/ordering expressions might be nullable by grouping sets. As a result, we may incorrectly deem that the PathKeys are redundant by EquivalenceClass processing and thus remove them from the pathkeys list. That would lead to wrong results in some cases. To fix this issue, we mark the grouping expressions nullable by grouping sets if that is the case. If the grouping expression is a Var or PlaceHolderVar or constructed from those, we can just add the RT index of the RTE_GROUP RTE to the existing nullingrels field(s); otherwise we have to add a PlaceHolderVar to carry on the nullingrel bit. However, we have to manually remove this nullingrel bit from expressions in various cases where these expressions are logically below the grouping step, such as when we generate groupClause pathkeys for grouping sets, or when we generate PathTarget for initial input to grouping nodes. Furthermore, in set_upper_references, the targetlist and quals of an Agg node should have nullingrels that include the effects of the grouping step, ie they will have nullingrels equal to the input Vars/PHVs' nullingrels plus the nullingrel bit that references the grouping RTE. In order to perform exact nullingrels matches, we also need to manually remove this nullingrel bit. Bump catversion because this changes the querytree produced by the parser. Thanks to Tom Lane for the idea to invent a new kind of RTE. Per reports from Geoff Winkless, Tobias Wendorff, Richard Guo from various threads. Author: Richard Guo Reviewed-by: Ashutosh Bapat, Sutou Kouhei Discussion: https://postgr.es/m/CAMbWs4_dp7e7oTwaiZeBX8+P1rXw4ThkZxh1QG81rhu9Z47VsQ@mail.gmail.com
1 parent 247dea8 commit f5050f7

File tree

11 files changed

+411
-33
lines changed

11 files changed

+411
-33
lines changed

src/backend/optimizer/path/equivclass.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,10 @@ get_eclass_for_sort_expr(PlannerInfo *root,
726726
{
727727
RelOptInfo *rel = root->simple_rel_array[i];
728728

729+
/* ignore the RTE_GROUP RTE */
730+
if (i == root->group_rtindex)
731+
continue;
732+
729733
if (rel == NULL) /* must be an outer join */
730734
{
731735
Assert(bms_is_member(i, root->outer_join_rels));
@@ -1087,6 +1091,10 @@ generate_base_implied_equalities(PlannerInfo *root)
10871091
{
10881092
RelOptInfo *rel = root->simple_rel_array[i];
10891093

1094+
/* ignore the RTE_GROUP RTE */
1095+
if (i == root->group_rtindex)
1096+
continue;
1097+
10901098
if (rel == NULL) /* must be an outer join */
10911099
{
10921100
Assert(bms_is_member(i, root->outer_join_rels));
@@ -3354,6 +3362,10 @@ get_eclass_indexes_for_relids(PlannerInfo *root, Relids relids)
33543362
{
33553363
RelOptInfo *rel = root->simple_rel_array[i];
33563364

3365+
/* ignore the RTE_GROUP RTE */
3366+
if (i == root->group_rtindex)
3367+
continue;
3368+
33573369
if (rel == NULL) /* must be an outer join */
33583370
{
33593371
Assert(bms_is_member(i, root->outer_join_rels));

src/backend/optimizer/path/pathkeys.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "optimizer/pathnode.h"
2626
#include "optimizer/paths.h"
2727
#include "partitioning/partbounds.h"
28+
#include "rewrite/rewriteManip.h"
2829
#include "utils/lsyscache.h"
2930

3031
/* Consider reordering of GROUP BY keys? */
@@ -1341,6 +1342,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
13411342
&sortclauses,
13421343
tlist,
13431344
false,
1345+
false,
13441346
&sortable,
13451347
false);
13461348
/* It's caller error if not all clauses were sortable */
@@ -1359,6 +1361,9 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
13591361
* give rise to redundant pathkeys are removed from the sortclauses list
13601362
* (which therefore must be pass-by-reference in this version).
13611363
*
1364+
* If remove_group_rtindex is true, then we need to remove the RT index of the
1365+
* grouping step from the sort expressions before we make PathKeys for them.
1366+
*
13621367
* *sortable is set to true if all the sort clauses are in fact sortable.
13631368
* If any are not, they are ignored except for setting *sortable false.
13641369
* (In that case, the output pathkey list isn't really useful. However,
@@ -1375,6 +1380,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
13751380
List **sortclauses,
13761381
List *tlist,
13771382
bool remove_redundant,
1383+
bool remove_group_rtindex,
13781384
bool *sortable,
13791385
bool set_ec_sortref)
13801386
{
@@ -1394,6 +1400,14 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
13941400
*sortable = false;
13951401
continue;
13961402
}
1403+
if (remove_group_rtindex)
1404+
{
1405+
Assert(root->group_rtindex > 0);
1406+
sortkey = (Expr *)
1407+
remove_nulling_relids((Node *) sortkey,
1408+
bms_make_singleton(root->group_rtindex),
1409+
NULL);
1410+
}
13971411
pathkey = make_pathkey_from_sortop(root,
13981412
sortkey,
13991413
sortcl->sortop,

src/backend/optimizer/plan/initsplan.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,6 +1328,10 @@ mark_rels_nulled_by_join(PlannerInfo *root, Index ojrelid,
13281328
{
13291329
RelOptInfo *rel = root->simple_rel_array[relid];
13301330

1331+
/* ignore the RTE_GROUP RTE */
1332+
if (relid == root->group_rtindex)
1333+
continue;
1334+
13311335
if (rel == NULL) /* must be an outer join */
13321336
{
13331337
Assert(bms_is_member(relid, root->outer_join_rels));

src/backend/optimizer/plan/planner.c

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "parser/parse_relation.h"
5959
#include "parser/parsetree.h"
6060
#include "partitioning/partdesc.h"
61+
#include "rewrite/rewriteManip.h"
6162
#include "utils/lsyscache.h"
6263
#include "utils/rel.h"
6364
#include "utils/selfuncs.h"
@@ -3506,9 +3507,23 @@ standard_qp_callback(PlannerInfo *root, void *extra)
35063507

35073508
if (grouping_is_sortable(groupClause))
35083509
{
3509-
root->group_pathkeys = make_pathkeys_for_sortclauses(root,
3510-
groupClause,
3511-
tlist);
3510+
bool sortable;
3511+
3512+
/*
3513+
* The groupClause is logically below the grouping step. So if
3514+
* there is an RTE entry for the grouping step, we need to remove
3515+
* its RT index from the sort expressions before we make PathKeys
3516+
* for them.
3517+
*/
3518+
root->group_pathkeys =
3519+
make_pathkeys_for_sortclauses_extended(root,
3520+
&groupClause,
3521+
tlist,
3522+
false,
3523+
parse->hasGroupRTE,
3524+
&sortable,
3525+
false);
3526+
Assert(sortable);
35123527
root->num_groupby_pathkeys = list_length(root->group_pathkeys);
35133528
}
35143529
else
@@ -3538,6 +3553,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
35383553
&root->processed_groupClause,
35393554
tlist,
35403555
true,
3556+
false,
35413557
&sortable,
35423558
true);
35433559
if (!sortable)
@@ -3589,6 +3605,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
35893605
&root->processed_distinctClause,
35903606
tlist,
35913607
true,
3608+
false,
35923609
&sortable,
35933610
false);
35943611
if (!sortable)
@@ -3616,6 +3633,7 @@ standard_qp_callback(PlannerInfo *root, void *extra)
36163633
&groupClauses,
36173634
tlist,
36183635
false,
3636+
false,
36193637
&sortable,
36203638
false);
36213639
if (!sortable)
@@ -5526,7 +5544,19 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
55265544
{
55275545
/*
55285546
* It's a grouping column, so add it to the input target as-is.
5547+
*
5548+
* Note that the target is logically below the grouping step. So
5549+
* with grouping sets we need to remove the RT index of the
5550+
* grouping step if there is any from the target expression.
55295551
*/
5552+
if (parse->hasGroupRTE && parse->groupingSets != NIL)
5553+
{
5554+
Assert(root->group_rtindex > 0);
5555+
expr = (Expr *)
5556+
remove_nulling_relids((Node *) expr,
5557+
bms_make_singleton(root->group_rtindex),
5558+
NULL);
5559+
}
55305560
add_column_to_pathtarget(input_target, expr, sgref);
55315561
}
55325562
else
@@ -5554,11 +5584,23 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
55545584
* includes Vars used in resjunk items, so we are covering the needs of
55555585
* ORDER BY and window specifications. Vars used within Aggrefs and
55565586
* WindowFuncs will be pulled out here, too.
5587+
*
5588+
* Note that the target is logically below the grouping step. So with
5589+
* grouping sets we need to remove the RT index of the grouping step if
5590+
* there is any from the non-group Vars.
55575591
*/
55585592
non_group_vars = pull_var_clause((Node *) non_group_cols,
55595593
PVC_RECURSE_AGGREGATES |
55605594
PVC_RECURSE_WINDOWFUNCS |
55615595
PVC_INCLUDE_PLACEHOLDERS);
5596+
if (parse->hasGroupRTE && parse->groupingSets != NIL)
5597+
{
5598+
Assert(root->group_rtindex > 0);
5599+
non_group_vars = (List *)
5600+
remove_nulling_relids((Node *) non_group_vars,
5601+
bms_make_singleton(root->group_rtindex),
5602+
NULL);
5603+
}
55625604
add_new_columns_to_pathtarget(input_target, non_group_vars);
55635605

55645606
/* clean up cruft */
@@ -6207,6 +6249,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
62076249
&wc->partitionClause,
62086250
tlist,
62096251
true,
6252+
false,
62106253
&sortable,
62116254
false);
62126255

src/backend/optimizer/plan/setrefs.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "optimizer/subselect.h"
2727
#include "optimizer/tlist.h"
2828
#include "parser/parse_relation.h"
29+
#include "rewrite/rewriteManip.h"
2930
#include "tcop/utility.h"
3031
#include "utils/syscache.h"
3132

@@ -2426,6 +2427,28 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
24262427

24272428
subplan_itlist = build_tlist_index(subplan->targetlist);
24282429

2430+
/*
2431+
* If it's a grouping node with grouping sets, any Vars and PHVs appearing
2432+
* in the targetlist and quals should have nullingrels that include the
2433+
* effects of the grouping step, ie they will have nullingrels equal to
2434+
* the input Vars/PHVs' nullingrels plus the RT index of the grouping
2435+
* step. In order to perform exact nullingrels matches, we remove the RT
2436+
* index of the grouping step first.
2437+
*/
2438+
if (IsA(plan, Agg) &&
2439+
root->group_rtindex > 0 &&
2440+
((Agg *) plan)->groupingSets)
2441+
{
2442+
plan->targetlist = (List *)
2443+
remove_nulling_relids((Node *) plan->targetlist,
2444+
bms_make_singleton(root->group_rtindex),
2445+
NULL);
2446+
plan->qual = (List *)
2447+
remove_nulling_relids((Node *) plan->qual,
2448+
bms_make_singleton(root->group_rtindex),
2449+
NULL);
2450+
}
2451+
24292452
output_targetlist = NIL;
24302453
foreach(l, plan->targetlist)
24312454
{

src/backend/optimizer/util/var.c

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "access/sysattr.h"
2424
#include "nodes/nodeFuncs.h"
25+
#include "optimizer/clauses.h"
2526
#include "optimizer/optimizer.h"
2627
#include "optimizer/placeholder.h"
2728
#include "optimizer/prep.h"
@@ -83,6 +84,8 @@ static Node *flatten_join_alias_vars_mutator(Node *node,
8384
flatten_join_alias_vars_context *context);
8485
static Node *flatten_group_exprs_mutator(Node *node,
8586
flatten_join_alias_vars_context *context);
87+
static Node *mark_nullable_by_grouping(PlannerInfo *root, Node *newnode,
88+
Var *oldvar);
8689
static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode,
8790
Var *oldvar);
8891
static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar);
@@ -909,6 +912,18 @@ flatten_join_alias_vars_mutator(Node *node,
909912
* flatten_group_exprs
910913
* Replace Vars that reference GROUP outputs with the underlying grouping
911914
* expressions.
915+
*
916+
* We have to preserve any varnullingrels info attached to the group Vars we're
917+
* replacing. If the replacement expression is a Var or PlaceHolderVar or
918+
* constructed from those, we can just add the varnullingrels bits to the
919+
* existing nullingrels field(s); otherwise we have to add a PlaceHolderVar
920+
* wrapper.
921+
*
922+
* NOTE: this is also used by ruleutils.c, to deparse one query parsetree back
923+
* to source text. For that use-case, root will be NULL, which is why we have
924+
* to pass the Query separately. We need the root itself only for preserving
925+
* varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's
926+
* usage because it does not make any difference to the deparsed source text.
912927
*/
913928
Node *
914929
flatten_group_exprs(PlannerInfo *root, Query *query, Node *node)
@@ -973,7 +988,8 @@ flatten_group_exprs_mutator(Node *node,
973988
if (context->possible_sublink && !context->inserted_sublink)
974989
context->inserted_sublink = checkExprHasSubLink(newvar);
975990

976-
return newvar;
991+
/* Lastly, add any varnullingrels to the replacement expression */
992+
return mark_nullable_by_grouping(context->root, newvar, var);
977993
}
978994

979995
if (IsA(node, Aggref))
@@ -1040,6 +1056,76 @@ flatten_group_exprs_mutator(Node *node,
10401056
(void *) context);
10411057
}
10421058

1059+
/*
1060+
* Add oldvar's varnullingrels, if any, to a flattened grouping expression.
1061+
* The newnode has been copied, so we can modify it freely.
1062+
*/
1063+
static Node *
1064+
mark_nullable_by_grouping(PlannerInfo *root, Node *newnode, Var *oldvar)
1065+
{
1066+
Relids relids;
1067+
1068+
if (root == NULL)
1069+
return newnode;
1070+
if (oldvar->varnullingrels == NULL)
1071+
return newnode; /* nothing to do */
1072+
1073+
Assert(bms_equal(oldvar->varnullingrels,
1074+
bms_make_singleton(root->group_rtindex)));
1075+
1076+
relids = pull_varnos_of_level(root, newnode, oldvar->varlevelsup);
1077+
1078+
if (!bms_is_empty(relids))
1079+
{
1080+
/*
1081+
* If the newnode is not variable-free, we set the nullingrels of Vars
1082+
* or PHVs that are contained in the expression. This is not really
1083+
* 'correct' in theory, because it is the whole expression that can be
1084+
* nullable by grouping sets, not its individual vars. But it works
1085+
* in practice, because what we need is that the expression can be
1086+
* somehow distinguished from the same expression in ECs, and marking
1087+
* its vars is sufficient for this purpose.
1088+
*/
1089+
newnode = add_nulling_relids(newnode,
1090+
relids,
1091+
oldvar->varnullingrels);
1092+
}
1093+
else /* variable-free? */
1094+
{
1095+
/*
1096+
* If the newnode is variable-free and does not contain volatile
1097+
* functions or set-returning functions, it can be treated as a member
1098+
* of EC that is redundant. So wrap it in a new PlaceHolderVar to
1099+
* carry the nullingrels. Otherwise we do not bother to make any
1100+
* changes.
1101+
*
1102+
* Aggregate functions and window functions are not allowed in
1103+
* grouping expressions.
1104+
*/
1105+
Assert(!contain_agg_clause(newnode));
1106+
Assert(!contain_window_function(newnode));
1107+
1108+
if (!contain_volatile_functions(newnode) &&
1109+
!expression_returns_set(newnode))
1110+
{
1111+
PlaceHolderVar *newphv;
1112+
Relids phrels;
1113+
1114+
phrels = get_relids_in_jointree((Node *) root->parse->jointree,
1115+
true, false);
1116+
Assert(!bms_is_empty(phrels));
1117+
1118+
newphv = make_placeholder_expr(root, (Expr *) newnode, phrels);
1119+
/* newphv has zero phlevelsup and NULL phnullingrels; fix it */
1120+
newphv->phlevelsup = oldvar->varlevelsup;
1121+
newphv->phnullingrels = bms_copy(oldvar->varnullingrels);
1122+
newnode = (Node *) newphv;
1123+
}
1124+
}
1125+
1126+
return newnode;
1127+
}
1128+
10431129
/*
10441130
* Add oldvar's varnullingrels, if any, to a flattened join alias expression.
10451131
* The newnode has been copied, so we can modify it freely.

src/backend/parser/parse_agg.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,9 +1333,6 @@ substitute_grouped_columns_mutator(Node *node,
13331333

13341334
if (node == NULL)
13351335
return NULL;
1336-
if (IsA(node, Const) ||
1337-
IsA(node, Param))
1338-
return node; /* constants are always acceptable */
13391336

13401337
if (IsA(node, Aggref))
13411338
{
@@ -1409,6 +1406,16 @@ substitute_grouped_columns_mutator(Node *node,
14091406
}
14101407
}
14111408

1409+
/*
1410+
* Constants are always acceptable. We have to do this after we checked
1411+
* the subexpression as a whole for a match, because it is possible that
1412+
* we have GROUP BY items that are constants, and the constants would
1413+
* become not so constant after the grouping step.
1414+
*/
1415+
if (IsA(node, Const) ||
1416+
IsA(node, Param))
1417+
return node;
1418+
14121419
/*
14131420
* If we have an ungrouped Var of the original query level, we have a
14141421
* failure. Vars below the original query level are not a problem, and

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202409101
60+
#define CATALOG_VERSION_NO 202409102
6161

6262
#endif

0 commit comments

Comments
 (0)