Skip to content

Commit be45be9

Browse files
committed
Implement GROUP BY DISTINCT
With grouping sets, it's possible that some of the grouping sets are duplicate. This is especially common with CUBE and ROLLUP clauses. For example GROUP BY CUBE (a,b), CUBE (b,c) is equivalent to GROUP BY GROUPING SETS ( (a, b, c), (a, b, c), (a, b, c), (a, b), (a, b), (a, b), (a), (a), (a), (c, a), (c, a), (c, a), (c), (b, c), (b), () ) Some of the grouping sets are calculated multiple times, which is mostly unnecessary. This commit implements a new GROUP BY DISTINCT feature, as defined in the SQL standard, which eliminates the duplicate sets. Author: Vik Fearing Reviewed-by: Erik Rijkers, Georgios Kokolatos, Tomas Vondra Discussion: https://postgr.es/m/bf3805a8-d7d1-ae61-fece-761b7ff41ecc@postgresfriends.org
1 parent cd91de0 commit be45be9

File tree

18 files changed

+333
-27
lines changed

18 files changed

+333
-27
lines changed

doc/src/sgml/queries.sgml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,55 @@ GROUP BY GROUPING SETS (
13721372
</programlisting>
13731373
</para>
13741374

1375+
<para>
1376+
<indexterm zone="queries-grouping-sets">
1377+
<primary>ALL</primary>
1378+
<secondary>GROUP BY ALL</secondary>
1379+
</indexterm>
1380+
<indexterm zone="queries-grouping-sets">
1381+
<primary>DISTINCT</primary>
1382+
<secondary>GROUP BY DISTINCT</secondary>
1383+
</indexterm>
1384+
When specifying multiple grouping items together, the final set of grouping
1385+
sets might contain duplicates. For example:
1386+
<programlisting>
1387+
GROUP BY ROLLUP (a, b), ROLLUP (a, c)
1388+
</programlisting>
1389+
is equivalent to
1390+
<programlisting>
1391+
GROUP BY GROUPING SETS (
1392+
(a, b, c),
1393+
(a, b),
1394+
(a, b),
1395+
(a, c),
1396+
(a),
1397+
(a),
1398+
(a, c),
1399+
(a),
1400+
()
1401+
)
1402+
</programlisting>
1403+
If these duplicates are undesirable, they can be removed using the
1404+
<literal>DISTINCT</literal> clause directly on the <literal>GROUP BY</literal>.
1405+
Therefore:
1406+
<programlisting>
1407+
GROUP BY <emphasis>DISTINCT</emphasis> ROLLUP (a, b), ROLLUP (a, c)
1408+
</programlisting>
1409+
is equivalent to
1410+
<programlisting>
1411+
GROUP BY GROUPING SETS (
1412+
(a, b, c),
1413+
(a, b),
1414+
(a, c),
1415+
(a),
1416+
()
1417+
)
1418+
</programlisting>
1419+
This is not the same as using <literal>SELECT DISTINCT</literal> because the output
1420+
rows may still contain duplicates. If any of the ungrouped columns contains NULL,
1421+
it will be indistinguishable from the NULL used when that same column is grouped.
1422+
</para>
1423+
13751424
<note>
13761425
<para>
13771426
The construct <literal>(a, b)</literal> is normally recognized in expressions as
@@ -1560,8 +1609,13 @@ SELECT a "from", b + c AS sum FROM ...
15601609
<sect2 id="queries-distinct">
15611610
<title><literal>DISTINCT</literal></title>
15621611

1612+
<indexterm zone="queries-distinct">
1613+
<primary>ALL</primary>
1614+
<secondary>SELECT ALL</secondary>
1615+
</indexterm>
15631616
<indexterm zone="queries-distinct">
15641617
<primary>DISTINCT</primary>
1618+
<secondary>SELECT DISTINCT</secondary>
15651619
</indexterm>
15661620

15671621
<indexterm zone="queries-distinct">

doc/src/sgml/ref/select.sgml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
3737
[ * | <replaceable class="parameter">expression</replaceable> [ [ AS ] <replaceable class="parameter">output_name</replaceable> ] [, ...] ]
3838
[ FROM <replaceable class="parameter">from_item</replaceable> [, ...] ]
3939
[ WHERE <replaceable class="parameter">condition</replaceable> ]
40-
[ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
40+
[ GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...] ]
4141
[ HAVING <replaceable class="parameter">condition</replaceable> ]
4242
[ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
4343
[ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] <replaceable class="parameter">select</replaceable> ]
@@ -778,7 +778,7 @@ WHERE <replaceable class="parameter">condition</replaceable>
778778
<para>
779779
The optional <literal>GROUP BY</literal> clause has the general form
780780
<synopsis>
781-
GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
781+
GROUP BY [ ALL | DISTINCT ] <replaceable class="parameter">grouping_element</replaceable> [, ...]
782782
</synopsis>
783783
</para>
784784

@@ -802,7 +802,10 @@ GROUP BY <replaceable class="parameter">grouping_element</replaceable> [, ...]
802802
independent <replaceable>grouping sets</replaceable>. The effect of this is
803803
equivalent to constructing a <literal>UNION ALL</literal> between
804804
subqueries with the individual grouping sets as their
805-
<literal>GROUP BY</literal> clauses. For further details on the handling
805+
<literal>GROUP BY</literal> clauses. The optional <literal>DISTINCT</literal>
806+
clause removes duplicate sets before processing; it does <emphasis>not</emphasis>
807+
transform the <literal>UNION ALL</literal> into a <literal>UNION DISTINCT</literal>.
808+
For further details on the handling
806809
of grouping sets see <xref linkend="queries-grouping-sets"/>.
807810
</para>
808811

src/backend/catalog/sql_features.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ T351 Bracketed SQL comments (/*...*/ comments) YES
482482
T431 Extended grouping capabilities YES
483483
T432 Nested and concatenated GROUPING SETS YES
484484
T433 Multiargument GROUPING function YES
485-
T434 GROUP BY DISTINCT NO
485+
T434 GROUP BY DISTINCT YES
486486
T441 ABS and MOD functions YES
487487
T461 Symmetric BETWEEN predicate YES
488488
T471 Result sets return value NO

src/backend/nodes/copyfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3135,6 +3135,7 @@ _copyQuery(const Query *from)
31353135
COPY_NODE_FIELD(onConflict);
31363136
COPY_NODE_FIELD(returningList);
31373137
COPY_NODE_FIELD(groupClause);
3138+
COPY_SCALAR_FIELD(groupDistinct);
31383139
COPY_NODE_FIELD(groupingSets);
31393140
COPY_NODE_FIELD(havingQual);
31403141
COPY_NODE_FIELD(windowClause);
@@ -3221,6 +3222,7 @@ _copySelectStmt(const SelectStmt *from)
32213222
COPY_NODE_FIELD(fromClause);
32223223
COPY_NODE_FIELD(whereClause);
32233224
COPY_NODE_FIELD(groupClause);
3225+
COPY_SCALAR_FIELD(groupDistinct);
32243226
COPY_NODE_FIELD(havingClause);
32253227
COPY_NODE_FIELD(windowClause);
32263228
COPY_NODE_FIELD(valuesLists);

src/backend/nodes/equalfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,7 @@ _equalQuery(const Query *a, const Query *b)
977977
COMPARE_NODE_FIELD(onConflict);
978978
COMPARE_NODE_FIELD(returningList);
979979
COMPARE_NODE_FIELD(groupClause);
980+
COMPARE_SCALAR_FIELD(groupDistinct);
980981
COMPARE_NODE_FIELD(groupingSets);
981982
COMPARE_NODE_FIELD(havingQual);
982983
COMPARE_NODE_FIELD(windowClause);
@@ -1053,6 +1054,7 @@ _equalSelectStmt(const SelectStmt *a, const SelectStmt *b)
10531054
COMPARE_NODE_FIELD(fromClause);
10541055
COMPARE_NODE_FIELD(whereClause);
10551056
COMPARE_NODE_FIELD(groupClause);
1057+
COMPARE_SCALAR_FIELD(groupDistinct);
10561058
COMPARE_NODE_FIELD(havingClause);
10571059
COMPARE_NODE_FIELD(windowClause);
10581060
COMPARE_NODE_FIELD(valuesLists);

src/backend/nodes/list.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,22 @@ list_sort(List *list, list_sort_comparator cmp)
15061506
qsort(list->elements, len, sizeof(ListCell), (qsort_comparator) cmp);
15071507
}
15081508

1509+
/*
1510+
* list_sort comparator for sorting a list into ascending int order.
1511+
*/
1512+
int
1513+
list_int_cmp(const ListCell *p1, const ListCell *p2)
1514+
{
1515+
int v1 = lfirst_int(p1);
1516+
int v2 = lfirst_int(p2);
1517+
1518+
if (v1 < v2)
1519+
return -1;
1520+
if (v1 > v2)
1521+
return 1;
1522+
return 0;
1523+
}
1524+
15091525
/*
15101526
* list_sort comparator for sorting a list into ascending OID order.
15111527
*/

src/backend/nodes/outfuncs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2771,6 +2771,7 @@ _outSelectStmt(StringInfo str, const SelectStmt *node)
27712771
WRITE_NODE_FIELD(fromClause);
27722772
WRITE_NODE_FIELD(whereClause);
27732773
WRITE_NODE_FIELD(groupClause);
2774+
WRITE_BOOL_FIELD(groupDistinct);
27742775
WRITE_NODE_FIELD(havingClause);
27752776
WRITE_NODE_FIELD(windowClause);
27762777
WRITE_NODE_FIELD(valuesLists);
@@ -2996,6 +2997,7 @@ _outQuery(StringInfo str, const Query *node)
29962997
WRITE_NODE_FIELD(onConflict);
29972998
WRITE_NODE_FIELD(returningList);
29982999
WRITE_NODE_FIELD(groupClause);
3000+
WRITE_BOOL_FIELD(groupDistinct);
29993001
WRITE_NODE_FIELD(groupingSets);
30003002
WRITE_NODE_FIELD(havingQual);
30013003
WRITE_NODE_FIELD(windowClause);

src/backend/nodes/readfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ _readQuery(void)
271271
READ_NODE_FIELD(onConflict);
272272
READ_NODE_FIELD(returningList);
273273
READ_NODE_FIELD(groupClause);
274+
READ_BOOL_FIELD(groupDistinct);
274275
READ_NODE_FIELD(groupingSets);
275276
READ_NODE_FIELD(havingQual);
276277
READ_NODE_FIELD(windowClause);

src/backend/optimizer/plan/planner.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2442,7 +2442,7 @@ preprocess_grouping_sets(PlannerInfo *root)
24422442
ListCell *lc_set;
24432443
grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
24442444

2445-
parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2445+
parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
24462446

24472447
gd->any_hashable = false;
24482448
gd->unhashable_refs = NULL;

src/backend/parser/analyze.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,7 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
12641264
qry->sortClause,
12651265
EXPR_KIND_GROUP_BY,
12661266
false /* allow SQL92 rules */ );
1267+
qry->groupDistinct = stmt->groupDistinct;
12671268

12681269
if (stmt->distinctClause == NIL)
12691270
{

src/backend/parser/gram.y

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,13 @@ typedef struct SelectLimit
134134
LimitOption limitOption;
135135
} SelectLimit;
136136

137+
/* Private struct for the result of group_clause production */
138+
typedef struct GroupClause
139+
{
140+
bool distinct;
141+
List *list;
142+
} GroupClause;
143+
137144
/* ConstraintAttributeSpec yields an integer bitmask of these flags: */
138145
#define CAS_NOT_DEFERRABLE 0x01
139146
#define CAS_DEFERRABLE 0x02
@@ -250,6 +257,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
250257
PartitionBoundSpec *partboundspec;
251258
RoleSpec *rolespec;
252259
struct SelectLimit *selectlimit;
260+
SetQuantifier setquantifier;
261+
struct GroupClause *groupclause;
253262
}
254263

255264
%type <node> stmt schema_stmt
@@ -405,7 +414,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
405414
target_list opt_target_list insert_column_list set_target_list
406415
set_clause_list set_clause
407416
def_list operator_def_list indirection opt_indirection
408-
reloption_list group_clause TriggerFuncArgs opclass_item_list opclass_drop_list
417+
reloption_list TriggerFuncArgs opclass_item_list opclass_drop_list
409418
opclass_purpose opt_opfamily transaction_mode_list_or_empty
410419
OptTableFuncElementList TableFuncElementList opt_type_modifiers
411420
prep_type_clause
@@ -418,6 +427,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
418427
vacuum_relation_list opt_vacuum_relation_list
419428
drop_option_list
420429

430+
%type <groupclause> group_clause
421431
%type <list> group_by_list
422432
%type <node> group_by_item empty_grouping_set rollup_clause cube_clause
423433
%type <node> grouping_sets_clause
@@ -443,7 +453,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
443453
%type <node> for_locking_item
444454
%type <list> for_locking_clause opt_for_locking_clause for_locking_items
445455
%type <list> locked_rels_list
446-
%type <boolean> all_or_distinct
456+
%type <setquantifier> set_quantifier
447457

448458
%type <node> join_qual
449459
%type <jtype> join_type
@@ -11294,7 +11304,8 @@ simple_select:
1129411304
n->intoClause = $4;
1129511305
n->fromClause = $5;
1129611306
n->whereClause = $6;
11297-
n->groupClause = $7;
11307+
n->groupClause = ($7)->list;
11308+
n->groupDistinct = ($7)->distinct;
1129811309
n->havingClause = $8;
1129911310
n->windowClause = $9;
1130011311
$$ = (Node *)n;
@@ -11309,7 +11320,8 @@ simple_select:
1130911320
n->intoClause = $4;
1131011321
n->fromClause = $5;
1131111322
n->whereClause = $6;
11312-
n->groupClause = $7;
11323+
n->groupClause = ($7)->list;
11324+
n->groupDistinct = ($7)->distinct;
1131311325
n->havingClause = $8;
1131411326
n->windowClause = $9;
1131511327
$$ = (Node *)n;
@@ -11334,17 +11346,17 @@ simple_select:
1133411346
n->fromClause = list_make1($2);
1133511347
$$ = (Node *)n;
1133611348
}
11337-
| select_clause UNION all_or_distinct select_clause
11349+
| select_clause UNION set_quantifier select_clause
1133811350
{
11339-
$$ = makeSetOp(SETOP_UNION, $3, $1, $4);
11351+
$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
1134011352
}
11341-
| select_clause INTERSECT all_or_distinct select_clause
11353+
| select_clause INTERSECT set_quantifier select_clause
1134211354
{
11343-
$$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4);
11355+
$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
1134411356
}
11345-
| select_clause EXCEPT all_or_distinct select_clause
11357+
| select_clause EXCEPT set_quantifier select_clause
1134611358
{
11347-
$$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4);
11359+
$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
1134811360
}
1134911361
;
1135011362

@@ -11542,10 +11554,10 @@ opt_table: TABLE
1154211554
| /*EMPTY*/
1154311555
;
1154411556

11545-
all_or_distinct:
11546-
ALL { $$ = true; }
11547-
| DISTINCT { $$ = false; }
11548-
| /*EMPTY*/ { $$ = false; }
11557+
set_quantifier:
11558+
ALL { $$ = SET_QUANTIFIER_ALL; }
11559+
| DISTINCT { $$ = SET_QUANTIFIER_DISTINCT; }
11560+
| /*EMPTY*/ { $$ = SET_QUANTIFIER_DEFAULT; }
1154911561
;
1155011562

1155111563
/* We use (NIL) as a placeholder to indicate that all target expressions
@@ -11771,8 +11783,20 @@ first_or_next: FIRST_P { $$ = 0; }
1177111783
* GroupingSet node of some type.
1177211784
*/
1177311785
group_clause:
11774-
GROUP_P BY group_by_list { $$ = $3; }
11775-
| /*EMPTY*/ { $$ = NIL; }
11786+
GROUP_P BY set_quantifier group_by_list
11787+
{
11788+
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
11789+
n->distinct = $3 == SET_QUANTIFIER_DISTINCT;
11790+
n->list = $4;
11791+
$$ = n;
11792+
}
11793+
| /*EMPTY*/
11794+
{
11795+
GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause));
11796+
n->distinct = false;
11797+
n->list = NIL;
11798+
$$ = n;
11799+
}
1177611800
;
1177711801

1177811802
group_by_list:
@@ -15145,7 +15169,8 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list
1514515169
n->targetList = $2;
1514615170
n->fromClause = $3;
1514715171
n->whereClause = $4;
15148-
n->groupClause = $5;
15172+
n->groupClause = ($5)->list;
15173+
n->groupDistinct = ($5)->distinct;
1514915174
n->havingClause = $6;
1515015175
n->windowClause = $7;
1515115176
n->sortClause = $8;

0 commit comments

Comments
 (0)