Skip to content

Commit 41a2760

Browse files
committed
Fix collation assignment for aggregates with ORDER BY.
ORDER BY expressions were being treated the same as regular aggregate arguments for purposes of collation determination, but really they should not affect the aggregate's collation at all; only collations of the aggregate's regular arguments should affect it. In many cases this mistake would lead to incorrectly throwing a "collation conflict" error; but in some cases the corrected code will silently assign a different collation to the aggregate than before, for example agg(foo ORDER BY bar COLLATE "x") which will now use foo's collation rather than "x" for the aggregate. Given this risk and the lack of field complaints about the issue, it doesn't seem prudent to back-patch. In passing, rearrange code in assign_collations_walker so that we don't need multiple copies of the standard logic for computing collation of a node with children. (Previously, CaseExpr duplicated the standard logic, and we would have needed a third copy for Aggref without this change.) Andrew Gierth and David Fetter
1 parent b42ea79 commit 41a2760

File tree

3 files changed

+121
-84
lines changed

3 files changed

+121
-84
lines changed

src/backend/parser/parse_collate.c

Lines changed: 93 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -319,86 +319,6 @@ assign_collations_walker(Node *node, assign_collations_context *context)
319319
}
320320
}
321321
break;
322-
case T_CaseExpr:
323-
{
324-
/*
325-
* CaseExpr is a special case because we do not want to
326-
* recurse into the test expression (if any). It was already
327-
* marked with collations during transformCaseExpr, and
328-
* furthermore its collation is not relevant to the result of
329-
* the CASE --- only the output expressions are. So we can't
330-
* use expression_tree_walker here.
331-
*/
332-
CaseExpr *expr = (CaseExpr *) node;
333-
Oid typcollation;
334-
ListCell *lc;
335-
336-
foreach(lc, expr->args)
337-
{
338-
CaseWhen *when = (CaseWhen *) lfirst(lc);
339-
340-
Assert(IsA(when, CaseWhen));
341-
342-
/*
343-
* The condition expressions mustn't affect the CASE's
344-
* result collation either; but since they are known to
345-
* yield boolean, it's safe to recurse directly on them
346-
* --- they won't change loccontext.
347-
*/
348-
(void) assign_collations_walker((Node *) when->expr,
349-
&loccontext);
350-
(void) assign_collations_walker((Node *) when->result,
351-
&loccontext);
352-
}
353-
(void) assign_collations_walker((Node *) expr->defresult,
354-
&loccontext);
355-
356-
/*
357-
* Now determine the CASE's output collation. This is the
358-
* same as the general case below.
359-
*/
360-
typcollation = get_typcollation(exprType(node));
361-
if (OidIsValid(typcollation))
362-
{
363-
/* Node's result is collatable; what about its input? */
364-
if (loccontext.strength > COLLATE_NONE)
365-
{
366-
/* Collation state bubbles up from children. */
367-
collation = loccontext.collation;
368-
strength = loccontext.strength;
369-
location = loccontext.location;
370-
}
371-
else
372-
{
373-
/*
374-
* Collatable output produced without any collatable
375-
* input. Use the type's collation (which is usually
376-
* DEFAULT_COLLATION_OID, but might be different for a
377-
* domain).
378-
*/
379-
collation = typcollation;
380-
strength = COLLATE_IMPLICIT;
381-
location = exprLocation(node);
382-
}
383-
}
384-
else
385-
{
386-
/* Node's result type isn't collatable. */
387-
collation = InvalidOid;
388-
strength = COLLATE_NONE;
389-
location = -1; /* won't be used */
390-
}
391-
392-
/*
393-
* Save the state into the expression node. We know it
394-
* doesn't care about input collation.
395-
*/
396-
if (strength == COLLATE_CONFLICT)
397-
exprSetCollation(node, InvalidOid);
398-
else
399-
exprSetCollation(node, collation);
400-
}
401-
break;
402322
case T_RowExpr:
403323
{
404324
/*
@@ -630,14 +550,103 @@ assign_collations_walker(Node *node, assign_collations_context *context)
630550
{
631551
/*
632552
* General case for most expression nodes with children. First
633-
* recurse, then figure out what to assign here.
553+
* recurse, then figure out what to assign to this node.
634554
*/
635555
Oid typcollation;
636556

637-
(void) expression_tree_walker(node,
638-
assign_collations_walker,
639-
(void *) &loccontext);
557+
/*
558+
* For most node types, we want to treat all the child
559+
* expressions alike; but there are a few exceptions, hence
560+
* this inner switch.
561+
*/
562+
switch (nodeTag(node))
563+
{
564+
case T_Aggref:
565+
{
566+
/*
567+
* Aggref is a special case because expressions
568+
* used only for ordering shouldn't be taken to
569+
* conflict with each other or with regular args.
570+
* So we apply assign_expr_collations() to them
571+
* rather than passing down our loccontext.
572+
*
573+
* Note that we recurse to each TargetEntry, not
574+
* directly to its contained expression, so that
575+
* the case above for T_TargetEntry will apply
576+
* appropriate checks to agg ORDER BY items.
577+
*
578+
* We need not recurse into the aggorder or
579+
* aggdistinct lists, because those contain only
580+
* SortGroupClause nodes which we need not
581+
* process.
582+
*/
583+
Aggref *aggref = (Aggref *) node;
584+
ListCell *lc;
585+
586+
foreach(lc, aggref->args)
587+
{
588+
TargetEntry *tle = (TargetEntry *) lfirst(lc);
589+
590+
Assert(IsA(tle, TargetEntry));
591+
if (tle->resjunk)
592+
assign_expr_collations(context->pstate,
593+
(Node *) tle);
594+
else
595+
(void) assign_collations_walker((Node *) tle,
596+
&loccontext);
597+
}
598+
}
599+
break;
600+
case T_CaseExpr:
601+
{
602+
/*
603+
* CaseExpr is a special case because we do not
604+
* want to recurse into the test expression (if
605+
* any). It was already marked with collations
606+
* during transformCaseExpr, and furthermore its
607+
* collation is not relevant to the result of the
608+
* CASE --- only the output expressions are.
609+
*/
610+
CaseExpr *expr = (CaseExpr *) node;
611+
ListCell *lc;
612+
613+
foreach(lc, expr->args)
614+
{
615+
CaseWhen *when = (CaseWhen *) lfirst(lc);
616+
617+
Assert(IsA(when, CaseWhen));
618+
619+
/*
620+
* The condition expressions mustn't affect
621+
* the CASE's result collation either; but
622+
* since they are known to yield boolean, it's
623+
* safe to recurse directly on them --- they
624+
* won't change loccontext.
625+
*/
626+
(void) assign_collations_walker((Node *) when->expr,
627+
&loccontext);
628+
(void) assign_collations_walker((Node *) when->result,
629+
&loccontext);
630+
}
631+
(void) assign_collations_walker((Node *) expr->defresult,
632+
&loccontext);
633+
}
634+
break;
635+
default:
636+
637+
/*
638+
* Normal case: all child expressions contribute
639+
* equally to loccontext.
640+
*/
641+
(void) expression_tree_walker(node,
642+
assign_collations_walker,
643+
(void *) &loccontext);
644+
break;
645+
}
640646

647+
/*
648+
* Now figure out what collation to assign to this node.
649+
*/
641650
typcollation = get_typcollation(exprType(node));
642651
if (OidIsValid(typcollation))
643652
{

src/test/regress/expected/collate.out

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,28 @@ SELECT array_agg(b ORDER BY b) FROM collate_test2;
362362
{ABD,Abc,abc,bbc}
363363
(1 row)
364364

365+
-- In aggregates, ORDER BY expressions don't affect aggregate's collation
366+
SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10; -- fail
367+
ERROR: collation mismatch between explicit collations "C" and "POSIX"
368+
LINE 1: SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM col...
369+
^
370+
SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
371+
array_agg
372+
-----------
373+
{HIJ,hij}
374+
(1 row)
375+
376+
SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
377+
array_agg
378+
-----------
379+
{2,1}
380+
(1 row)
381+
382+
SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail
383+
ERROR: collation mismatch between implicit collations "C" and "POSIX"
384+
LINE 1: SELECT array_agg(a ORDER BY x||y) FROM collate_test10;
385+
^
386+
HINT: You can choose the collation by applying the COLLATE clause to one or both expressions.
365387
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
366388
a | b
367389
---+-----

src/test/regress/sql/collate.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,12 @@ SELECT min(b), max(b) FROM collate_test2;
128128
SELECT array_agg(b ORDER BY b) FROM collate_test1;
129129
SELECT array_agg(b ORDER BY b) FROM collate_test2;
130130

131+
-- In aggregates, ORDER BY expressions don't affect aggregate's collation
132+
SELECT string_agg(x COLLATE "C", y COLLATE "POSIX") FROM collate_test10; -- fail
133+
SELECT array_agg(x COLLATE "C" ORDER BY y COLLATE "POSIX") FROM collate_test10;
134+
SELECT array_agg(a ORDER BY x COLLATE "C", y COLLATE "POSIX") FROM collate_test10;
135+
SELECT array_agg(a ORDER BY x||y) FROM collate_test10; -- fail
136+
131137
SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM collate_test1 ORDER BY 2;
132138
SELECT a, b FROM collate_test2 UNION SELECT a, b FROM collate_test2 ORDER BY 2;
133139
SELECT a, b FROM collate_test2 WHERE a < 4 INTERSECT SELECT a, b FROM collate_test2 WHERE a > 1 ORDER BY 2;

0 commit comments

Comments
 (0)