Skip to content

Commit 4de2d4f

Browse files
committed
Explicitly track whether aggregate final functions modify transition state.
Up to now, there's been hard-wired assumptions that normal aggregates' final functions never modify their transition states, while ordered-set aggregates' final functions always do. This has always been a bit limiting, and in particular it's getting in the way of improving the built-in ordered-set aggregates to allow merging of transition states. Therefore, let's introduce catalog and CREATE AGGREGATE infrastructure that lets the finalfn's behavior be declared explicitly. There are now three possibilities for the finalfn behavior: it's purely read-only, it trashes the transition state irrecoverably, or it changes the state in such a way that no more transfn calls are possible but the state can still be passed to other, compatible finalfns. There are no examples of this third case today, but we'll shortly make the built-in OSAs act like that. This change allows user-defined aggregates to explicitly disclaim support for use as window functions, and/or to prevent transition state merging, if their implementations cannot handle that. While it was previously possible to handle the window case with a run-time error check, there was not any way to prevent transition state merging, which in retrospect is something commit 804163b should have provided for. But better late than never. In passing, split out pg_aggregate.c's extern function declarations into a new header file pg_aggregate_fn.h, similarly to what we've done for some other catalog headers, so that pg_aggregate.h itself can be safe for frontend files to include. This lets pg_dump use the symbolic names for relevant constants. Discussion: https://postgr.es/m/4834.1507849699@sss.pgh.pa.us
1 parent 5f340cb commit 4de2d4f

File tree

16 files changed

+555
-251
lines changed

16 files changed

+555
-251
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,26 @@
486486
<entry></entry>
487487
<entry>True to pass extra dummy arguments to <structfield>aggmfinalfn</structfield></entry>
488488
</row>
489+
<row>
490+
<entry><structfield>aggfinalmodify</structfield></entry>
491+
<entry><type>char</type></entry>
492+
<entry></entry>
493+
<entry>Whether <structfield>aggfinalfn</structfield> modifies the
494+
transition state value:
495+
<literal>r</literal> if it is read-only,
496+
<literal>s</literal> if the <structfield>aggtransfn</structfield>
497+
cannot be applied after the <structfield>aggfinalfn</structfield>, or
498+
<literal>w</literal> if it writes on the value
499+
</entry>
500+
</row>
501+
<row>
502+
<entry><structfield>aggmfinalmodify</structfield></entry>
503+
<entry><type>char</type></entry>
504+
<entry></entry>
505+
<entry>Like <structfield>aggfinalmodify</structfield>, but for
506+
the <structfield>aggmfinalfn</structfield>
507+
</entry>
508+
</row>
489509
<row>
490510
<entry><structfield>aggsortop</structfield></entry>
491511
<entry><type>oid</type></entry>

doc/src/sgml/ref/create_aggregate.sgml

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> ( [ <replacea
2727
[ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ]
2828
[ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ]
2929
[ , FINALFUNC_EXTRA ]
30+
[ , FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ]
3031
[ , COMBINEFUNC = <replaceable class="parameter">combinefunc</replaceable> ]
3132
[ , SERIALFUNC = <replaceable class="parameter">serialfunc</replaceable> ]
3233
[ , DESERIALFUNC = <replaceable class="parameter">deserialfunc</replaceable> ]
@@ -37,6 +38,7 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> ( [ <replacea
3738
[ , MSSPACE = <replaceable class="parameter">mstate_data_size</replaceable> ]
3839
[ , MFINALFUNC = <replaceable class="parameter">mffunc</replaceable> ]
3940
[ , MFINALFUNC_EXTRA ]
41+
[ , MFINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ]
4042
[ , MINITCOND = <replaceable class="parameter">minitial_condition</replaceable> ]
4143
[ , SORTOP = <replaceable class="parameter">sort_operator</replaceable> ]
4244
[ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ]
@@ -49,6 +51,7 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> ( [ [ <replac
4951
[ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ]
5052
[ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ]
5153
[ , FINALFUNC_EXTRA ]
54+
[ , FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ]
5255
[ , INITCOND = <replaceable class="parameter">initial_condition</replaceable> ]
5356
[ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ]
5457
[ , HYPOTHETICAL ]
@@ -63,6 +66,7 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> (
6366
[ , SSPACE = <replaceable class="parameter">state_data_size</replaceable> ]
6467
[ , FINALFUNC = <replaceable class="parameter">ffunc</replaceable> ]
6568
[ , FINALFUNC_EXTRA ]
69+
[ , FINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ]
6670
[ , COMBINEFUNC = <replaceable class="parameter">combinefunc</replaceable> ]
6771
[ , SERIALFUNC = <replaceable class="parameter">serialfunc</replaceable> ]
6872
[ , DESERIALFUNC = <replaceable class="parameter">deserialfunc</replaceable> ]
@@ -73,6 +77,7 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> (
7377
[ , MSSPACE = <replaceable class="parameter">mstate_data_size</replaceable> ]
7478
[ , MFINALFUNC = <replaceable class="parameter">mffunc</replaceable> ]
7579
[ , MFINALFUNC_EXTRA ]
80+
[ , MFINALFUNC_MODIFY = { READ_ONLY | SHARABLE | READ_WRITE } ]
7681
[ , MINITCOND = <replaceable class="parameter">minitial_condition</replaceable> ]
7782
[ , SORTOP = <replaceable class="parameter">sort_operator</replaceable> ]
7883
)
@@ -197,7 +202,8 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> (
197202
as described in <xref linkend="xaggr-moving-aggregates">. This requires
198203
specifying the <literal>MSFUNC</>, <literal>MINVFUNC</>,
199204
and <literal>MSTYPE</> parameters, and optionally
200-
the <literal>MSPACE</>, <literal>MFINALFUNC</>, <literal>MFINALFUNC_EXTRA</>,
205+
the <literal>MSPACE</>, <literal>MFINALFUNC</>,
206+
<literal>MFINALFUNC_EXTRA</>, <literal>MFINALFUNC_MODIFY</>,
201207
and <literal>MINITCOND</> parameters. Except for <literal>MINVFUNC</>,
202208
these parameters work like the corresponding simple-aggregate parameters
203209
without <literal>M</>; they define a separate implementation of the
@@ -412,6 +418,21 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
412418
</listitem>
413419
</varlistentry>
414420

421+
<varlistentry>
422+
<term><literal>FINALFUNC_MODIFY</> = { <literal>READ_ONLY</> | <literal>SHARABLE</> | <literal>READ_WRITE</> }</term>
423+
<listitem>
424+
<para>
425+
This option specifies whether the final function is a pure function
426+
that does not modify its arguments. <literal>READ_ONLY</> indicates
427+
it does not; the other two values indicate that it may change the
428+
transition state value. See <xref linkend="sql-createaggregate-notes"
429+
endterm="sql-createaggregate-notes-title"> below for more detail. The
430+
default is <literal>READ_ONLY</>, except for ordered-set aggregates,
431+
for which the default is <literal>READ_WRITE</>.
432+
</para>
433+
</listitem>
434+
</varlistentry>
435+
415436
<varlistentry>
416437
<term><replaceable class="parameter">combinefunc</replaceable></term>
417438
<listitem>
@@ -563,6 +584,16 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
563584
</listitem>
564585
</varlistentry>
565586

587+
<varlistentry>
588+
<term><literal>MFINALFUNC_MODIFY</> = { <literal>READ_ONLY</> | <literal>SHARABLE</> | <literal>READ_WRITE</> }</term>
589+
<listitem>
590+
<para>
591+
This option is like <literal>FINALFUNC_MODIFY</>, but it describes
592+
the behavior of the moving-aggregate final function.
593+
</para>
594+
</listitem>
595+
</varlistentry>
596+
566597
<varlistentry>
567598
<term><replaceable class="parameter">minitial_condition</replaceable></term>
568599
<listitem>
@@ -587,12 +618,12 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
587618
</varlistentry>
588619

589620
<varlistentry>
590-
<term><literal>PARALLEL</literal></term>
621+
<term><literal>PARALLEL =</> { <literal>SAFE</> | <literal>RESTRICTED</> | <literal>UNSAFE</> }</term>
591622
<listitem>
592623
<para>
593624
The meanings of <literal>PARALLEL SAFE</>, <literal>PARALLEL
594625
RESTRICTED</>, and <literal>PARALLEL UNSAFE</> are the same as
595-
for <xref linkend="sql-createfunction">. An aggregate will not be
626+
in <xref linkend="sql-createfunction">. An aggregate will not be
596627
considered for parallelization if it is marked <literal>PARALLEL
597628
UNSAFE</> (which is the default!) or <literal>PARALLEL RESTRICTED</>.
598629
Note that the parallel-safety markings of the aggregate's support
@@ -624,8 +655,8 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
624655
</para>
625656
</refsect1>
626657

627-
<refsect1>
628-
<title>Notes</title>
658+
<refsect1 id="sql-createaggregate-notes">
659+
<title id="sql-createaggregate-notes-title">Notes</title>
629660

630661
<para>
631662
In parameters that specify support function names, you can write
@@ -634,6 +665,34 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
634665
of the support functions are determined from other parameters.
635666
</para>
636667

668+
<para>
669+
Ordinarily, Postgres functions are expected to be true functions that
670+
do not modify their input values. However, an aggregate transition
671+
function, <emphasis>when used in the context of an aggregate</>,
672+
is allowed to cheat and modify its transition-state argument in place.
673+
This can provide substantial performance benefits compared to making
674+
a fresh copy of the transition state each time.
675+
</para>
676+
677+
<para>
678+
Likewise, while an aggregate final function is normally expected not to
679+
modify its input values, sometimes it is impractical to avoid modifying
680+
the transition-state argument. Such behavior must be declared using
681+
the <literal>FINALFUNC_MODIFY</> parameter. The <literal>READ_WRITE</>
682+
value indicates that the final function modifies the transition state in
683+
unspecified ways. This value prevents use of the aggregate as a window
684+
function, and it also prevents merging of transition states for aggregate
685+
calls that share the same input values and transition functions.
686+
The <literal>SHARABLE</> value indicates that the transition function
687+
cannot be applied after the final function, but multiple final-function
688+
calls can be performed on the ending transition state value. This value
689+
prevents use of the aggregate as a window function, but it allows merging
690+
of transition states. (That is, the optimization of interest here is not
691+
applying the same final function repeatedly, but applying different final
692+
functions to the same ending transition state value. This is allowed as
693+
long as none of the final functions are marked <literal>READ_WRITE</>.)
694+
</para>
695+
637696
<para>
638697
If an aggregate supports moving-aggregate mode, it will improve
639698
calculation efficiency when the aggregate is used as a window function
@@ -671,7 +730,8 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
671730
Note that whether or not the aggregate supports moving-aggregate
672731
mode, <productname>PostgreSQL</productname> can handle a moving frame
673732
end without recalculation; this is done by continuing to add new values
674-
to the aggregate's state. It is assumed that the final function does
733+
to the aggregate's state. This is why use of an aggregate as a window
734+
function requires that the final function be read-only: it must
675735
not damage the aggregate's state value, so that the aggregation can be
676736
continued even after an aggregate result value has been obtained for
677737
one set of frame boundaries.

doc/src/sgml/xaggr.sgml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,13 @@ SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY income) FROM households;
487487
C, since their state values aren't definable as any SQL data type.
488488
(In the above example, notice that the state value is declared as
489489
type <type>internal</> &mdash; this is typical.)
490+
Also, because the final function performs the sort, it is not possible
491+
to continue adding input rows by executing the transition function again
492+
later. This means the final function is not <literal>READ_ONLY</>;
493+
it must be declared in <xref linkend="sql-createaggregate">
494+
as <literal>READ_WRITE</>, or as <literal>SHARABLE</> if it's
495+
possible for additional final-function calls to make use of the
496+
already-sorted state.
490497
</para>
491498

492499
<para>
@@ -622,16 +629,15 @@ SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY income) FROM households;
622629
<programlisting>
623630
if (AggCheckCallContext(fcinfo, NULL))
624631
</programlisting>
625-
One reason for checking this is that when it is true for a transition
626-
function, the first input
632+
One reason for checking this is that when it is true, the first input
627633
must be a temporary state value and can therefore safely be modified
628634
in-place rather than allocating a new copy.
629635
See <function>int8inc()</> for an example.
630-
(This is the <emphasis>only</>
631-
case where it is safe for a function to modify a pass-by-reference input.
632-
In particular, final functions for normal aggregates must not
633-
modify their inputs in any case, because in some cases they will be
634-
re-executed on the same final state value.)
636+
(While aggregate transition functions are always allowed to modify
637+
the transition value in-place, aggregate final functions are generally
638+
discouraged from doing so; if they do so, the behavior must be declared
639+
when creating the aggregate. See <xref linkend="sql-createaggregate">
640+
for more detail.)
635641
</para>
636642

637643
<para>

src/backend/catalog/pg_aggregate.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "catalog/dependency.h"
2020
#include "catalog/indexing.h"
2121
#include "catalog/pg_aggregate.h"
22+
#include "catalog/pg_aggregate_fn.h"
2223
#include "catalog/pg_language.h"
2324
#include "catalog/pg_operator.h"
2425
#include "catalog/pg_proc.h"
@@ -65,6 +66,8 @@ AggregateCreate(const char *aggName,
6566
List *aggmfinalfnName,
6667
bool finalfnExtraArgs,
6768
bool mfinalfnExtraArgs,
69+
char finalfnModify,
70+
char mfinalfnModify,
6871
List *aggsortopName,
6972
Oid aggTransType,
7073
int32 aggTransSpace,
@@ -656,6 +659,8 @@ AggregateCreate(const char *aggName,
656659
values[Anum_pg_aggregate_aggmfinalfn - 1] = ObjectIdGetDatum(mfinalfn);
657660
values[Anum_pg_aggregate_aggfinalextra - 1] = BoolGetDatum(finalfnExtraArgs);
658661
values[Anum_pg_aggregate_aggmfinalextra - 1] = BoolGetDatum(mfinalfnExtraArgs);
662+
values[Anum_pg_aggregate_aggfinalmodify - 1] = CharGetDatum(finalfnModify);
663+
values[Anum_pg_aggregate_aggmfinalmodify - 1] = CharGetDatum(mfinalfnModify);
659664
values[Anum_pg_aggregate_aggsortop - 1] = ObjectIdGetDatum(sortop);
660665
values[Anum_pg_aggregate_aggtranstype - 1] = ObjectIdGetDatum(aggTransType);
661666
values[Anum_pg_aggregate_aggtransspace - 1] = Int32GetDatum(aggTransSpace);

src/backend/commands/aggregatecmds.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "catalog/dependency.h"
2727
#include "catalog/indexing.h"
2828
#include "catalog/pg_aggregate.h"
29+
#include "catalog/pg_aggregate_fn.h"
2930
#include "catalog/pg_proc.h"
3031
#include "catalog/pg_type.h"
3132
#include "commands/alter.h"
@@ -39,6 +40,9 @@
3940
#include "utils/syscache.h"
4041

4142

43+
static char extractModify(DefElem *defel);
44+
45+
4246
/*
4347
* DefineAggregate
4448
*
@@ -67,6 +71,8 @@ DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List
6771
List *mfinalfuncName = NIL;
6872
bool finalfuncExtraArgs = false;
6973
bool mfinalfuncExtraArgs = false;
74+
char finalfuncModify = 0;
75+
char mfinalfuncModify = 0;
7076
List *sortoperatorName = NIL;
7177
TypeName *baseType = NULL;
7278
TypeName *transType = NULL;
@@ -143,6 +149,10 @@ DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List
143149
finalfuncExtraArgs = defGetBoolean(defel);
144150
else if (pg_strcasecmp(defel->defname, "mfinalfunc_extra") == 0)
145151
mfinalfuncExtraArgs = defGetBoolean(defel);
152+
else if (pg_strcasecmp(defel->defname, "finalfunc_modify") == 0)
153+
finalfuncModify = extractModify(defel);
154+
else if (pg_strcasecmp(defel->defname, "mfinalfunc_modify") == 0)
155+
mfinalfuncModify = extractModify(defel);
146156
else if (pg_strcasecmp(defel->defname, "sortop") == 0)
147157
sortoperatorName = defGetQualifiedName(defel);
148158
else if (pg_strcasecmp(defel->defname, "basetype") == 0)
@@ -235,6 +245,15 @@ DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List
235245
errmsg("aggregate minitcond must not be specified without mstype")));
236246
}
237247

248+
/*
249+
* Default values for modify flags can only be determined once we know the
250+
* aggKind.
251+
*/
252+
if (finalfuncModify == 0)
253+
finalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE;
254+
if (mfinalfuncModify == 0)
255+
mfinalfuncModify = (aggKind == AGGKIND_NORMAL) ? AGGMODIFY_READ_ONLY : AGGMODIFY_READ_WRITE;
256+
238257
/*
239258
* look up the aggregate's input datatype(s).
240259
*/
@@ -437,6 +456,8 @@ DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List
437456
mfinalfuncName, /* final function name */
438457
finalfuncExtraArgs,
439458
mfinalfuncExtraArgs,
459+
finalfuncModify,
460+
mfinalfuncModify,
440461
sortoperatorName, /* sort operator name */
441462
transTypeId, /* transition data type */
442463
transSpace, /* transition space */
@@ -446,3 +467,24 @@ DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List
446467
minitval, /* initial condition */
447468
proparallel); /* parallel safe? */
448469
}
470+
471+
/*
472+
* Convert the string form of [m]finalfunc_modify to the catalog representation
473+
*/
474+
static char
475+
extractModify(DefElem *defel)
476+
{
477+
char *val = defGetString(defel);
478+
479+
if (strcmp(val, "read_only") == 0)
480+
return AGGMODIFY_READ_ONLY;
481+
if (strcmp(val, "sharable") == 0)
482+
return AGGMODIFY_SHARABLE;
483+
if (strcmp(val, "read_write") == 0)
484+
return AGGMODIFY_READ_WRITE;
485+
ereport(ERROR,
486+
(errcode(ERRCODE_SYNTAX_ERROR),
487+
errmsg("parameter \"%s\" must be READ_ONLY, SHARABLE, or READ_WRITE",
488+
defel->defname)));
489+
return 0; /* keep compiler quiet */
490+
}

0 commit comments

Comments
 (0)