Skip to content

Commit fa5e440

Browse files
committed
Adjust the API for aggregate function calls so that a C-coded function
can tell whether it is being used as an aggregate or not. This allows such a function to avoid re-pallocing a pass-by-reference transition value; normally it would be unsafe for a function to scribble on an input, but in the aggregate case it's safe to reuse the old transition value. Make int8inc() do this. This gets a useful improvement in the speed of COUNT(*), at least on narrow tables (it seems to be swamped by I/O when the table rows are wide). Per a discussion in early December with Neil Conway. I also fixed int_aggregate.c to check this, thereby turning it into something approaching a supportable technique instead of being a crude hack.
1 parent de004e4 commit fa5e440

File tree

6 files changed

+162
-73
lines changed

6 files changed

+162
-73
lines changed

contrib/intagg/int_aggregate.c

Lines changed: 70 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,12 @@
3434
#include "utils/lsyscache.h"
3535

3636

37-
/* This is actually a postgres version of a one dimensional array */
38-
37+
/*
38+
* This is actually a postgres version of a one dimensional array.
39+
* We cheat a little by using the lower-bound field as an indicator
40+
* of the physically allocated size, while the dimensionality is the
41+
* count of items accumulated so far.
42+
*/
3943
typedef struct
4044
{
4145
ArrayType a;
@@ -56,7 +60,7 @@ typedef struct callContext
5660
#define START_NUM 8 /* initial size of arrays */
5761
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
5862

59-
static PGARRAY *GetPGArray(PGARRAY *p, int fAdd);
63+
static PGARRAY *GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd);
6064
static PGARRAY *ShrinkPGArray(PGARRAY *p);
6165

6266
Datum int_agg_state(PG_FUNCTION_ARGS);
@@ -68,72 +72,68 @@ PG_FUNCTION_INFO_V1(int_agg_final_array);
6872
PG_FUNCTION_INFO_V1(int_enum);
6973

7074
/*
71-
* Manage the aggregation state of the array
75+
* Manage the allocation state of the array
7276
*
73-
* Need to specify a suitably long-lived memory context, or it will vanish!
74-
* PortalContext isn't really right, but it's close enough.
77+
* Note that the array needs to be in a reasonably long-lived context,
78+
* ie the Agg node's aggcontext.
7579
*/
7680
static PGARRAY *
77-
GetPGArray(PGARRAY *p, int fAdd)
81+
GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd)
7882
{
7983
if (!p)
8084
{
8185
/* New array */
8286
int cb = PGARRAY_SIZE(START_NUM);
8387

84-
p = (PGARRAY *) MemoryContextAlloc(PortalContext, cb);
88+
p = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cb);
8589
p->a.size = cb;
86-
p->a.ndim = 0;
90+
p->a.ndim = 1;
8791
p->a.flags = 0;
8892
p->a.elemtype = INT4OID;
8993
p->items = 0;
9094
p->lower = START_NUM;
9195
}
9296
else if (fAdd)
93-
{ /* Ensure array has space */
97+
{
98+
/* Ensure array has space for another item */
9499
if (p->items >= p->lower)
95100
{
96-
PGARRAY *pn;
97-
int n = p->lower + p->lower;
101+
PGARRAY *pn;
102+
int n = p->lower * 2;
98103
int cbNew = PGARRAY_SIZE(n);
99104

100-
pn = (PGARRAY *) repalloc(p, cbNew);
105+
pn = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cbNew);
106+
memcpy(pn, p, p->a.size);
101107
pn->a.size = cbNew;
102108
pn->lower = n;
103-
return pn;
109+
/* do not pfree(p), because nodeAgg.c will */
110+
p = pn;
104111
}
105112
}
106113
return p;
107114
}
108115

109-
/* Shrinks the array to its actual size and moves it into the standard
110-
* memory allocation context, frees working memory
116+
/*
117+
* Shrinks the array to its actual size and moves it into the standard
118+
* memory allocation context
111119
*/
112120
static PGARRAY *
113-
ShrinkPGArray(PGARRAY * p)
121+
ShrinkPGArray(PGARRAY *p)
114122
{
115-
PGARRAY *pnew = NULL;
123+
PGARRAY *pnew;
124+
/* get target size */
125+
int cb = PGARRAY_SIZE(p->items);
126+
127+
/* use current transaction context */
128+
pnew = palloc(cb);
129+
memcpy(pnew, p, cb);
130+
131+
/* fix up the fields in the new array to match normal conventions */
132+
pnew->a.size = cb;
133+
pnew->lower = 1;
134+
135+
/* do not pfree(p), because nodeAgg.c will */
116136

117-
if (p)
118-
{
119-
/* get target size */
120-
int cb = PGARRAY_SIZE(p->items);
121-
122-
/* use current transaction context */
123-
pnew = palloc(cb);
124-
125-
/*
126-
* Fix up the fields in the new structure, so Postgres understands
127-
*/
128-
memcpy(pnew, p, cb);
129-
pnew->a.size = cb;
130-
pnew->a.ndim = 1;
131-
pnew->a.flags = 0;
132-
pnew->a.elemtype = INT4OID;
133-
pnew->lower = 1;
134-
135-
pfree(p);
136-
}
137137
return pnew;
138138
}
139139

@@ -144,11 +144,18 @@ int_agg_state(PG_FUNCTION_ARGS)
144144
PGARRAY *state;
145145
PGARRAY *p;
146146

147+
/*
148+
* As of PG 8.1 we can actually verify that we are being used as an
149+
* aggregate function, and so it is safe to scribble on our left input.
150+
*/
151+
if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
152+
elog(ERROR, "int_agg_state may only be used as an aggregate");
153+
147154
if (PG_ARGISNULL(0))
148-
state = NULL;
155+
state = NULL; /* first time through */
149156
else
150157
state = (PGARRAY *) PG_GETARG_POINTER(0);
151-
p = GetPGArray(state, 1);
158+
p = GetPGArray(state, (AggState *) fcinfo->context, true);
152159

153160
if (!PG_ARGISNULL(1))
154161
{
@@ -164,22 +171,38 @@ int_agg_state(PG_FUNCTION_ARGS)
164171
PG_RETURN_POINTER(p);
165172
}
166173

167-
/* This is the final function used for the integer aggregator. It returns all
174+
/*
175+
* This is the final function used for the integer aggregator. It returns all
168176
* the integers collected as a one dimensional integer array
169177
*/
170178
Datum
171179
int_agg_final_array(PG_FUNCTION_ARGS)
172180
{
173-
PGARRAY *state = (PGARRAY *) PG_GETARG_POINTER(0);
174-
PGARRAY *pnew = ShrinkPGArray(GetPGArray(state, 0));
181+
PGARRAY *state;
182+
PGARRAY *p;
183+
PGARRAY *pnew;
175184

176-
if (pnew)
177-
PG_RETURN_POINTER(pnew);
185+
/*
186+
* As of PG 8.1 we can actually verify that we are being used as an
187+
* aggregate function, and so it is safe to scribble on our left input.
188+
*/
189+
if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
190+
elog(ERROR, "int_agg_final_array may only be used as an aggregate");
191+
192+
if (PG_ARGISNULL(0))
193+
state = NULL; /* zero items in aggregation */
178194
else
179-
PG_RETURN_NULL();
195+
state = (PGARRAY *) PG_GETARG_POINTER(0);
196+
p = GetPGArray(state, (AggState *) fcinfo->context, false);
197+
198+
pnew = ShrinkPGArray(p);
199+
PG_RETURN_POINTER(pnew);
180200
}
181201

182-
/* This function accepts an array, and returns one item for each entry in the array */
202+
/*
203+
* This function accepts an array, and returns one item for each entry in the
204+
* array
205+
*/
183206
Datum
184207
int_enum(PG_FUNCTION_ARGS)
185208
{

contrib/intagg/int_aggregate.sql.in

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ SET search_path = public;
66
CREATE OR REPLACE FUNCTION int_agg_state (int4[], int4)
77
RETURNS int4[]
88
AS 'MODULE_PATHNAME','int_agg_state'
9-
LANGUAGE 'C';
9+
LANGUAGE C;
1010

1111
-- Internal function for the aggregate
1212
-- Is called at the end of the aggregation, and returns an array.
1313
CREATE OR REPLACE FUNCTION int_agg_final_array (int4[])
1414
RETURNS int4[]
1515
AS 'MODULE_PATHNAME','int_agg_final_array'
16-
LANGUAGE 'C' STRICT;
16+
LANGUAGE C;
1717

1818
-- The aggregate function itself
1919
-- uses the above functions to create an array of integers from an aggregation.
@@ -24,15 +24,10 @@ CREATE AGGREGATE int_array_aggregate (
2424
FINALFUNC = int_agg_final_array
2525
);
2626

27-
-- The aggregate component functions are not designed to be called
28-
-- independently, so disable public access to them
29-
REVOKE ALL ON FUNCTION int_agg_state (int4[], int4) FROM PUBLIC;
30-
REVOKE ALL ON FUNCTION int_agg_final_array (int4[]) FROM PUBLIC;
31-
3227
-- The enumeration function
3328
-- returns each element in a one dimensional integer array
3429
-- as a row.
3530
CREATE OR REPLACE FUNCTION int_array_enum(int4[])
3631
RETURNS setof integer
3732
AS 'MODULE_PATHNAME','int_enum'
38-
LANGUAGE 'C' IMMUTABLE STRICT;
33+
LANGUAGE C IMMUTABLE STRICT;

doc/src/sgml/xaggr.sgml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.26 2005/01/22 22:56:36 momjian Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.27 2005/03/12 20:25:06 tgl Exp $
33
-->
44

55
<sect1 id="xaggr">
@@ -161,6 +161,21 @@ SELECT attrelid::regclass, array_accum(atttypid)
161161
</programlisting>
162162
</para>
163163

164+
<para>
165+
A function written in C can detect that it is being called as an
166+
aggregate transition or final function by seeing if it was passed
167+
an <structname>AggState</> node as the function call <quote>context</>,
168+
for example by
169+
<programlisting>
170+
if (fcinfo->context &amp;&amp; IsA(fcinfo->context, AggState))
171+
</programlisting>
172+
One reason for checking this is that when it is true, the left input
173+
must be a temporary transition value and can therefore safely be modified
174+
in-place rather than allocating a new copy. (This is the <emphasis>only</>
175+
case where it is safe for a function to modify a pass-by-reference input.)
176+
See <literal>int8inc()></> for an example.
177+
</para>
178+
164179
<para>
165180
For further details see the
166181
<xref linkend="sql-createaggregate" endterm="sql-createaggregate-title">

doc/src/sgml/xfunc.sgml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.99 2005/02/21 06:12:14 neilc Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.100 2005/03/12 20:25:06 tgl Exp $
33
-->
44

55
<sect1 id="xfunc">
@@ -1188,10 +1188,10 @@ typedef struct
11881188
them in and out of <productname>PostgreSQL</productname> functions.
11891189
To return a value of such a type, allocate the right amount of
11901190
memory with <literal>palloc</literal>, fill in the allocated memory,
1191-
and return a pointer to it. (You can also return an input value
1192-
that has the same type as the return value directly by returning
1193-
the pointer to the input value. <emphasis>Never</> modify the
1194-
contents of a pass-by-reference input value, however.)
1191+
and return a pointer to it. (Also, if you just want to return the
1192+
same value as one of your input arguments that's of the same data type,
1193+
you can skip the extra <literal>palloc</literal> and just return the
1194+
pointer to the input value.)
11951195
</para>
11961196

11971197
<para>
@@ -1205,6 +1205,16 @@ typedef struct
12051205
itself.
12061206
</para>
12071207

1208+
<warning>
1209+
<para>
1210+
<emphasis>Never</> modify the contents of a pass-by-reference input
1211+
value. If you do so you are likely to corrupt on-disk data, since
1212+
the pointer you are given may well point directly into a disk buffer.
1213+
The sole exception to this rule is explained in
1214+
<xref linkend="xaggr">.
1215+
</para>
1216+
</warning>
1217+
12081218
<para>
12091219
As an example, we can define the type <type>text</type> as
12101220
follows:

src/backend/executor/nodeAgg.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,28 @@
4040
* is used to run finalize functions and compute the output tuple;
4141
* this context can be reset once per output tuple.
4242
*
43+
* Beginning in PostgreSQL 8.1, the executor's AggState node is passed as
44+
* the fmgr "context" value in all transfunc and finalfunc calls. It is
45+
* not really intended that the transition functions will look into the
46+
* AggState node, but they can use code like
47+
* if (fcinfo->context && IsA(fcinfo->context, AggState))
48+
* to verify that they are being called by nodeAgg.c and not as ordinary
49+
* SQL functions. The main reason a transition function might want to know
50+
* that is that it can avoid palloc'ing a fixed-size pass-by-ref transition
51+
* value on every call: it can instead just scribble on and return its left
52+
* input. Ordinarily it is completely forbidden for functions to modify
53+
* pass-by-ref inputs, but in the aggregate case we know the left input is
54+
* either the initial transition value or a previous function result, and
55+
* in either case its value need not be preserved. See int8inc() for an
56+
* example. Notice that advance_transition_function() is coded to avoid a
57+
* data copy step when the previous transition value pointer is returned.
58+
*
4359
*
4460
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
4561
* Portions Copyright (c) 1994, Regents of the University of California
4662
*
4763
* IDENTIFICATION
48-
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.128 2005/01/28 19:33:56 tgl Exp $
64+
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.129 2005/03/12 20:25:06 tgl Exp $
4965
*
5066
*-------------------------------------------------------------------------
5167
*/
@@ -363,7 +379,7 @@ advance_transition_function(AggState *aggstate,
363379
*/
364380

365381
/* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
366-
fcinfo.context = NULL;
382+
fcinfo.context = (void *) aggstate;
367383
fcinfo.resultinfo = NULL;
368384
fcinfo.isnull = false;
369385

@@ -541,6 +557,7 @@ finalize_aggregate(AggState *aggstate,
541557
FunctionCallInfoData fcinfo;
542558

543559
MemSet(&fcinfo, 0, sizeof(fcinfo));
560+
fcinfo.context = (void *) aggstate;
544561
fcinfo.flinfo = &peraggstate->finalfn;
545562
fcinfo.nargs = 1;
546563
fcinfo.arg[0] = pergroupstate->transValue;

0 commit comments

Comments
 (0)