Skip to content

Commit 9717353

Browse files
committed
Add a planner support function for numeric generate_series().
This allows the planner to estimate the number of rows returned by generate_series(numeric, numeric[, numeric]), when the input values can be estimated at plan time. Song Jinzhou, reviewed by Dean Rasheed and David Rowley. Discussion: https://postgr.es/m/tencent_F43E7F4DD50EF5986D1051DE8DE547910206%40qq.com Discussion: https://postgr.es/m/tencent_1F6D5B9A1545E02FD7D0EE508DFD056DE50A%40qq.com
1 parent 3315235 commit 9717353

File tree

5 files changed

+232
-3
lines changed

5 files changed

+232
-3
lines changed

src/backend/utils/adt/numeric.c

+121
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "miscadmin.h"
3535
#include "nodes/nodeFuncs.h"
3636
#include "nodes/supportnodes.h"
37+
#include "optimizer/optimizer.h"
3738
#include "utils/array.h"
3839
#include "utils/builtins.h"
3940
#include "utils/float.h"
@@ -1827,6 +1828,126 @@ generate_series_step_numeric(PG_FUNCTION_ARGS)
18271828
SRF_RETURN_DONE(funcctx);
18281829
}
18291830

1831+
/*
1832+
* Planner support function for generate_series(numeric, numeric [, numeric])
1833+
*/
1834+
Datum
1835+
generate_series_numeric_support(PG_FUNCTION_ARGS)
1836+
{
1837+
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
1838+
Node *ret = NULL;
1839+
1840+
if (IsA(rawreq, SupportRequestRows))
1841+
{
1842+
/* Try to estimate the number of rows returned */
1843+
SupportRequestRows *req = (SupportRequestRows *) rawreq;
1844+
1845+
if (is_funcclause(req->node)) /* be paranoid */
1846+
{
1847+
List *args = ((FuncExpr *) req->node)->args;
1848+
Node *arg1,
1849+
*arg2,
1850+
*arg3;
1851+
1852+
/* We can use estimated argument values here */
1853+
arg1 = estimate_expression_value(req->root, linitial(args));
1854+
arg2 = estimate_expression_value(req->root, lsecond(args));
1855+
if (list_length(args) >= 3)
1856+
arg3 = estimate_expression_value(req->root, lthird(args));
1857+
else
1858+
arg3 = NULL;
1859+
1860+
/*
1861+
* If any argument is constant NULL, we can safely assume that
1862+
* zero rows are returned. Otherwise, if they're all non-NULL
1863+
* constants, we can calculate the number of rows that will be
1864+
* returned.
1865+
*/
1866+
if ((IsA(arg1, Const) &&
1867+
((Const *) arg1)->constisnull) ||
1868+
(IsA(arg2, Const) &&
1869+
((Const *) arg2)->constisnull) ||
1870+
(arg3 != NULL && IsA(arg3, Const) &&
1871+
((Const *) arg3)->constisnull))
1872+
{
1873+
req->rows = 0;
1874+
ret = (Node *) req;
1875+
}
1876+
else if (IsA(arg1, Const) &&
1877+
IsA(arg2, Const) &&
1878+
(arg3 == NULL || IsA(arg3, Const)))
1879+
{
1880+
Numeric start_num;
1881+
Numeric stop_num;
1882+
NumericVar step = const_one;
1883+
1884+
/*
1885+
* If any argument is NaN or infinity, generate_series() will
1886+
* error out, so we needn't produce an estimate.
1887+
*/
1888+
start_num = DatumGetNumeric(((Const *) arg1)->constvalue);
1889+
stop_num = DatumGetNumeric(((Const *) arg2)->constvalue);
1890+
1891+
if (NUMERIC_IS_SPECIAL(start_num) ||
1892+
NUMERIC_IS_SPECIAL(stop_num))
1893+
PG_RETURN_POINTER(NULL);
1894+
1895+
if (arg3)
1896+
{
1897+
Numeric step_num;
1898+
1899+
step_num = DatumGetNumeric(((Const *) arg3)->constvalue);
1900+
1901+
if (NUMERIC_IS_SPECIAL(step_num))
1902+
PG_RETURN_POINTER(NULL);
1903+
1904+
init_var_from_num(step_num, &step);
1905+
}
1906+
1907+
/*
1908+
* The number of rows that will be returned is given by
1909+
* floor((stop - start) / step) + 1, if the sign of step
1910+
* matches the sign of stop - start. Otherwise, no rows will
1911+
* be returned.
1912+
*/
1913+
if (cmp_var(&step, &const_zero) != 0)
1914+
{
1915+
NumericVar start;
1916+
NumericVar stop;
1917+
NumericVar res;
1918+
1919+
init_var_from_num(start_num, &start);
1920+
init_var_from_num(stop_num, &stop);
1921+
1922+
init_var(&res);
1923+
sub_var(&stop, &start, &res);
1924+
1925+
if (step.sign != res.sign)
1926+
{
1927+
/* no rows will be returned */
1928+
req->rows = 0;
1929+
ret = (Node *) req;
1930+
}
1931+
else
1932+
{
1933+
if (arg3)
1934+
div_var(&res, &step, &res, 0, false, false);
1935+
else
1936+
trunc_var(&res, 0); /* step = 1 */
1937+
1938+
req->rows = numericvar_to_double_no_overflow(&res) + 1;
1939+
ret = (Node *) req;
1940+
}
1941+
1942+
free_var(&res);
1943+
}
1944+
}
1945+
}
1946+
}
1947+
1948+
PG_RETURN_POINTER(ret);
1949+
}
1950+
18301951

18311952
/*
18321953
* Implements the numeric version of the width_bucket() function

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/* yyyymmddN */
60-
#define CATALOG_VERSION_NO 202411111
60+
#define CATALOG_VERSION_NO 202412021
6161

6262
#endif

src/include/catalog/pg_proc.dat

+7-2
Original file line numberDiff line numberDiff line change
@@ -8464,13 +8464,18 @@
84648464
proname => 'generate_series_int8_support', prorettype => 'internal',
84658465
proargtypes => 'internal', prosrc => 'generate_series_int8_support' },
84668466
{ oid => '3259', descr => 'non-persistent series generator',
8467-
proname => 'generate_series', prorows => '1000', proretset => 't',
8467+
proname => 'generate_series', prorows => '1000',
8468+
prosupport => 'generate_series_numeric_support', proretset => 't',
84688469
prorettype => 'numeric', proargtypes => 'numeric numeric numeric',
84698470
prosrc => 'generate_series_step_numeric' },
84708471
{ oid => '3260', descr => 'non-persistent series generator',
8471-
proname => 'generate_series', prorows => '1000', proretset => 't',
8472+
proname => 'generate_series', prorows => '1000',
8473+
prosupport => 'generate_series_numeric_support', proretset => 't',
84728474
prorettype => 'numeric', proargtypes => 'numeric numeric',
84738475
prosrc => 'generate_series_numeric' },
8476+
{ oid => '8405', descr => 'planner support for generate_series',
8477+
proname => 'generate_series_numeric_support', prorettype => 'internal',
8478+
proargtypes => 'internal', prosrc => 'generate_series_numeric_support' },
84748479
{ oid => '938', descr => 'non-persistent series generator',
84758480
proname => 'generate_series', prorows => '1000',
84768481
prosupport => 'generate_series_timestamp_support', proretset => 't',

src/test/regress/expected/misc_functions.out

+65
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,71 @@ false, true, false, true);
712712
-- the support function.
713713
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
714714
ERROR: step size cannot equal zero
715+
--
716+
-- Test the SupportRequestRows support function for generate_series_numeric()
717+
--
718+
-- Ensure the row estimate matches the actual rows
719+
SELECT explain_mask_costs($$
720+
SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
721+
true, true, false, true);
722+
explain_mask_costs
723+
------------------------------------------------------------------------------------------
724+
Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25 loops=1)
725+
(1 row)
726+
727+
-- As above but with non-default step
728+
SELECT explain_mask_costs($$
729+
SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
730+
true, true, false, true);
731+
explain_mask_costs
732+
------------------------------------------------------------------------------------------
733+
Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13 loops=1)
734+
(1 row)
735+
736+
-- Ensure the estimates match when step is decreasing
737+
SELECT explain_mask_costs($$
738+
SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
739+
true, true, false, true);
740+
explain_mask_costs
741+
------------------------------------------------------------------------------------------
742+
Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25 loops=1)
743+
(1 row)
744+
745+
-- Ensure an empty range estimates 1 row
746+
SELECT explain_mask_costs($$
747+
SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
748+
true, true, false, true);
749+
explain_mask_costs
750+
----------------------------------------------------------------------------------------
751+
Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
752+
(1 row)
753+
754+
-- Ensure we get the default row estimate for error cases (infinity/NaN values
755+
-- and zero step size)
756+
SELECT explain_mask_costs($$
757+
SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
758+
false, true, false, true);
759+
explain_mask_costs
760+
-------------------------------------------------------------------
761+
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
762+
(1 row)
763+
764+
SELECT explain_mask_costs($$
765+
SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
766+
false, true, false, true);
767+
explain_mask_costs
768+
-------------------------------------------------------------------
769+
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
770+
(1 row)
771+
772+
SELECT explain_mask_costs($$
773+
SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
774+
false, true, false, true);
775+
explain_mask_costs
776+
-------------------------------------------------------------------
777+
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
778+
(1 row)
779+
715780
-- Test functions for control data
716781
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
717782
ok

src/test/regress/sql/misc_functions.sql

+38
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,44 @@ false, true, false, true);
311311
-- the support function.
312312
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
313313

314+
--
315+
-- Test the SupportRequestRows support function for generate_series_numeric()
316+
--
317+
318+
-- Ensure the row estimate matches the actual rows
319+
SELECT explain_mask_costs($$
320+
SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
321+
true, true, false, true);
322+
323+
-- As above but with non-default step
324+
SELECT explain_mask_costs($$
325+
SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
326+
true, true, false, true);
327+
328+
-- Ensure the estimates match when step is decreasing
329+
SELECT explain_mask_costs($$
330+
SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
331+
true, true, false, true);
332+
333+
-- Ensure an empty range estimates 1 row
334+
SELECT explain_mask_costs($$
335+
SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
336+
true, true, false, true);
337+
338+
-- Ensure we get the default row estimate for error cases (infinity/NaN values
339+
-- and zero step size)
340+
SELECT explain_mask_costs($$
341+
SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
342+
false, true, false, true);
343+
344+
SELECT explain_mask_costs($$
345+
SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
346+
false, true, false, true);
347+
348+
SELECT explain_mask_costs($$
349+
SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
350+
false, true, false, true);
351+
314352
-- Test functions for control data
315353
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
316354
SELECT count(*) > 0 AS ok FROM pg_control_init();

0 commit comments

Comments
 (0)