Skip to content

Commit 50ba70a

Browse files
committed
Identify simple column references in extended statistics
Until now, when defining extended statistics, everything except a plain column reference was treated as complex expression. So for example "a" was a column reference, but "(a)" would be an expression. In most cases this does not matter much, but there were a couple strange consequences. For example CREATE STATISTICS s ON a FROM t; would fail, because extended stats require at least two columns. But CREATE STATISTICS s ON (a) FROM t; would succeed, because that requirement does not apply to expressions. Moreover, that statistics object is useless - the optimizer will always use the regular statistics collected for attribute "a". So do a bit more work to identify those expressions referencing a single column, and translate them to a simple column reference. Backpatch to 14, where support for extended statistics on expressions was introduced. Reported-by: Justin Pryzby Backpatch-through: 14 Discussion: https://postgr.es/m/20210816013255.GS10479%40telsasoft.com
1 parent d760d94 commit 50ba70a

File tree

3 files changed

+34
-6
lines changed

3 files changed

+34
-6
lines changed

src/backend/commands/statscmds.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "optimizer/optimizer.h"
3434
#include "statistics/statistics.h"
3535
#include "utils/builtins.h"
36+
#include "utils/lsyscache.h"
3637
#include "utils/fmgroids.h"
3738
#include "utils/inval.h"
3839
#include "utils/memutils.h"
@@ -210,13 +211,15 @@ CreateStatistics(CreateStatsStmt *stmt)
210211
/*
211212
* Convert the expression list to a simple array of attnums, but also keep
212213
* a list of more complex expressions. While at it, enforce some
213-
* constraints.
214+
* constraints - we don't allow extended statistics on system attributes,
215+
* and we require the data type to have less-than operator.
214216
*
215-
* XXX We do only the bare minimum to separate simple attribute and
216-
* complex expressions - for example "(a)" will be treated as a complex
217-
* expression. No matter how elaborate the check is, there'll always be a
218-
* way around it, if the user is determined (consider e.g. "(a+0)"), so
219-
* it's not worth protecting against it.
217+
* There are many ways how to "mask" a simple attribute refenrece as an
218+
* expression, for example "(a+0)" etc. We can't possibly detect all of
219+
* them, but we handle at least the simple case with attribute in parens.
220+
* There'll always be a way around this, if the user is determined (like
221+
* the "(a+0)" example), but this makes it somewhat consistent with how
222+
* indexes treat attributes/expressions.
220223
*/
221224
foreach(cell, stmt->exprs)
222225
{
@@ -257,6 +260,28 @@ CreateStatistics(CreateStatsStmt *stmt)
257260
nattnums++;
258261
ReleaseSysCache(atttuple);
259262
}
263+
else if (IsA(selem->expr, Var)) /* column reference in parens */
264+
{
265+
Var *var = (Var *) selem->expr;
266+
TypeCacheEntry *type;
267+
268+
/* Disallow use of system attributes in extended stats */
269+
if (var->varattno <= 0)
270+
ereport(ERROR,
271+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
272+
errmsg("statistics creation on system columns is not supported")));
273+
274+
/* Disallow data types without a less-than operator */
275+
type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
276+
if (type->lt_opr == InvalidOid)
277+
ereport(ERROR,
278+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
279+
errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
280+
get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
281+
282+
attnums[nattnums] = var->varattno;
283+
nattnums++;
284+
}
260285
else /* expression */
261286
{
262287
Node *expr = selem->expr;

src/test/regress/expected/stats_ext.out

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ ERROR: duplicate expression in statistics definition
5555
CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
5656
ERROR: unrecognized statistics kind "unrecognized"
5757
-- incorrect expressions
58+
CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
59+
ERROR: extended statistics require at least 2 columns
5860
CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
5961
ERROR: syntax error at or near "+"
6062
LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test;

src/test/regress/sql/stats_ext.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x')
4141
CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test;
4242
CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
4343
-- incorrect expressions
44+
CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
4445
CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
4546
CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
4647
DROP TABLE ext_stats_test;

0 commit comments

Comments
 (0)