Skip to content

Commit ccaa356

Browse files
committed
Recognize some OR clauses as compatible with functional dependencies
Since commit 8f321bd functional dependencies can handle IN clauses, which however introduced a possible (and surprising) inconsistency, because IN clauses may be expressed as an OR clause, which are still considered incompatible. For example a IN (1, 2, 3) may be rewritten as (a = 1 OR a = 2 OR a = 3) The IN clause will work fine with functional dependencies, but the OR clause will force the estimation to fall back to plain per-column estimates, possibly introducing significant estimation errors. This commit recognizes OR clauses equivalent to an IN clause (when all arugments are compatible and reference the same attribute) as a special case, compatible with functional dependencies. This allows applying functional dependencies, just like for IN clauses. This does not eliminate the difference in estimating the clause itself, i.e. IN clause and OR clause still use different formulas. It would be possible to change that (for these special OR clauses), but that's not really about extended statistics - it was always like this. Moreover the errors are usually much smaller compared to ignoring dependencies. Author: Tomas Vondra Reviewed-by: Dean Rasheed Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc
1 parent dbe0d98 commit ccaa356

File tree

3 files changed

+121
-16
lines changed

3 files changed

+121
-16
lines changed

src/backend/statistics/dependencies.c

Lines changed: 49 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -753,24 +753,27 @@ pg_dependencies_send(PG_FUNCTION_ARGS)
753753
static bool
754754
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
755755
{
756-
RestrictInfo *rinfo = (RestrictInfo *) clause;
757756
Var *var;
758757

759-
if (!IsA(rinfo, RestrictInfo))
760-
return false;
758+
if (IsA(clause, RestrictInfo))
759+
{
760+
RestrictInfo *rinfo = (RestrictInfo *) clause;
761761

762-
/* Pseudoconstants are not interesting (they couldn't contain a Var) */
763-
if (rinfo->pseudoconstant)
764-
return false;
762+
/* Pseudoconstants are not interesting (they couldn't contain a Var) */
763+
if (rinfo->pseudoconstant)
764+
return false;
765765

766-
/* Clauses referencing multiple, or no, varnos are incompatible */
767-
if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
768-
return false;
766+
/* Clauses referencing multiple, or no, varnos are incompatible */
767+
if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
768+
return false;
769769

770-
if (is_opclause(rinfo->clause))
770+
clause = (Node *) rinfo->clause;
771+
}
772+
773+
if (is_opclause(clause))
771774
{
772775
/* If it's an opclause, check for Var = Const or Const = Var. */
773-
OpExpr *expr = (OpExpr *) rinfo->clause;
776+
OpExpr *expr = (OpExpr *) clause;
774777

775778
/* Only expressions with two arguments are candidates. */
776779
if (list_length(expr->args) != 2)
@@ -801,10 +804,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
801804

802805
/* OK to proceed with checking "var" */
803806
}
804-
else if (IsA(rinfo->clause, ScalarArrayOpExpr))
807+
else if (IsA(clause, ScalarArrayOpExpr))
805808
{
806809
/* If it's an scalar array operator, check for Var IN Const. */
807-
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) rinfo->clause;
810+
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
808811

809812
/*
810813
* Reject ALL() variant, we only care about ANY/IN.
@@ -839,21 +842,51 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
839842

840843
/* OK to proceed with checking "var" */
841844
}
842-
else if (is_notclause(rinfo->clause))
845+
else if (is_orclause(clause))
846+
{
847+
BoolExpr *expr = (BoolExpr *) clause;
848+
ListCell *lc;
849+
850+
/* start with no attribute number */
851+
*attnum = InvalidAttrNumber;
852+
853+
foreach(lc, expr->args)
854+
{
855+
AttrNumber clause_attnum;
856+
857+
/*
858+
* Had we found incompatible clause in the arguments, treat the
859+
* whole clause as incompatible.
860+
*/
861+
if (!dependency_is_compatible_clause((Node *) lfirst(lc),
862+
relid, &clause_attnum))
863+
return false;
864+
865+
if (*attnum == InvalidAttrNumber)
866+
*attnum = clause_attnum;
867+
868+
if (*attnum != clause_attnum)
869+
return false;
870+
}
871+
872+
/* the Var is already checked by the recursive call */
873+
return true;
874+
}
875+
else if (is_notclause(clause))
843876
{
844877
/*
845878
* "NOT x" can be interpreted as "x = false", so get the argument and
846879
* proceed with seeing if it's a suitable Var.
847880
*/
848-
var = (Var *) get_notclausearg(rinfo->clause);
881+
var = (Var *) get_notclausearg(clause);
849882
}
850883
else
851884
{
852885
/*
853886
* A boolean expression "x" can be interpreted as "x = true", so
854887
* proceed with seeing if it's a suitable Var.
855888
*/
856-
var = (Var *) rinfo->clause;
889+
var = (Var *) clause;
857890
}
858891

859892
/*

src/test/regress/expected/stats_ext.out

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
458458
3 | 400
459459
(1 row)
460460

461+
-- OR clauses referencing the same attribute
462+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
463+
estimated | actual
464+
-----------+--------
465+
2 | 100
466+
(1 row)
467+
468+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
469+
estimated | actual
470+
-----------+--------
471+
4 | 100
472+
(1 row)
473+
474+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
475+
estimated | actual
476+
-----------+--------
477+
8 | 200
478+
(1 row)
479+
480+
-- OR clauses referencing different attributes
481+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
482+
estimated | actual
483+
-----------+--------
484+
3 | 100
485+
(1 row)
486+
461487
-- ANY
462488
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
463489
estimated | actual
@@ -592,6 +618,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
592618
400 | 400
593619
(1 row)
594620

621+
-- OR clauses referencing the same attribute
622+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
623+
estimated | actual
624+
-----------+--------
625+
99 | 100
626+
(1 row)
627+
628+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
629+
estimated | actual
630+
-----------+--------
631+
99 | 100
632+
(1 row)
633+
634+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
635+
estimated | actual
636+
-----------+--------
637+
197 | 200
638+
(1 row)
639+
640+
-- OR clauses referencing different attributes are incompatible
641+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
642+
estimated | actual
643+
-----------+--------
644+
3 | 100
645+
(1 row)
646+
595647
-- ANY
596648
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
597649
estimated | actual

src/test/regress/sql/stats_ext.sql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
286286

287287
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
288288

289+
-- OR clauses referencing the same attribute
290+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
291+
292+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
293+
294+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
295+
296+
-- OR clauses referencing different attributes
297+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
298+
289299
-- ANY
290300
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
291301

@@ -338,6 +348,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
338348

339349
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
340350

351+
-- OR clauses referencing the same attribute
352+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1''');
353+
354+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')');
355+
356+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')');
357+
358+
-- OR clauses referencing different attributes are incompatible
359+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1''');
360+
341361
-- ANY
342362
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
343363

0 commit comments

Comments
 (0)