Skip to content

Commit 8f321bd

Browse files
committed
Use functional dependencies to estimate ScalarArrayOpExpr
Until now functional dependencies supported only simple equality clauses and clauses that can be trivially translated to equalities. This commit allows estimation of some ScalarArrayOpExpr (IN/ANY) clauses. For IN clauses we can do this thanks to using operator with equality semantics, which means an IN clause WHERE c IN (1, 2, ..., N) can be translated to WHERE (c = 1 OR c = 2 OR ... OR c = N) IN clauses are now considered compatible with functional dependencies, and rely on the same assumption of consistency of queries with data (which is an assumption we already used for simple equality clauses). This applies also to ALL clauses with an equality operator, which can be considered equivalent to IN clause. ALL clauses are still considered incompatible, although there's some discussion about maybe relaxing this in the future. Author: Pierre Ducroquet Reviewed-by: Tomas Vondra, Dean Rasheed Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc
1 parent dbf95c8 commit 8f321bd

File tree

4 files changed

+344
-1
lines changed

4 files changed

+344
-1
lines changed

doc/src/sgml/perform.sgml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1203,7 +1203,8 @@ SELECT stxname, stxkeys, stxddependencies
12031203

12041204
<para>
12051205
Functional dependencies are currently only applied when considering
1206-
simple equality conditions that compare columns to constant values.
1206+
simple equality conditions that compare columns to constant values,
1207+
and <literal>IN</literal> clauses with constant values.
12071208
They are not used to improve estimates for equality conditions
12081209
comparing two columns or comparing a column to an expression, nor for
12091210
range clauses, <literal>LIKE</literal> or any other type of condition.

src/backend/statistics/dependencies.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,44 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
801801

802802
/* OK to proceed with checking "var" */
803803
}
804+
else if (IsA(rinfo->clause, ScalarArrayOpExpr))
805+
{
806+
/* If it's an scalar array operator, check for Var IN Const. */
807+
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) rinfo->clause;
808+
809+
/*
810+
* Reject ALL() variant, we only care about ANY/IN.
811+
*
812+
* FIXME Maybe we should check if all the values are the same, and
813+
* allow ALL in that case? Doesn't seem very practical, though.
814+
*/
815+
if (!expr->useOr)
816+
return false;
817+
818+
/* Only expressions with two arguments are candidates. */
819+
if (list_length(expr->args) != 2)
820+
return false;
821+
822+
/*
823+
* We know it's always (Var IN Const), so we assume the var is the
824+
* first argument, and pseudoconstant is the second one.
825+
*/
826+
if (!is_pseudo_constant_clause(lsecond(expr->args)))
827+
return false;
828+
829+
var = linitial(expr->args);
830+
831+
/*
832+
* If it's not an "=" operator, just ignore the clause, as it's not
833+
* compatible with functional dependencies. The operator is identified
834+
* simply by looking at which function it uses to estimate selectivity.
835+
* That's a bit strange, but it's what other similar places do.
836+
*/
837+
if (get_oprrest(expr->opno) != F_EQSEL)
838+
return false;
839+
840+
/* OK to proceed with checking "var" */
841+
}
804842
else if (is_notclause(rinfo->clause))
805843
{
806844
/*

src/test/regress/expected/stats_ext.out

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,118 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
421421
1 | 50
422422
(1 row)
423423

424+
-- IN
425+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
426+
estimated | actual
427+
-----------+--------
428+
2 | 100
429+
(1 row)
430+
431+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
432+
estimated | actual
433+
-----------+--------
434+
4 | 100
435+
(1 row)
436+
437+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
438+
estimated | actual
439+
-----------+--------
440+
8 | 200
441+
(1 row)
442+
443+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
444+
estimated | actual
445+
-----------+--------
446+
1 | 200
447+
(1 row)
448+
449+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
450+
estimated | actual
451+
-----------+--------
452+
1 | 200
453+
(1 row)
454+
455+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
456+
estimated | actual
457+
-----------+--------
458+
3 | 400
459+
(1 row)
460+
461+
-- ANY
462+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
463+
estimated | actual
464+
-----------+--------
465+
2 | 100
466+
(1 row)
467+
468+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
469+
estimated | actual
470+
-----------+--------
471+
4 | 100
472+
(1 row)
473+
474+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
475+
estimated | actual
476+
-----------+--------
477+
8 | 200
478+
(1 row)
479+
480+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
481+
estimated | actual
482+
-----------+--------
483+
1 | 200
484+
(1 row)
485+
486+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
487+
estimated | actual
488+
-----------+--------
489+
1 | 200
490+
(1 row)
491+
492+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
493+
estimated | actual
494+
-----------+--------
495+
3 | 400
496+
(1 row)
497+
498+
-- ANY with inequalities should not benefit from functional dependencies
499+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1''');
500+
estimated | actual
501+
-----------+--------
502+
2472 | 2400
503+
(1 row)
504+
505+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])');
506+
estimated | actual
507+
-----------+--------
508+
1441 | 1250
509+
(1 row)
510+
511+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])');
512+
estimated | actual
513+
-----------+--------
514+
3909 | 2550
515+
(1 row)
516+
517+
-- ALL (should not benefit from functional dependencies)
518+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])');
519+
estimated | actual
520+
-----------+--------
521+
2 | 100
522+
(1 row)
523+
524+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])');
525+
estimated | actual
526+
-----------+--------
527+
1 | 0
528+
(1 row)
529+
530+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
531+
estimated | actual
532+
-----------+--------
533+
1 | 0
534+
(1 row)
535+
424536
-- create statistics
425537
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
426538
ANALYZE functional_dependencies;
@@ -436,6 +548,118 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
436548
50 | 50
437549
(1 row)
438550

551+
-- IN
552+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
553+
estimated | actual
554+
-----------+--------
555+
100 | 100
556+
(1 row)
557+
558+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
559+
estimated | actual
560+
-----------+--------
561+
100 | 100
562+
(1 row)
563+
564+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
565+
estimated | actual
566+
-----------+--------
567+
200 | 200
568+
(1 row)
569+
570+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
571+
estimated | actual
572+
-----------+--------
573+
200 | 200
574+
(1 row)
575+
576+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
577+
estimated | actual
578+
-----------+--------
579+
200 | 200
580+
(1 row)
581+
582+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
583+
estimated | actual
584+
-----------+--------
585+
400 | 400
586+
(1 row)
587+
588+
-- ANY
589+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
590+
estimated | actual
591+
-----------+--------
592+
100 | 100
593+
(1 row)
594+
595+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
596+
estimated | actual
597+
-----------+--------
598+
100 | 100
599+
(1 row)
600+
601+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
602+
estimated | actual
603+
-----------+--------
604+
200 | 200
605+
(1 row)
606+
607+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
608+
estimated | actual
609+
-----------+--------
610+
200 | 200
611+
(1 row)
612+
613+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
614+
estimated | actual
615+
-----------+--------
616+
200 | 200
617+
(1 row)
618+
619+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
620+
estimated | actual
621+
-----------+--------
622+
400 | 400
623+
(1 row)
624+
625+
-- ANY with inequalities should not benefit from functional dependencies
626+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1''');
627+
estimated | actual
628+
-----------+--------
629+
2472 | 2400
630+
(1 row)
631+
632+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])');
633+
estimated | actual
634+
-----------+--------
635+
1441 | 1250
636+
(1 row)
637+
638+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])');
639+
estimated | actual
640+
-----------+--------
641+
3909 | 2550
642+
(1 row)
643+
644+
-- ALL (should not benefit from functional dependencies)
645+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])');
646+
estimated | actual
647+
-----------+--------
648+
2 | 100
649+
(1 row)
650+
651+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])');
652+
estimated | actual
653+
-----------+--------
654+
1 | 0
655+
(1 row)
656+
657+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
658+
estimated | actual
659+
-----------+--------
660+
1 | 0
661+
(1 row)
662+
439663
-- check change of column type doesn't break it
440664
ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
441665
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');

src/test/regress/sql/stats_ext.sql

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,46 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
273273

274274
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
275275

276+
-- IN
277+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
278+
279+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
280+
281+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
282+
283+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
284+
285+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
286+
287+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
288+
289+
-- ANY
290+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
291+
292+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
293+
294+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
295+
296+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
297+
298+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
299+
300+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
301+
302+
-- ANY with inequalities should not benefit from functional dependencies
303+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1''');
304+
305+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])');
306+
307+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])');
308+
309+
-- ALL (should not benefit from functional dependencies)
310+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])');
311+
312+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])');
313+
314+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
315+
276316
-- create statistics
277317
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
278318

@@ -282,6 +322,46 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
282322

283323
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
284324

325+
-- IN
326+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
327+
328+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
329+
330+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
331+
332+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
333+
334+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
335+
336+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
337+
338+
-- ANY
339+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
340+
341+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
342+
343+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
344+
345+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
346+
347+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
348+
349+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
350+
351+
-- ANY with inequalities should not benefit from functional dependencies
352+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a < ANY (ARRAY[1, 51]) AND b > ''1''');
353+
354+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a >= ANY (ARRAY[1, 51]) AND b <= ANY (ARRAY[''1'', ''2''])');
355+
356+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a <= ANY (ARRAY[1, 2, 51, 52]) AND b >= ANY (ARRAY[''1'', ''2''])');
357+
358+
-- ALL (should not benefit from functional dependencies)
359+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1''])');
360+
361+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ALL (ARRAY[''1'', ''2''])');
362+
363+
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
364+
285365
-- check change of column type doesn't break it
286366
ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
287367

0 commit comments

Comments
 (0)