Skip to content

Commit ef3fed2

Browse files
committed
Fix choose_best_statistics to check clauses individually
When picking the best extended statistics object for a list of clauses, it's not enough to look at attnums extracted from the clause list as a whole. Consider for example this query with OR clauses: SELECT * FROM t WHERE (t.a = 1) OR (t.b = 1) OR (t.c = 1) with a statistics defined on columns (a,b). Relying on attnums extracted from the whole OR clause, we'd consider the statistics usable. That does not work, as we see the conditions as a single OR-clause, referencing an attribute not covered by the statistic, leading to empty list of clauses to be estimated using the statistics and an assert failure. This changes choose_best_statistics to check which clauses are actually covered, and only using attributes from the fully covered ones. For the previous example this means the statistics object will not be considered as compatible with the OR-clause. Backpatch to 12, where MCVs were introduced. The issue does not affect older versions because functional dependencies don't handle OR clauses. Author: Tomas Vondra Reviewed-by: Dean Rasheed Reported-By: Manuel Rigger Discussion: https://postgr.es/m/CA+u7OA7H5rcE2=8f263w4NZD6ipO_XOrYB816nuLXbmSTH9pQQ@mail.gmail.com Backpatch-through: 12
1 parent bf3cef2 commit ef3fed2

File tree

5 files changed

+81
-24
lines changed

5 files changed

+81
-24
lines changed

src/backend/statistics/dependencies.c

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -951,14 +951,14 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
951951
Bitmapset *clauses_attnums = NULL;
952952
StatisticExtInfo *stat;
953953
MVDependencies *dependencies;
954-
AttrNumber *list_attnums;
954+
Bitmapset **list_attnums;
955955
int listidx;
956956

957957
/* check if there's any stats that might be useful for us. */
958958
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
959959
return 1.0;
960960

961-
list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
961+
list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
962962
list_length(clauses));
963963

964964
/*
@@ -981,11 +981,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
981981
if (!bms_is_member(listidx, *estimatedclauses) &&
982982
dependency_is_compatible_clause(clause, rel->relid, &attnum))
983983
{
984-
list_attnums[listidx] = attnum;
984+
list_attnums[listidx] = bms_make_singleton(attnum);
985985
clauses_attnums = bms_add_member(clauses_attnums, attnum);
986986
}
987987
else
988-
list_attnums[listidx] = InvalidAttrNumber;
988+
list_attnums[listidx] = NULL;
989989

990990
listidx++;
991991
}
@@ -1002,8 +1002,8 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10021002
}
10031003

10041004
/* find the best suited statistics object for these attnums */
1005-
stat = choose_best_statistics(rel->statlist, clauses_attnums,
1006-
STATS_EXT_DEPENDENCIES);
1005+
stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES,
1006+
list_attnums, list_length(clauses));
10071007

10081008
/* if no matching stats could be found then we've nothing to do */
10091009
if (!stat)
@@ -1043,15 +1043,21 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10431043
foreach(l, clauses)
10441044
{
10451045
Node *clause;
1046+
AttrNumber attnum;
10461047

10471048
listidx++;
10481049

10491050
/*
10501051
* Skip incompatible clauses, and ones we've already estimated on.
10511052
*/
1052-
if (list_attnums[listidx] == InvalidAttrNumber)
1053+
if (!list_attnums[listidx])
10531054
continue;
10541055

1056+
/*
1057+
* We expect the bitmaps ton contain a single attribute number.
1058+
*/
1059+
attnum = bms_singleton_member(list_attnums[listidx]);
1060+
10551061
/*
10561062
* Technically we could find more than one clause for a given
10571063
* attnum. Since these clauses must be equality clauses, we choose
@@ -1061,8 +1067,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10611067
* anyway. If it happens to be compared to the same Const, then
10621068
* ignoring the additional clause is just the thing to do.
10631069
*/
1064-
if (dependency_implies_attribute(dependency,
1065-
list_attnums[listidx]))
1070+
if (dependency_implies_attribute(dependency, attnum))
10661071
{
10671072
clause = (Node *) lfirst(l);
10681073

@@ -1077,8 +1082,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
10771082
* We'll want to ignore this when looking for the next
10781083
* strongest dependency above.
10791084
*/
1080-
clauses_attnums = bms_del_member(clauses_attnums,
1081-
list_attnums[listidx]);
1085+
clauses_attnums = bms_del_member(clauses_attnums, attnum);
10821086
}
10831087
}
10841088

src/backend/statistics/extended_stats.c

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -697,15 +697,20 @@ has_stats_of_kind(List *stats, char requiredkind)
697697
* there's no match.
698698
*
699699
* The current selection criteria is very simple - we choose the statistics
700-
* object referencing the most of the requested attributes, breaking ties
701-
* in favor of objects with fewer keys overall.
700+
* object referencing the most attributes in covered (and still unestimated
701+
* clauses), breaking ties in favor of objects with fewer keys overall.
702+
*
703+
* The clause_attnums is an array of bitmaps, storing attnums for individual
704+
* clauses. A NULL element means the clause is either incompatible or already
705+
* estimated.
702706
*
703707
* XXX If multiple statistics objects tie on both criteria, then which object
704708
* is chosen depends on the order that they appear in the stats list. Perhaps
705709
* further tiebreakers are needed.
706710
*/
707711
StatisticExtInfo *
708-
choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
712+
choose_best_statistics(List *stats, char requiredkind,
713+
Bitmapset **clause_attnums, int nclauses)
709714
{
710715
ListCell *lc;
711716
StatisticExtInfo *best_match = NULL;
@@ -714,17 +719,33 @@ choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
714719

715720
foreach(lc, stats)
716721
{
722+
int i;
717723
StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
724+
Bitmapset *matched = NULL;
718725
int num_matched;
719726
int numkeys;
720-
Bitmapset *matched;
721727

722728
/* skip statistics that are not of the correct type */
723729
if (info->kind != requiredkind)
724730
continue;
725731

726-
/* determine how many attributes of these stats can be matched to */
727-
matched = bms_intersect(attnums, info->keys);
732+
/*
733+
* Collect attributes in remaining (unestimated) clauses fully covered
734+
* by this statistic object.
735+
*/
736+
for (i = 0; i < nclauses; i++)
737+
{
738+
/* ignore incompatible/estimated clauses */
739+
if (!clause_attnums[i])
740+
continue;
741+
742+
/* ignore clauses that are not covered by this object */
743+
if (!bms_is_subset(clause_attnums[i], info->keys))
744+
continue;
745+
746+
matched = bms_add_members(matched, clause_attnums[i]);
747+
}
748+
728749
num_matched = bms_num_members(matched);
729750
bms_free(matched);
730751

@@ -1086,12 +1107,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
10861107
listidx++;
10871108
}
10881109

1089-
/* We need at least two attributes for multivariate statistics. */
1090-
if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1091-
return 1.0;
1092-
10931110
/* find the best suited statistics object for these attnums */
1094-
stat = choose_best_statistics(rel->statlist, clauses_attnums, STATS_EXT_MCV);
1111+
stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
1112+
list_attnums, list_length(clauses));
10951113

10961114
/* if no matching stats could be found then we've nothing to do */
10971115
if (!stat)

src/include/statistics/statistics.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
116116
RelOptInfo *rel,
117117
Bitmapset **estimatedclauses);
118118
extern bool has_stats_of_kind(List *stats, char requiredkind);
119-
extern StatisticExtInfo *choose_best_statistics(List *stats,
120-
Bitmapset *attnums, char requiredkind);
119+
extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
120+
Bitmapset **clause_attnums,
121+
int nclauses);
121122

122123
#endif /* STATISTICS_H */

src/test/regress/expected/stats_ext.out

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
517517
1 | 50
518518
(1 row)
519519

520+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
521+
estimated | actual
522+
-----------+--------
523+
343 | 200
524+
(1 row)
525+
526+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
527+
estimated | actual
528+
-----------+--------
529+
343 | 200
530+
(1 row)
531+
520532
-- create statistics
521533
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
522534
ANALYZE mcv_lists;
@@ -556,6 +568,19 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
556568
50 | 50
557569
(1 row)
558570

571+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
572+
estimated | actual
573+
-----------+--------
574+
200 | 200
575+
(1 row)
576+
577+
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
578+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
579+
estimated | actual
580+
-----------+--------
581+
343 | 200
582+
(1 row)
583+
559584
-- check change of unrelated column type does not reset the MCV statistics
560585
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
561586
SELECT d.stxdmcv IS NOT NULL

src/test/regress/sql/stats_ext.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b <
332332

333333
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
334334

335+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
336+
337+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
338+
335339
-- create statistics
336340
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
337341

@@ -349,6 +353,11 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b <
349353

350354
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
351355

356+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
357+
358+
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
359+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
360+
352361
-- check change of unrelated column type does not reset the MCV statistics
353362
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
354363

0 commit comments

Comments
 (0)