Skip to content

Commit e33ae53

Browse files
committed
Fix handling of bare boolean expressions in mcv_get_match_bitmap.
Since v14, the extended stats machinery will try to estimate for otherwise-unsupported boolean expressions if they match an expression available from an extended stats object. mcv.c did not get the memo about this, and would spit up with "unknown clause type". Fortunately the case is easy to handle, since we can expect the expression yields boolean. While here, replace some not-terribly-on-point assertions with simpler runtime tests for lookup failure. That seems appropriate so that we get an elog not a crash if we somehow get to the new it-should-be-a-bool-expression code with a subexpression that doesn't match any stats column. Per report from Danny Shemesh. Thanks to Justin Pryzby for preliminary investigation. Discussion: https://postgr.es/m/CAFZC=QqD6=27wQPOW1pbRa98KPyuyn+7cL_Ay_Ck-roZV84vHg@mail.gmail.com
1 parent 94da732 commit e33ae53

File tree

3 files changed

+60
-27
lines changed

3 files changed

+60
-27
lines changed

src/backend/statistics/mcv.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,13 +1532,13 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
15321532
/*
15331533
* match the attribute/expression to a dimension of the statistic
15341534
*
1535-
* Match the attribute/expression to statistics dimension. Optionally
1536-
* determine the collation.
1535+
* Returns the zero-based index of the matching statistics dimension.
1536+
* Optionally determines the collation.
15371537
*/
15381538
static int
15391539
mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
15401540
{
1541-
int idx = -1;
1541+
int idx;
15421542

15431543
if (IsA(expr, Var))
15441544
{
@@ -1550,20 +1550,19 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
15501550

15511551
idx = bms_member_index(keys, var->varattno);
15521552

1553-
/* make sure the index is valid */
1554-
Assert((idx >= 0) && (idx <= bms_num_members(keys)));
1553+
if (idx < 0)
1554+
elog(ERROR, "variable not found in statistics object");
15551555
}
15561556
else
15571557
{
1558+
/* expression - lookup in stats expressions */
15581559
ListCell *lc;
15591560

1560-
/* expressions are stored after the simple columns */
1561-
idx = bms_num_members(keys);
1562-
15631561
if (collid)
15641562
*collid = exprCollation(expr);
15651563

1566-
/* expression - lookup in stats expressions */
1564+
/* expressions are stored after the simple columns */
1565+
idx = bms_num_members(keys);
15671566
foreach(lc, exprs)
15681567
{
15691568
Node *stat_expr = (Node *) lfirst(lc);
@@ -1574,13 +1573,10 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
15741573
idx++;
15751574
}
15761575

1577-
/* make sure the index is valid */
1578-
Assert((idx >= bms_num_members(keys)) &&
1579-
(idx <= bms_num_members(keys) + list_length(exprs)));
1576+
if (lc == NULL)
1577+
elog(ERROR, "expression not found in statistics object");
15801578
}
15811579

1582-
Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1583-
15841580
return idx;
15851581
}
15861582

@@ -1659,8 +1655,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
16591655
/* match the attribute/expression to a dimension of the statistic */
16601656
idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
16611657

1662-
Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1663-
16641658
/*
16651659
* Walk through the MCV items and evaluate the current clause. We
16661660
* can skip items that were already ruled out, and terminate if
@@ -1944,7 +1938,30 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
19441938
}
19451939
}
19461940
else
1947-
elog(ERROR, "unknown clause type: %d", clause->type);
1941+
{
1942+
/* Otherwise, it must be a bare boolean-returning expression */
1943+
int idx;
1944+
1945+
/* match the expression to a dimension of the statistic */
1946+
idx = mcv_match_expression(clause, keys, exprs, NULL);
1947+
1948+
/*
1949+
* Walk through the MCV items and evaluate the current clause. We
1950+
* can skip items that were already ruled out, and terminate if
1951+
* there are no remaining MCV items that might possibly match.
1952+
*/
1953+
for (i = 0; i < mcvlist->nitems; i++)
1954+
{
1955+
bool match;
1956+
MCVItem *item = &mcvlist->items[i];
1957+
1958+
/* "match" just means it's bool TRUE */
1959+
match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1960+
1961+
/* now, update the match bitmap, depending on OR/AND type */
1962+
matches[i] = RESULT_MERGE(matches[i], is_or, match);
1963+
}
1964+
}
19481965
}
19491966

19501967
return matches;

src/test/regress/expected/stats_ext.out

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -271,14 +271,23 @@ SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
271271
CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
272272
-- date_trunc on timestamp is immutable
273273
CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
274+
-- check use of a boolean-returning expression
275+
CREATE STATISTICS ab1_exprstat_6 ON
276+
(case a when 1 then true else false end), b FROM ab1;
274277
-- insert some data and run analyze, to test that these cases build properly
275278
INSERT INTO ab1
276-
SELECT
277-
generate_series(1,10),
278-
generate_series(1,10),
279-
generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
280-
generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
279+
SELECT x / 10, x / 3,
280+
'2020-10-01'::timestamp + x * interval '1 day',
281+
'2020-10-01'::timestamptz + x * interval '1 day'
282+
FROM generate_series(1, 100) x;
281283
ANALYZE ab1;
284+
-- apply some stats
285+
SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
286+
estimated | actual
287+
-----------+--------
288+
1 | 0
289+
(1 row)
290+
282291
DROP TABLE ab1;
283292
-- Verify supported object types for extended statistics
284293
CREATE schema tststats;

src/test/regress/sql/stats_ext.sql

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,21 @@ CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
165165
-- date_trunc on timestamp is immutable
166166
CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
167167

168+
-- check use of a boolean-returning expression
169+
CREATE STATISTICS ab1_exprstat_6 ON
170+
(case a when 1 then true else false end), b FROM ab1;
171+
168172
-- insert some data and run analyze, to test that these cases build properly
169173
INSERT INTO ab1
170-
SELECT
171-
generate_series(1,10),
172-
generate_series(1,10),
173-
generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
174-
generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
174+
SELECT x / 10, x / 3,
175+
'2020-10-01'::timestamp + x * interval '1 day',
176+
'2020-10-01'::timestamptz + x * interval '1 day'
177+
FROM generate_series(1, 100) x;
175178
ANALYZE ab1;
179+
180+
-- apply some stats
181+
SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2');
182+
176183
DROP TABLE ab1;
177184

178185
-- Verify supported object types for extended statistics

0 commit comments

Comments
 (0)