Skip to content

Commit f7816ae

Browse files
committed
Extract column statistics from CTE references, if possible.
examine_simple_variable() left this as an unimplemented case years ago, with the result that plans for queries involving un-flattened CTEs might be much stupider than necessary. It's not hard to extend the existing logic for RTE_SUBQUERY cases to also be able to drill down into CTEs, so let's do that. There was some discussion of whether this patch breaks the idea of a MATERIALIZED CTE being an optimization fence. We concluded it's okay, because we already allow the outer planner level to see the estimated width and rowcount of the CTE result, and letting it see column statistics too seems fairly equivalent. Basically, what we expect of the optimization fence is that the outer query should not affect the plan chosen for the CTE query. Once that plan is chosen, it's okay for the outer planner level to make use of whatever information we have about it. Jian Guo and Tom Lane, per complaint from Hans Buschmann Discussion: https://postgr.es/m/4504e67078d648cdac3651b2960da6e7@nidsa.net
1 parent 06c7084 commit f7816ae

File tree

3 files changed

+124
-35
lines changed

3 files changed

+124
-35
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 99 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5363,7 +5363,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
53635363
* Handle a simple Var for examine_variable
53645364
*
53655365
* This is split out as a subroutine so that we can recurse to deal with
5366-
* Vars referencing subqueries.
5366+
* Vars referencing subqueries (either sub-SELECT-in-FROM or CTE style).
53675367
*
53685368
* We already filled in all the fields of *vardata except for the stats tuple.
53695369
*/
@@ -5497,13 +5497,19 @@ examine_simple_variable(PlannerInfo *root, Var *var,
54975497
vardata->acl_ok = true;
54985498
}
54995499
}
5500-
else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
5500+
else if ((rte->rtekind == RTE_SUBQUERY && !rte->inh) ||
5501+
(rte->rtekind == RTE_CTE && !rte->self_reference))
55015502
{
55025503
/*
5503-
* Plain subquery (not one that was converted to an appendrel).
5504+
* Plain subquery (not one that was converted to an appendrel) or
5505+
* non-recursive CTE. In either case, we can try to find out what the
5506+
* Var refers to within the subquery. We skip this for appendrel and
5507+
* recursive-CTE cases because any column stats we did find would
5508+
* likely not be very relevant.
55045509
*/
5505-
Query *subquery = rte->subquery;
5506-
RelOptInfo *rel;
5510+
PlannerInfo *subroot;
5511+
Query *subquery;
5512+
List *subtlist;
55075513
TargetEntry *ste;
55085514

55095515
/*
@@ -5512,6 +5518,85 @@ examine_simple_variable(PlannerInfo *root, Var *var,
55125518
if (var->varattno == InvalidAttrNumber)
55135519
return;
55145520

5521+
/*
5522+
* Otherwise, find the subquery's planner subroot.
5523+
*/
5524+
if (rte->rtekind == RTE_SUBQUERY)
5525+
{
5526+
RelOptInfo *rel;
5527+
5528+
/*
5529+
* Fetch RelOptInfo for subquery. Note that we don't change the
5530+
* rel returned in vardata, since caller expects it to be a rel of
5531+
* the caller's query level. Because we might already be
5532+
* recursing, we can't use that rel pointer either, but have to
5533+
* look up the Var's rel afresh.
5534+
*/
5535+
rel = find_base_rel(root, var->varno);
5536+
5537+
subroot = rel->subroot;
5538+
}
5539+
else
5540+
{
5541+
/* CTE case is more difficult */
5542+
PlannerInfo *cteroot;
5543+
Index levelsup;
5544+
int ndx;
5545+
int plan_id;
5546+
ListCell *lc;
5547+
5548+
/*
5549+
* Find the referenced CTE, and locate the subroot previously made
5550+
* for it.
5551+
*/
5552+
levelsup = rte->ctelevelsup;
5553+
cteroot = root;
5554+
while (levelsup-- > 0)
5555+
{
5556+
cteroot = cteroot->parent_root;
5557+
if (!cteroot) /* shouldn't happen */
5558+
elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
5559+
}
5560+
5561+
/*
5562+
* Note: cte_plan_ids can be shorter than cteList, if we are still
5563+
* working on planning the CTEs (ie, this is a side-reference from
5564+
* another CTE). So we mustn't use forboth here.
5565+
*/
5566+
ndx = 0;
5567+
foreach(lc, cteroot->parse->cteList)
5568+
{
5569+
CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
5570+
5571+
if (strcmp(cte->ctename, rte->ctename) == 0)
5572+
break;
5573+
ndx++;
5574+
}
5575+
if (lc == NULL) /* shouldn't happen */
5576+
elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
5577+
if (ndx >= list_length(cteroot->cte_plan_ids))
5578+
elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
5579+
plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
5580+
if (plan_id <= 0)
5581+
elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename);
5582+
subroot = list_nth(root->glob->subroots, plan_id - 1);
5583+
}
5584+
5585+
/* If the subquery hasn't been planned yet, we have to punt */
5586+
if (subroot == NULL)
5587+
return;
5588+
Assert(IsA(subroot, PlannerInfo));
5589+
5590+
/*
5591+
* We must use the subquery parsetree as mangled by the planner, not
5592+
* the raw version from the RTE, because we need a Var that will refer
5593+
* to the subroot's live RelOptInfos. For instance, if any subquery
5594+
* pullup happened during planning, Vars in the targetlist might have
5595+
* gotten replaced, and we need to see the replacement expressions.
5596+
*/
5597+
subquery = subroot->parse;
5598+
Assert(IsA(subquery, Query));
5599+
55155600
/*
55165601
* Punt if subquery uses set operations or GROUP BY, as these will
55175602
* mash underlying columns' stats beyond recognition. (Set ops are
@@ -5525,33 +5610,12 @@ examine_simple_variable(PlannerInfo *root, Var *var,
55255610
subquery->groupingSets)
55265611
return;
55275612

5528-
/*
5529-
* OK, fetch RelOptInfo for subquery. Note that we don't change the
5530-
* rel returned in vardata, since caller expects it to be a rel of the
5531-
* caller's query level. Because we might already be recursing, we
5532-
* can't use that rel pointer either, but have to look up the Var's
5533-
* rel afresh.
5534-
*/
5535-
rel = find_base_rel(root, var->varno);
5536-
5537-
/* If the subquery hasn't been planned yet, we have to punt */
5538-
if (rel->subroot == NULL)
5539-
return;
5540-
Assert(IsA(rel->subroot, PlannerInfo));
5541-
5542-
/*
5543-
* Switch our attention to the subquery as mangled by the planner. It
5544-
* was okay to look at the pre-planning version for the tests above,
5545-
* but now we need a Var that will refer to the subroot's live
5546-
* RelOptInfos. For instance, if any subquery pullup happened during
5547-
* planning, Vars in the targetlist might have gotten replaced, and we
5548-
* need to see the replacement expressions.
5549-
*/
5550-
subquery = rel->subroot->parse;
5551-
Assert(IsA(subquery, Query));
5552-
55535613
/* Get the subquery output expression referenced by the upper Var */
5554-
ste = get_tle_by_resno(subquery->targetList, var->varattno);
5614+
if (subquery->returningList)
5615+
subtlist = subquery->returningList;
5616+
else
5617+
subtlist = subquery->targetList;
5618+
ste = get_tle_by_resno(subtlist, var->varattno);
55555619
if (ste == NULL || ste->resjunk)
55565620
elog(ERROR, "subquery %s does not have attribute %d",
55575621
rte->eref->aliasname, var->varattno);
@@ -5599,16 +5663,16 @@ examine_simple_variable(PlannerInfo *root, Var *var,
55995663
* if the underlying column is unique, the subquery may have
56005664
* joined to other tables in a way that creates duplicates.
56015665
*/
5602-
examine_simple_variable(rel->subroot, var, vardata);
5666+
examine_simple_variable(subroot, var, vardata);
56035667
}
56045668
}
56055669
else
56065670
{
56075671
/*
5608-
* Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
5609-
* won't see RTE_JOIN here because join alias Vars have already been
5672+
* Otherwise, the Var comes from a FUNCTION or VALUES RTE. (We won't
5673+
* see RTE_JOIN here because join alias Vars have already been
56105674
* flattened.) There's not much we can do with function outputs, but
5611-
* maybe someday try to be smarter about VALUES and/or CTEs.
5675+
* maybe someday try to be smarter about VALUES.
56125676
*/
56135677
}
56145678
}

src/test/regress/expected/with.out

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,24 @@ SELECT t1.id, t2.path, t2 FROM t AS t1 JOIN t AS t2 ON
636636
16 | {3,7,11,16} | (16,"{3,7,11,16}")
637637
(16 rows)
638638

639+
-- test that column statistics from a materialized CTE are available
640+
-- to upper planner (otherwise, we'd get a stupider plan)
641+
explain (costs off)
642+
with x as materialized (select unique1 from tenk1 b)
643+
select count(*) from tenk1 a
644+
where unique1 in (select * from x);
645+
QUERY PLAN
646+
------------------------------------------------------------
647+
Aggregate
648+
CTE x
649+
-> Index Only Scan using tenk1_unique1 on tenk1 b
650+
-> Hash Semi Join
651+
Hash Cond: (a.unique1 = x.unique1)
652+
-> Index Only Scan using tenk1_unique1 on tenk1 a
653+
-> Hash
654+
-> CTE Scan on x
655+
(8 rows)
656+
639657
-- SEARCH clause
640658
create temp table graph0( f int, t int, label text );
641659
insert into graph0 values

src/test/regress/sql/with.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,13 @@ UNION ALL
347347
SELECT t1.id, t2.path, t2 FROM t AS t1 JOIN t AS t2 ON
348348
(t1.id=t2.id);
349349

350+
-- test that column statistics from a materialized CTE are available
351+
-- to upper planner (otherwise, we'd get a stupider plan)
352+
explain (costs off)
353+
with x as materialized (select unique1 from tenk1 b)
354+
select count(*) from tenk1 a
355+
where unique1 in (select * from x);
356+
350357
-- SEARCH clause
351358

352359
create temp table graph0( f int, t int, label text );

0 commit comments

Comments
 (0)