Skip to content

Commit d7153c5

Browse files
committed
Fix best_inner_indexscan to return both the cheapest-total-cost and
cheapest-startup-cost innerjoin indexscans, and make joinpath.c consider both of these (when different) as the inside of a nestloop join. The original design was based on the assumption that indexscan paths always have negligible startup cost, and so total cost is the only important figure of merit; an assumption that's obviously broken by bitmap indexscans. This oversight could lead to choosing poor plans in cases where fast-start behavior is more important than total cost, such as LIMIT and IN queries. 8.1-vintage brain fade exposed by an example from Chuck D.
1 parent 2415ad9 commit d7153c5

File tree

5 files changed

+94
-63
lines changed

5 files changed

+94
-63
lines changed

src/backend/nodes/outfuncs.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.306 2007/04/27 22:05:47 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.307 2007/05/22 01:40:33 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1440,7 +1440,8 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
14401440
WRITE_NODE_TYPE("INNERINDEXSCANINFO");
14411441
WRITE_BITMAPSET_FIELD(other_relids);
14421442
WRITE_BOOL_FIELD(isouterjoin);
1443-
WRITE_NODE_FIELD(best_innerpath);
1443+
WRITE_NODE_FIELD(cheapest_startup_innerpath);
1444+
WRITE_NODE_FIELD(cheapest_total_innerpath);
14441445
}
14451446

14461447
static void

src/backend/optimizer/path/indxpath.c

+39-30
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.221 2007/04/17 20:03:03 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.222 2007/05/22 01:40:33 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -1599,26 +1599,26 @@ eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
15991599

16001600
/*
16011601
* best_inner_indexscan
1602-
* Finds the best available inner indexscan for a nestloop join
1602+
* Finds the best available inner indexscans for a nestloop join
16031603
* with the given rel on the inside and the given outer_rel outside.
1604-
* May return NULL if there are no possible inner indexscans.
16051604
*
1606-
* We ignore ordering considerations (since a nestloop's inner scan's order
1607-
* is uninteresting). Also, we consider only total cost when deciding which
1608-
* of two possible paths is better --- this assumes that all indexpaths have
1609-
* negligible startup cost. (True today, but someday we might have to think
1610-
* harder.) Therefore, there is only one dimension of comparison and so it's
1611-
* sufficient to return a single "best" path.
1605+
* *cheapest_startup gets the path with least startup cost
1606+
* *cheapest_total gets the path with least total cost (often the same path)
1607+
* Both are set to NULL if there are no possible inner indexscans.
1608+
*
1609+
* We ignore ordering considerations, since a nestloop's inner scan's order
1610+
* is uninteresting. Hence startup cost and total cost are the only figures
1611+
* of merit to consider.
16121612
*
16131613
* Note: create_index_paths() must have been run previously for this rel,
1614-
* else the result will always be NULL.
1614+
* else the results will always be NULL.
16151615
*/
1616-
Path *
1616+
void
16171617
best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
1618-
RelOptInfo *outer_rel, JoinType jointype)
1618+
RelOptInfo *outer_rel, JoinType jointype,
1619+
Path **cheapest_startup, Path **cheapest_total)
16191620
{
16201621
Relids outer_relids;
1621-
Path *cheapest;
16221622
bool isouterjoin;
16231623
List *clause_list;
16241624
List *indexpaths;
@@ -1627,6 +1627,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
16271627
InnerIndexscanInfo *info;
16281628
MemoryContext oldcontext;
16291629

1630+
/* Initialize results for failure returns */
1631+
*cheapest_startup = *cheapest_total = NULL;
1632+
16301633
/*
16311634
* Nestloop only supports inner, left, and IN joins.
16321635
*/
@@ -1641,14 +1644,14 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
16411644
isouterjoin = true;
16421645
break;
16431646
default:
1644-
return NULL;
1647+
return;
16451648
}
16461649

16471650
/*
16481651
* If there are no indexable joinclauses for this rel, exit quickly.
16491652
*/
16501653
if (bms_is_empty(rel->index_outer_relids))
1651-
return NULL;
1654+
return;
16521655

16531656
/*
16541657
* Otherwise, we have to do path selection in the main planning context,
@@ -1668,17 +1671,17 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
16681671
{
16691672
bms_free(outer_relids);
16701673
MemoryContextSwitchTo(oldcontext);
1671-
return NULL;
1674+
return;
16721675
}
16731676

16741677
/*
16751678
* Look to see if we already computed the result for this set of relevant
16761679
* outerrels. (We include the isouterjoin status in the cache lookup key
16771680
* for safety. In practice I suspect this is not necessary because it
1678-
* should always be the same for a given innerrel.)
1681+
* should always be the same for a given combination of rels.)
16791682
*
16801683
* NOTE: because we cache on outer_relids rather than outer_rel->relids,
1681-
* we will report the same path and hence path cost for joins with
1684+
* we will report the same paths and hence path cost for joins with
16821685
* different sets of irrelevant rels on the outside. Now that cost_index
16831686
* is sensitive to outer_rel->rows, this is not really right. However the
16841687
* error is probably not large. Is it worth establishing a separate cache
@@ -1692,7 +1695,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
16921695
{
16931696
bms_free(outer_relids);
16941697
MemoryContextSwitchTo(oldcontext);
1695-
return info->best_innerpath;
1698+
*cheapest_startup = info->cheapest_startup_innerpath;
1699+
*cheapest_total = info->cheapest_total_innerpath;
1700+
return;
16961701
}
16971702
}
16981703

@@ -1755,28 +1760,32 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
17551760
}
17561761

17571762
/*
1758-
* Now choose the cheapest member of indexpaths.
1763+
* Now choose the cheapest members of indexpaths.
17591764
*/
1760-
cheapest = NULL;
1761-
foreach(l, indexpaths)
1765+
if (indexpaths != NIL)
17621766
{
1763-
Path *path = (Path *) lfirst(l);
1767+
*cheapest_startup = *cheapest_total = (Path *) linitial(indexpaths);
1768+
1769+
for_each_cell(l, lnext(list_head(indexpaths)))
1770+
{
1771+
Path *path = (Path *) lfirst(l);
17641772

1765-
if (cheapest == NULL ||
1766-
compare_path_costs(path, cheapest, TOTAL_COST) < 0)
1767-
cheapest = path;
1773+
if (compare_path_costs(path, *cheapest_startup, STARTUP_COST) < 0)
1774+
*cheapest_startup = path;
1775+
if (compare_path_costs(path, *cheapest_total, TOTAL_COST) < 0)
1776+
*cheapest_total = path;
1777+
}
17681778
}
17691779

1770-
/* Cache the result --- whether positive or negative */
1780+
/* Cache the results --- whether positive or negative */
17711781
info = makeNode(InnerIndexscanInfo);
17721782
info->other_relids = outer_relids;
17731783
info->isouterjoin = isouterjoin;
1774-
info->best_innerpath = cheapest;
1784+
info->cheapest_startup_innerpath = *cheapest_startup;
1785+
info->cheapest_total_innerpath = *cheapest_total;
17751786
rel->index_inner_paths = lcons(info, rel->index_inner_paths);
17761787

17771788
MemoryContextSwitchTo(oldcontext);
1778-
1779-
return cheapest;
17801789
}
17811790

17821791
/*

src/backend/optimizer/path/joinpath.c

+41-22
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.112 2007/05/22 01:40:33 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -286,10 +286,11 @@ sort_inner_and_outer(PlannerInfo *root,
286286
* only outer paths that are already ordered well enough for merging).
287287
*
288288
* We always generate a nestloop path for each available outer path.
289-
* In fact we may generate as many as four: one on the cheapest-total-cost
289+
* In fact we may generate as many as five: one on the cheapest-total-cost
290290
* inner path, one on the same with materialization, one on the
291-
* cheapest-startup-cost inner path (if different),
292-
* and one on the best inner-indexscan path (if any).
291+
* cheapest-startup-cost inner path (if different), one on the
292+
* cheapest-total inner-indexscan path (if any), and one on the
293+
* cheapest-startup inner-indexscan path (if different).
293294
*
294295
* We also consider mergejoins if mergejoin clauses are available. We have
295296
* two ways to generate the inner path for a mergejoin: sort the cheapest
@@ -325,7 +326,8 @@ match_unsorted_outer(PlannerInfo *root,
325326
Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
326327
Path *inner_cheapest_total = innerrel->cheapest_total_path;
327328
Path *matpath = NULL;
328-
Path *bestinnerjoin = NULL;
329+
Path *index_cheapest_startup = NULL;
330+
Path *index_cheapest_total = NULL;
329331
ListCell *l;
330332

331333
/*
@@ -383,17 +385,20 @@ match_unsorted_outer(PlannerInfo *root,
383385
create_material_path(innerrel, inner_cheapest_total);
384386

385387
/*
386-
* Get the best innerjoin indexpath (if any) for this outer rel. It's
387-
* the same for all outer paths.
388+
* Get the best innerjoin indexpaths (if any) for this outer rel.
389+
* They're the same for all outer paths.
388390
*/
389391
if (innerrel->reloptkind != RELOPT_JOINREL)
390392
{
391393
if (IsA(inner_cheapest_total, AppendPath))
392-
bestinnerjoin = best_appendrel_indexscan(root, innerrel,
393-
outerrel, jointype);
394+
index_cheapest_total = best_appendrel_indexscan(root,
395+
innerrel,
396+
outerrel,
397+
jointype);
394398
else if (innerrel->rtekind == RTE_RELATION)
395-
bestinnerjoin = best_inner_indexscan(root, innerrel,
396-
outerrel, jointype);
399+
best_inner_indexscan(root, innerrel, outerrel, jointype,
400+
&index_cheapest_startup,
401+
&index_cheapest_total);
397402
}
398403
}
399404

@@ -435,8 +440,8 @@ match_unsorted_outer(PlannerInfo *root,
435440
* Always consider a nestloop join with this outer and
436441
* cheapest-total-cost inner. When appropriate, also consider
437442
* using the materialized form of the cheapest inner, the
438-
* cheapest-startup-cost inner path, and the best innerjoin
439-
* indexpath.
443+
* cheapest-startup-cost inner path, and the cheapest innerjoin
444+
* indexpaths.
440445
*/
441446
add_path(joinrel, (Path *)
442447
create_nestloop_path(root,
@@ -464,13 +469,23 @@ match_unsorted_outer(PlannerInfo *root,
464469
inner_cheapest_startup,
465470
restrictlist,
466471
merge_pathkeys));
467-
if (bestinnerjoin != NULL)
472+
if (index_cheapest_total != NULL)
468473
add_path(joinrel, (Path *)
469474
create_nestloop_path(root,
470475
joinrel,
471476
jointype,
472477
outerpath,
473-
bestinnerjoin,
478+
index_cheapest_total,
479+
restrictlist,
480+
merge_pathkeys));
481+
if (index_cheapest_startup != NULL &&
482+
index_cheapest_startup != index_cheapest_total)
483+
add_path(joinrel, (Path *)
484+
create_nestloop_path(root,
485+
joinrel,
486+
jointype,
487+
outerpath,
488+
index_cheapest_startup,
474489
restrictlist,
475490
merge_pathkeys));
476491
}
@@ -789,6 +804,9 @@ hash_inner_and_outer(PlannerInfo *root,
789804
* with the given append relation on the inside and the given outer_rel
790805
* outside. Returns an AppendPath comprising the best inner scans, or
791806
* NULL if there are no possible inner indexscans.
807+
*
808+
* Note that we currently consider only cheapest-total-cost. It's not
809+
* very clear what cheapest-startup-cost might mean for an AppendPath.
792810
*/
793811
static Path *
794812
best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
@@ -804,7 +822,8 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
804822
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
805823
int childRTindex;
806824
RelOptInfo *childrel;
807-
Path *bestinnerjoin;
825+
Path *index_cheapest_startup;
826+
Path *index_cheapest_total;
808827

809828
/* append_rel_list contains all append rels; ignore others */
810829
if (appinfo->parent_relid != parentRTindex)
@@ -824,23 +843,23 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
824843
continue; /* OK, we can ignore it */
825844

826845
/*
827-
* Get the best innerjoin indexpath (if any) for this child rel.
846+
* Get the best innerjoin indexpaths (if any) for this child rel.
828847
*/
829-
bestinnerjoin = best_inner_indexscan(root, childrel,
830-
outer_rel, jointype);
848+
best_inner_indexscan(root, childrel, outer_rel, jointype,
849+
&index_cheapest_startup, &index_cheapest_total);
831850

832851
/*
833852
* If no luck on an indexpath for this rel, we'll still consider an
834853
* Append substituting the cheapest-total inner path. However we must
835854
* find at least one indexpath, else there's not going to be any
836855
* improvement over the base path for the appendrel.
837856
*/
838-
if (bestinnerjoin)
857+
if (index_cheapest_total)
839858
found_indexscan = true;
840859
else
841-
bestinnerjoin = childrel->cheapest_total_path;
860+
index_cheapest_total = childrel->cheapest_total_path;
842861

843-
append_paths = lappend(append_paths, bestinnerjoin);
862+
append_paths = lappend(append_paths, index_cheapest_total);
844863
}
845864

846865
if (!found_indexscan)

src/include/nodes/relation.h

+7-6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.141 2007/04/21 21:01:45 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.142 2007/05/22 01:40:33 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -996,20 +996,20 @@ typedef struct MergeScanSelCache
996996
* relation includes all other relids appearing in those joinclauses.
997997
* The set of usable joinclauses, and thus the best inner indexscan,
998998
* thus varies depending on which outer relation we consider; so we have
999-
* to recompute the best such path for every join. To avoid lots of
999+
* to recompute the best such paths for every join. To avoid lots of
10001000
* redundant computation, we cache the results of such searches. For
10011001
* each relation we compute the set of possible otherrelids (all relids
10021002
* appearing in joinquals that could become indexquals for this table).
10031003
* Two outer relations whose relids have the same intersection with this
10041004
* set will have the same set of available joinclauses and thus the same
1005-
* best inner indexscan for the inner relation. By taking the intersection
1005+
* best inner indexscans for the inner relation. By taking the intersection
10061006
* before scanning the cache, we avoid recomputing when considering
10071007
* join rels that differ only by the inclusion of irrelevant other rels.
10081008
*
10091009
* The search key also includes a bool showing whether the join being
10101010
* considered is an outer join. Since we constrain the join order for
10111011
* outer joins, I believe that this bool can only have one possible value
1012-
* for any particular base relation; but store it anyway to avoid confusion.
1012+
* for any particular lookup key; but store it anyway to avoid confusion.
10131013
*/
10141014

10151015
typedef struct InnerIndexscanInfo
@@ -1018,8 +1018,9 @@ typedef struct InnerIndexscanInfo
10181018
/* The lookup key: */
10191019
Relids other_relids; /* a set of relevant other relids */
10201020
bool isouterjoin; /* true if join is outer */
1021-
/* Best path for this lookup key: */
1022-
Path *best_innerpath; /* best inner indexscan, or NULL if none */
1021+
/* Best paths for this lookup key (NULL if no available indexscans): */
1022+
Path *cheapest_startup_innerpath; /* cheapest startup cost */
1023+
Path *cheapest_total_innerpath; /* cheapest total cost */
10231024
} InnerIndexscanInfo;
10241025

10251026
/*

src/include/optimizer/paths.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.97 2007/04/15 20:09:28 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.98 2007/05/22 01:40:33 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -45,8 +45,9 @@ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
4545
extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
4646
List *clauses, List *outer_clauses,
4747
RelOptInfo *outer_rel);
48-
extern Path *best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
49-
RelOptInfo *outer_rel, JoinType jointype);
48+
extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
49+
RelOptInfo *outer_rel, JoinType jointype,
50+
Path **cheapest_startup, Path **cheapest_total);
5051
extern List *group_clauses_by_indexkey(IndexOptInfo *index,
5152
List *clauses, List *outer_clauses,
5253
Relids outer_relids,

0 commit comments

Comments
 (0)