Skip to content

Commit 08d2d58

Browse files
author
Etsuro Fujita
committed
postgres_fdw: Fix costing of pre-sorted foreign paths with local stats.
Commit aa09cd2 modified estimate_path_cost_size() so that it reuses cached costs of a basic foreign path for a given foreign-base/join relation when costing pre-sorted foreign paths for that relation, but it incorrectly re-computed retrieved_rows, an estimated number of rows fetched from the remote side, which is needed for costing both the basic and pre-sorted foreign paths. To fix, handle retrieved_rows the same way as the cached costs: store in that relation's fpinfo the retrieved_rows estimate computed for costing the basic foreign path, and reuse it when costing the pre-sorted foreign paths. Also, reuse the rows/width estimates stored in that relation's fpinfo when costing the pre-sorted foreign paths, to make the code consistent. In commit ffab494, to extend the costing mentioned above to the foreign-grouping case, I made a change to add_foreign_grouping_paths() to store in a given foreign-grouped relation's RelOptInfo the rows estimate for that relation for reuse, but this patch makes that change unnecessary since we already store the row estimate in that relation's fpinfo, which this patch reuses when costing a foreign path for that relation with the sortClause ordering; remove that change. In passing, fix thinko in commit 7012b13: in estimate_path_cost_size(), the width estimate for a given foreign-grouped relation to be stored in that relation's fpinfo was reset incorrectly when costing a basic foreign path for that relation with local stats. Apply the patch to HEAD only to avoid destabilizing existing plan choices. Author: Etsuro Fujita Discussion: https://postgr.es/m/CAPmGK17jaJLPDEkgnP2VmkOg=5wT8YQ1CqssU8JRpZ_NSE+dqQ@mail.gmail.com
1 parent be2e024 commit 08d2d58

File tree

2 files changed

+63
-41
lines changed

2 files changed

+63
-41
lines changed

contrib/postgres_fdw/postgres_fdw.c

Lines changed: 56 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -661,10 +661,11 @@ postgresGetForeignRelSize(PlannerInfo *root,
661661
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
662662

663663
/*
664-
* Set cached relation costs to some negative value, so that we can detect
665-
* when they are set to some sensible costs during one (usually the first)
666-
* of the calls to estimate_path_cost_size().
664+
* Set # of retrieved rows and cached relation costs to some negative
665+
* value, so that we can detect when they are set to some sensible values,
666+
* during one (usually the first) of the calls to estimate_path_cost_size.
667667
*/
668+
fpinfo->retrieved_rows = -1;
668669
fpinfo->rel_startup_cost = -1;
669670
fpinfo->rel_total_cost = -1;
670671

@@ -2623,7 +2624,6 @@ estimate_path_cost_size(PlannerInfo *root,
26232624
int width;
26242625
Cost startup_cost;
26252626
Cost total_cost;
2626-
Cost cpu_per_tuple;
26272627

26282628
/* Make sure the core code has set up the relation's reltarget */
26292629
Assert(foreignrel->reltarget);
@@ -2736,26 +2736,20 @@ estimate_path_cost_size(PlannerInfo *root,
27362736
*/
27372737
Assert(param_join_conds == NIL);
27382738

2739-
/*
2740-
* Use rows/width estimates made by set_baserel_size_estimates() for
2741-
* base foreign relations and set_joinrel_size_estimates() for join
2742-
* between foreign relations.
2743-
*/
2744-
rows = foreignrel->rows;
2745-
width = foreignrel->reltarget->width;
2746-
2747-
/* Back into an estimate of the number of retrieved rows. */
2748-
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
2749-
27502739
/*
27512740
* We will come here again and again with different set of pathkeys or
27522741
* additional post-scan/join-processing steps that caller wants to
2753-
* cost. We don't need to calculate the costs of the underlying scan,
2754-
* join, or grouping each time. Instead, use the costs if we have
2755-
* cached them already.
2742+
* cost. We don't need to calculate the cost/size estimates for the
2743+
* underlying scan, join, or grouping each time. Instead, use those
2744+
* estimates if we have cached them already.
27562745
*/
27572746
if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
27582747
{
2748+
Assert(fpinfo->retrieved_rows >= 1);
2749+
2750+
rows = fpinfo->rows;
2751+
retrieved_rows = fpinfo->retrieved_rows;
2752+
width = fpinfo->width;
27592753
startup_cost = fpinfo->rel_startup_cost;
27602754
run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
27612755

@@ -2785,6 +2779,10 @@ estimate_path_cost_size(PlannerInfo *root,
27852779
QualCost remote_conds_cost;
27862780
double nrows;
27872781

2782+
/* Use rows/width estimates made by the core code. */
2783+
rows = foreignrel->rows;
2784+
width = foreignrel->reltarget->width;
2785+
27882786
/* For join we expect inner and outer relations set */
27892787
Assert(fpinfo->innerrel && fpinfo->outerrel);
27902788

@@ -2793,7 +2791,12 @@ estimate_path_cost_size(PlannerInfo *root,
27932791

27942792
/* Estimate of number of rows in cross product */
27952793
nrows = fpinfo_i->rows * fpinfo_o->rows;
2796-
/* Clamp retrieved rows estimate to at most size of cross product */
2794+
2795+
/*
2796+
* Back into an estimate of the number of retrieved rows. Just in
2797+
* case this is nuts, clamp to at most nrow.
2798+
*/
2799+
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
27972800
retrieved_rows = Min(retrieved_rows, nrows);
27982801

27992802
/*
@@ -2871,9 +2874,8 @@ estimate_path_cost_size(PlannerInfo *root,
28712874

28722875
ofpinfo = (PgFdwRelationInfo *) outerrel->fdw_private;
28732876

2874-
/* Get rows and width from input rel */
2877+
/* Get rows from input rel */
28752878
input_rows = ofpinfo->rows;
2876-
width = ofpinfo->width;
28772879

28782880
/* Collect statistics about aggregates for estimating costs. */
28792881
MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
@@ -2920,6 +2922,9 @@ estimate_path_cost_size(PlannerInfo *root,
29202922
rows = retrieved_rows = numGroups;
29212923
}
29222924

2925+
/* Use width estimate made by the core code. */
2926+
width = foreignrel->reltarget->width;
2927+
29232928
/*-----
29242929
* Startup cost includes:
29252930
* 1. Startup cost for underneath input relation, adjusted for
@@ -2966,7 +2971,17 @@ estimate_path_cost_size(PlannerInfo *root,
29662971
}
29672972
else
29682973
{
2969-
/* Clamp retrieved rows estimates to at most foreignrel->tuples. */
2974+
Cost cpu_per_tuple;
2975+
2976+
/* Use rows/width estimates made by set_baserel_size_estimates. */
2977+
rows = foreignrel->rows;
2978+
width = foreignrel->reltarget->width;
2979+
2980+
/*
2981+
* Back into an estimate of the number of retrieved rows. Just in
2982+
* case this is nuts, clamp to at most foreignrel->tuples.
2983+
*/
2984+
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
29702985
retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
29712986

29722987
/*
@@ -3043,18 +3058,20 @@ estimate_path_cost_size(PlannerInfo *root,
30433058
}
30443059

30453060
/*
3046-
* Cache the costs for scans, joins, or groupings without any
3047-
* parameterization, pathkeys, or additional post-scan/join-processing
3048-
* steps, before adding the costs for transferring data from the foreign
3049-
* server. These costs are useful for costing remote joins involving this
3050-
* relation or costing other remote operations for this relation such as
3051-
* remote sorts and remote LIMIT restrictions, when the costs can not be
3052-
* obtained from the foreign server. This function will be called at
3053-
* least once for every foreign relation without any parameterization,
3054-
* pathkeys, or additional post-scan/join-processing steps.
3061+
* Cache the retrieved rows and cost estimates for scans, joins, or
3062+
* groupings without any parameterization, pathkeys, or additional
3063+
* post-scan/join-processing steps, before adding the costs for
3064+
* transferring data from the foreign server. These estimates are useful
3065+
* for costing remote joins involving this relation or costing other
3066+
* remote operations on this relation such as remote sorts and remote
3067+
* LIMIT restrictions, when the costs can not be obtained from the foreign
3068+
* server. This function will be called at least once for every foreign
3069+
* relation without any parameterization, pathkeys, or additional
3070+
* post-scan/join-processing steps.
30553071
*/
30563072
if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
30573073
{
3074+
fpinfo->retrieved_rows = retrieved_rows;
30583075
fpinfo->rel_startup_cost = startup_cost;
30593076
fpinfo->rel_total_cost = total_cost;
30603077
}
@@ -5157,10 +5174,11 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
51575174
fpinfo->user = NULL;
51585175

51595176
/*
5160-
* Set cached relation costs to some negative value, so that we can detect
5161-
* when they are set to some sensible costs, during one (usually the
5162-
* first) of the calls to estimate_path_cost_size().
5177+
* Set # of retrieved rows and cached relation costs to some negative
5178+
* value, so that we can detect when they are set to some sensible values,
5179+
* during one (usually the first) of the calls to estimate_path_cost_size.
51635180
*/
5181+
fpinfo->retrieved_rows = -1;
51645182
fpinfo->rel_startup_cost = -1;
51655183
fpinfo->rel_total_cost = -1;
51665184

@@ -5708,10 +5726,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
57085726
fpinfo->pushdown_safe = true;
57095727

57105728
/*
5711-
* Set cached relation costs to some negative value, so that we can detect
5712-
* when they are set to some sensible costs, during one (usually the
5713-
* first) of the calls to estimate_path_cost_size().
5729+
* Set # of retrieved rows and cached relation costs to some negative
5730+
* value, so that we can detect when they are set to some sensible values,
5731+
* during one (usually the first) of the calls to estimate_path_cost_size.
57145732
*/
5733+
fpinfo->retrieved_rows = -1;
57155734
fpinfo->rel_startup_cost = -1;
57165735
fpinfo->rel_total_cost = -1;
57175736

@@ -5853,8 +5872,6 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
58535872
fpinfo->startup_cost = startup_cost;
58545873
fpinfo->total_cost = total_cost;
58555874

5856-
grouped_rel->rows = fpinfo->rows;
5857-
58585875
/* Create and add foreign path to the grouping relation. */
58595876
grouppath = create_foreign_upper_path(root,
58605877
grouped_rel,

contrib/postgres_fdw/postgres_fdw.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,17 @@ typedef struct PgFdwRelationInfo
5959
/* Selectivity of join conditions */
6060
Selectivity joinclause_sel;
6161

62-
/* Estimated size and cost for a scan or join. */
62+
/* Estimated size and cost for a scan, join, or grouping/aggregation. */
6363
double rows;
6464
int width;
6565
Cost startup_cost;
6666
Cost total_cost;
67-
/* Costs excluding costs for transferring data from the foreign server */
67+
/*
68+
* Estimated number of rows fetched from the foreign server, and costs
69+
* excluding costs for transferring those rows from the foreign server.
70+
* These are only used by estimate_path_cost_size().
71+
*/
72+
double retrieved_rows;
6873
Cost rel_startup_cost;
6974
Cost rel_total_cost;
7075

0 commit comments

Comments
 (0)