8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $
11
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
12
12
*
13
13
*-------------------------------------------------------------------------
14
14
*/
@@ -58,6 +58,10 @@ static Node *preprocess_expression(Query *parse, Node *expr, int kind);
58
58
static void preprocess_qual_conditions (Query * parse , Node * jtnode );
59
59
static Plan * inheritance_planner (Query * parse , List * inheritlist );
60
60
static Plan * grouping_planner (Query * parse , double tuple_fraction );
61
+ static bool choose_hashed_grouping (Query * parse , double tuple_fraction ,
62
+ Path * cheapest_path , Path * sorted_path ,
63
+ List * sort_pathkeys , List * group_pathkeys ,
64
+ double dNumGroups , AggClauseCounts * agg_counts );
61
65
static bool hash_safe_grouping (Query * parse );
62
66
static List * make_subplanTargetList (Query * parse , List * tlist ,
63
67
AttrNumber * * groupColIdx , bool * need_tlist_eval );
@@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction)
920
924
sort_pathkeys = canonicalize_pathkeys (parse , sort_pathkeys );
921
925
922
926
/*
923
- * Consider whether we might want to use hashed grouping.
927
+ * If grouping, estimate the number of groups. (We can't do this
928
+ * until after running query_planner(), either.) Then decide
929
+ * whether we want to use hashed grouping.
924
930
*/
925
931
if (parse -> groupClause )
926
932
{
927
933
List * groupExprs ;
928
934
double cheapest_path_rows ;
929
- int cheapest_path_width ;
930
935
931
936
/*
932
- * Beware in this section of the possibility that
933
- * cheapest_path->parent is NULL. This could happen if user
934
- * does something silly like SELECT 'foo' GROUP BY 1;
937
+ * Beware of the possibility that cheapest_path->parent is NULL.
938
+ * This could happen if user does something silly like
939
+ * SELECT 'foo' GROUP BY 1;
935
940
*/
936
941
if (cheapest_path -> parent )
937
- {
938
942
cheapest_path_rows = cheapest_path -> parent -> rows ;
939
- cheapest_path_width = cheapest_path -> parent -> width ;
940
- }
941
943
else
942
- {
943
944
cheapest_path_rows = 1 ; /* assume non-set result */
944
- cheapest_path_width = 100 ; /* arbitrary */
945
- }
946
945
947
- /*
948
- * Always estimate the number of groups. We can't do this
949
- * until after running query_planner(), either.
950
- */
951
946
groupExprs = get_sortgrouplist_exprs (parse -> groupClause ,
952
947
parse -> targetList );
953
948
dNumGroups = estimate_num_groups (parse ,
@@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction)
956
951
/* Also want it as a long int --- but 'ware overflow! */
957
952
numGroups = (long ) Min (dNumGroups , (double ) LONG_MAX );
958
953
959
- /*
960
- * Check can't-do-it conditions, including whether the
961
- * grouping operators are hashjoinable.
962
- *
963
- * Executor doesn't support hashed aggregation with DISTINCT
964
- * aggregates. (Doing so would imply storing *all* the input
965
- * values in the hash table, which seems like a certain
966
- * loser.)
967
- */
968
- if (!enable_hashagg || !hash_safe_grouping (parse ))
969
- use_hashed_grouping = false;
970
- else if (agg_counts .numDistinctAggs != 0 )
971
- use_hashed_grouping = false;
972
- else
973
- {
974
- /*
975
- * Use hashed grouping if (a) we think we can fit the
976
- * hashtable into work_mem, *and* (b) the estimated cost
977
- * is no more than doing it the other way. While avoiding
978
- * the need for sorted input is usually a win, the fact
979
- * that the output won't be sorted may be a loss; so we
980
- * need to do an actual cost comparison.
981
- */
982
- Size hashentrysize ;
983
-
984
- /* Estimate per-hash-entry space at tuple width... */
985
- hashentrysize = cheapest_path_width ;
986
- /* plus space for pass-by-ref transition values... */
987
- hashentrysize += agg_counts .transitionSpace ;
988
- /* plus the per-hash-entry overhead */
989
- hashentrysize += hash_agg_entry_size (agg_counts .numAggs );
990
-
991
- if (hashentrysize * dNumGroups <= work_mem * 1024L )
992
- {
993
- /*
994
- * Okay, do the cost comparison. We need to consider
995
- * cheapest_path + hashagg [+ final sort] versus
996
- * either cheapest_path [+ sort] + group or agg [+
997
- * final sort] or presorted_path + group or agg [+
998
- * final sort] where brackets indicate a step that may
999
- * not be needed. We assume query_planner() will have
1000
- * returned a presorted path only if it's a winner
1001
- * compared to cheapest_path for this purpose.
1002
- *
1003
- * These path variables are dummies that just hold cost
1004
- * fields; we don't make actual Paths for these steps.
1005
- */
1006
- Path hashed_p ;
1007
- Path sorted_p ;
1008
-
1009
- cost_agg (& hashed_p , parse ,
1010
- AGG_HASHED , agg_counts .numAggs ,
1011
- numGroupCols , dNumGroups ,
1012
- cheapest_path -> startup_cost ,
1013
- cheapest_path -> total_cost ,
1014
- cheapest_path_rows );
1015
- /* Result of hashed agg is always unsorted */
1016
- if (sort_pathkeys )
1017
- cost_sort (& hashed_p , parse , sort_pathkeys ,
1018
- hashed_p .total_cost ,
1019
- dNumGroups ,
1020
- cheapest_path_width );
1021
-
1022
- if (sorted_path )
1023
- {
1024
- sorted_p .startup_cost = sorted_path -> startup_cost ;
1025
- sorted_p .total_cost = sorted_path -> total_cost ;
1026
- current_pathkeys = sorted_path -> pathkeys ;
1027
- }
1028
- else
1029
- {
1030
- sorted_p .startup_cost = cheapest_path -> startup_cost ;
1031
- sorted_p .total_cost = cheapest_path -> total_cost ;
1032
- current_pathkeys = cheapest_path -> pathkeys ;
1033
- }
1034
- if (!pathkeys_contained_in (group_pathkeys ,
1035
- current_pathkeys ))
1036
- {
1037
- cost_sort (& sorted_p , parse , group_pathkeys ,
1038
- sorted_p .total_cost ,
1039
- cheapest_path_rows ,
1040
- cheapest_path_width );
1041
- current_pathkeys = group_pathkeys ;
1042
- }
1043
- if (parse -> hasAggs )
1044
- cost_agg (& sorted_p , parse ,
1045
- AGG_SORTED , agg_counts .numAggs ,
1046
- numGroupCols , dNumGroups ,
1047
- sorted_p .startup_cost ,
1048
- sorted_p .total_cost ,
1049
- cheapest_path_rows );
1050
- else
1051
- cost_group (& sorted_p , parse ,
1052
- numGroupCols , dNumGroups ,
1053
- sorted_p .startup_cost ,
1054
- sorted_p .total_cost ,
1055
- cheapest_path_rows );
1056
- /* The Agg or Group node will preserve ordering */
1057
- if (sort_pathkeys &&
1058
- !pathkeys_contained_in (sort_pathkeys ,
1059
- current_pathkeys ))
1060
- {
1061
- cost_sort (& sorted_p , parse , sort_pathkeys ,
1062
- sorted_p .total_cost ,
1063
- dNumGroups ,
1064
- cheapest_path_width );
1065
- }
1066
-
1067
- /*
1068
- * Now make the decision using the top-level tuple
1069
- * fraction. First we have to convert an absolute
1070
- * count (LIMIT) into fractional form.
1071
- */
1072
- if (tuple_fraction >= 1.0 )
1073
- tuple_fraction /= dNumGroups ;
1074
-
1075
- if (compare_fractional_path_costs (& hashed_p , & sorted_p ,
1076
- tuple_fraction ) < 0 )
1077
- {
1078
- /* Hashed is cheaper, so use it */
1079
- use_hashed_grouping = true;
1080
- }
1081
- }
1082
- }
954
+ use_hashed_grouping =
955
+ choose_hashed_grouping (parse , tuple_fraction ,
956
+ cheapest_path , sorted_path ,
957
+ sort_pathkeys , group_pathkeys ,
958
+ dNumGroups , & agg_counts );
1083
959
}
1084
960
1085
961
/*
@@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction)
1331
1207
return result_plan ;
1332
1208
}
1333
1209
1210
+ /*
1211
+ * choose_hashed_grouping - should we use hashed grouping?
1212
+ */
1213
+ static bool
1214
+ choose_hashed_grouping (Query * parse , double tuple_fraction ,
1215
+ Path * cheapest_path , Path * sorted_path ,
1216
+ List * sort_pathkeys , List * group_pathkeys ,
1217
+ double dNumGroups , AggClauseCounts * agg_counts )
1218
+ {
1219
+ int numGroupCols = list_length (parse -> groupClause );
1220
+ double cheapest_path_rows ;
1221
+ int cheapest_path_width ;
1222
+ Size hashentrysize ;
1223
+ List * current_pathkeys ;
1224
+ Path hashed_p ;
1225
+ Path sorted_p ;
1226
+
1227
+ /*
1228
+ * Check can't-do-it conditions, including whether the grouping operators
1229
+ * are hashjoinable.
1230
+ *
1231
+ * Executor doesn't support hashed aggregation with DISTINCT aggregates.
1232
+ * (Doing so would imply storing *all* the input values in the hash table,
1233
+ * which seems like a certain loser.)
1234
+ */
1235
+ if (!enable_hashagg )
1236
+ return false;
1237
+ if (agg_counts -> numDistinctAggs != 0 )
1238
+ return false;
1239
+ if (!hash_safe_grouping (parse ))
1240
+ return false;
1241
+
1242
+ /*
1243
+ * Don't do it if it doesn't look like the hashtable will fit into
1244
+ * work_mem.
1245
+ *
1246
+ * Beware here of the possibility that cheapest_path->parent is NULL.
1247
+ * This could happen if user does something silly like
1248
+ * SELECT 'foo' GROUP BY 1;
1249
+ */
1250
+ if (cheapest_path -> parent )
1251
+ {
1252
+ cheapest_path_rows = cheapest_path -> parent -> rows ;
1253
+ cheapest_path_width = cheapest_path -> parent -> width ;
1254
+ }
1255
+ else
1256
+ {
1257
+ cheapest_path_rows = 1 ; /* assume non-set result */
1258
+ cheapest_path_width = 100 ; /* arbitrary */
1259
+ }
1260
+
1261
+ /* Estimate per-hash-entry space at tuple width... */
1262
+ hashentrysize = cheapest_path_width ;
1263
+ /* plus space for pass-by-ref transition values... */
1264
+ hashentrysize += agg_counts -> transitionSpace ;
1265
+ /* plus the per-hash-entry overhead */
1266
+ hashentrysize += hash_agg_entry_size (agg_counts -> numAggs );
1267
+
1268
+ if (hashentrysize * dNumGroups > work_mem * 1024L )
1269
+ return false;
1270
+
1271
+ /*
1272
+ * See if the estimated cost is no more than doing it the other way.
1273
+ * While avoiding the need for sorted input is usually a win, the fact
1274
+ * that the output won't be sorted may be a loss; so we need to do an
1275
+ * actual cost comparison.
1276
+ *
1277
+ * We need to consider
1278
+ * cheapest_path + hashagg [+ final sort]
1279
+ * versus either
1280
+ * cheapest_path [+ sort] + group or agg [+ final sort]
1281
+ * or
1282
+ * presorted_path + group or agg [+ final sort]
1283
+ * where brackets indicate a step that may not be needed. We assume
1284
+ * query_planner() will have returned a presorted path only if it's a
1285
+ * winner compared to cheapest_path for this purpose.
1286
+ *
1287
+ * These path variables are dummies that just hold cost fields; we don't
1288
+ * make actual Paths for these steps.
1289
+ */
1290
+ cost_agg (& hashed_p , parse , AGG_HASHED , agg_counts -> numAggs ,
1291
+ numGroupCols , dNumGroups ,
1292
+ cheapest_path -> startup_cost , cheapest_path -> total_cost ,
1293
+ cheapest_path_rows );
1294
+ /* Result of hashed agg is always unsorted */
1295
+ if (sort_pathkeys )
1296
+ cost_sort (& hashed_p , parse , sort_pathkeys , hashed_p .total_cost ,
1297
+ dNumGroups , cheapest_path_width );
1298
+
1299
+ if (sorted_path )
1300
+ {
1301
+ sorted_p .startup_cost = sorted_path -> startup_cost ;
1302
+ sorted_p .total_cost = sorted_path -> total_cost ;
1303
+ current_pathkeys = sorted_path -> pathkeys ;
1304
+ }
1305
+ else
1306
+ {
1307
+ sorted_p .startup_cost = cheapest_path -> startup_cost ;
1308
+ sorted_p .total_cost = cheapest_path -> total_cost ;
1309
+ current_pathkeys = cheapest_path -> pathkeys ;
1310
+ }
1311
+ if (!pathkeys_contained_in (group_pathkeys ,
1312
+ current_pathkeys ))
1313
+ {
1314
+ cost_sort (& sorted_p , parse , group_pathkeys , sorted_p .total_cost ,
1315
+ cheapest_path_rows , cheapest_path_width );
1316
+ current_pathkeys = group_pathkeys ;
1317
+ }
1318
+
1319
+ if (parse -> hasAggs )
1320
+ cost_agg (& sorted_p , parse , AGG_SORTED , agg_counts -> numAggs ,
1321
+ numGroupCols , dNumGroups ,
1322
+ sorted_p .startup_cost , sorted_p .total_cost ,
1323
+ cheapest_path_rows );
1324
+ else
1325
+ cost_group (& sorted_p , parse , numGroupCols , dNumGroups ,
1326
+ sorted_p .startup_cost , sorted_p .total_cost ,
1327
+ cheapest_path_rows );
1328
+ /* The Agg or Group node will preserve ordering */
1329
+ if (sort_pathkeys &&
1330
+ !pathkeys_contained_in (sort_pathkeys , current_pathkeys ))
1331
+ cost_sort (& sorted_p , parse , sort_pathkeys , sorted_p .total_cost ,
1332
+ dNumGroups , cheapest_path_width );
1333
+
1334
+ /*
1335
+ * Now make the decision using the top-level tuple fraction. First we
1336
+ * have to convert an absolute count (LIMIT) into fractional form.
1337
+ */
1338
+ if (tuple_fraction >= 1.0 )
1339
+ tuple_fraction /= dNumGroups ;
1340
+
1341
+ if (compare_fractional_path_costs (& hashed_p , & sorted_p ,
1342
+ tuple_fraction ) < 0 )
1343
+ {
1344
+ /* Hashed is cheaper, so use it */
1345
+ return true;
1346
+ }
1347
+ return false;
1348
+ }
1349
+
1334
1350
/*
1335
1351
* hash_safe_grouping - are grouping operators hashable?
1336
1352
*
0 commit comments