54
54
* Portions Copyright (c) 1994, Regents of the University of California
55
55
*
56
56
* IDENTIFICATION
57
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.181 2007/04/21 21:01:44 tgl Exp $
57
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.182 2007/05/04 01:13:44 tgl Exp $
58
58
*
59
59
*-------------------------------------------------------------------------
60
60
*/
@@ -922,6 +922,10 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
922
922
* disk traffic = 2 * relsize * ceil(logM(p / (2*work_mem)))
923
923
* cpu = comparison_cost * t * log2(t)
924
924
*
925
+ * If the sort is bounded (i.e., only the first k result tuples are needed)
926
+ * and k tuples can fit into work_mem, we use a heap method that keeps only
927
+ * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
928
+ *
925
929
* The disk traffic is assumed to be 3/4ths sequential and 1/4th random
926
930
* accesses (XXX can't we refine that guess?)
927
931
*
@@ -932,6 +936,7 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
932
936
* 'input_cost' is the total cost for reading the input data
933
937
* 'tuples' is the number of tuples in the relation
934
938
* 'width' is the average tuple width in bytes
939
+ * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
935
940
*
936
941
* NOTE: some callers currently pass NIL for pathkeys because they
937
942
* can't conveniently supply the sort keys. Since this routine doesn't
@@ -942,11 +947,14 @@ cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
942
947
*/
943
948
void
944
949
cost_sort (Path * path , PlannerInfo * root ,
945
- List * pathkeys , Cost input_cost , double tuples , int width )
950
+ List * pathkeys , Cost input_cost , double tuples , int width ,
951
+ double limit_tuples )
946
952
{
947
953
Cost startup_cost = input_cost ;
948
954
Cost run_cost = 0 ;
949
- double nbytes = relation_byte_size (tuples , width );
955
+ double input_bytes = relation_byte_size (tuples , width );
956
+ double output_bytes ;
957
+ double output_tuples ;
950
958
long work_mem_bytes = work_mem * 1024L ;
951
959
952
960
if (!enable_sort )
@@ -959,23 +967,39 @@ cost_sort(Path *path, PlannerInfo *root,
959
967
if (tuples < 2.0 )
960
968
tuples = 2.0 ;
961
969
962
- /*
963
- * CPU costs
964
- *
965
- * Assume about two operator evals per tuple comparison and N log2 N
966
- * comparisons
967
- */
968
- startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2 (tuples );
970
+ /* Do we have a useful LIMIT? */
971
+ if (limit_tuples > 0 && limit_tuples < tuples )
972
+ {
973
+ output_tuples = limit_tuples ;
974
+ output_bytes = relation_byte_size (output_tuples , width );
975
+ }
976
+ else
977
+ {
978
+ output_tuples = tuples ;
979
+ output_bytes = input_bytes ;
980
+ }
969
981
970
- /* disk costs */
971
- if (nbytes > work_mem_bytes )
982
+ if (output_bytes > work_mem_bytes )
972
983
{
973
- double npages = ceil (nbytes / BLCKSZ );
974
- double nruns = (nbytes / work_mem_bytes ) * 0.5 ;
984
+ /*
985
+ * We'll have to use a disk-based sort of all the tuples
986
+ */
987
+ double npages = ceil (input_bytes / BLCKSZ );
988
+ double nruns = (input_bytes / work_mem_bytes ) * 0.5 ;
975
989
double mergeorder = tuplesort_merge_order (work_mem_bytes );
976
990
double log_runs ;
977
991
double npageaccesses ;
978
992
993
+ /*
994
+ * CPU costs
995
+ *
996
+ * Assume about two operator evals per tuple comparison and N log2 N
997
+ * comparisons
998
+ */
999
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2 (tuples );
1000
+
1001
+ /* Disk costs */
1002
+
979
1003
/* Compute logM(r) as log(r) / log(M) */
980
1004
if (nruns > mergeorder )
981
1005
log_runs = ceil (log (nruns ) / log (mergeorder ));
@@ -986,10 +1010,27 @@ cost_sort(Path *path, PlannerInfo *root,
986
1010
startup_cost += npageaccesses *
987
1011
(seq_page_cost * 0.75 + random_page_cost * 0.25 );
988
1012
}
1013
+ else if (tuples > 2 * output_tuples || input_bytes > work_mem_bytes )
1014
+ {
1015
+ /*
1016
+ * We'll use a bounded heap-sort keeping just K tuples in memory,
1017
+ * for a total number of tuple comparisons of N log2 K; but the
1018
+ * constant factor is a bit higher than for quicksort. Tweak it
1019
+ * so that the cost curve is continuous at the crossover point.
1020
+ */
1021
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2 (2.0 * output_tuples );
1022
+ }
1023
+ else
1024
+ {
1025
+ /* We'll use plain quicksort on all the input tuples */
1026
+ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2 (tuples );
1027
+ }
989
1028
990
1029
/*
991
1030
* Also charge a small amount (arbitrarily set equal to operator cost) per
992
- * extracted tuple.
1031
+ * extracted tuple. Note it's correct to use tuples not output_tuples
1032
+ * here --- the upper LIMIT will pro-rate the run cost so we'd be double
1033
+ * counting the LIMIT otherwise.
993
1034
*/
994
1035
run_cost += cpu_operator_cost * tuples ;
995
1036
@@ -1431,7 +1472,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
1431
1472
outersortkeys ,
1432
1473
outer_path -> total_cost ,
1433
1474
outer_path_rows ,
1434
- outer_path -> parent -> width );
1475
+ outer_path -> parent -> width ,
1476
+ -1.0 );
1435
1477
startup_cost += sort_path .startup_cost ;
1436
1478
run_cost += (sort_path .total_cost - sort_path .startup_cost )
1437
1479
* outerscansel ;
@@ -1450,7 +1492,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
1450
1492
innersortkeys ,
1451
1493
inner_path -> total_cost ,
1452
1494
inner_path_rows ,
1453
- inner_path -> parent -> width );
1495
+ inner_path -> parent -> width ,
1496
+ -1.0 );
1454
1497
startup_cost += sort_path .startup_cost ;
1455
1498
run_cost += (sort_path .total_cost - sort_path .startup_cost )
1456
1499
* innerscansel * rescanratio ;
0 commit comments