@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
-	double		hash_table_bytes;
+	long		hash_table_bytes;
+	double		dtmp;
 	int			nbatch;
 	int			nbuckets;
 	int			totalbuckets;
@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
 	/*
-	 * Target hashtable size is SortMem kilobytes, but not less than
-	 * sqrt(estimated inner rel size), so as to avoid horrible
-	 * performance.
+	 * Target in-memory hashtable size is SortMem kilobytes.
 	 */
-	hash_table_bytes = sqrt(inner_rel_bytes);
-	if (hash_table_bytes < (SortMem * 1024L))
-		hash_table_bytes = SortMem * 1024L;
+	hash_table_bytes = SortMem * 1024L;
 
 	/*
 	 * Count the number of hash buckets we want for the whole relation,
 	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
-	 * bucket!).
+	 * bucket!).  It has to fit in an int, however.
 	 */
-	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	if (dtmp < INT_MAX)
+		totalbuckets = (int) dtmp;
+	else
+		totalbuckets = INT_MAX;
+	if (totalbuckets <= 0)
+		totalbuckets = 1;
 
 	/*
 	 * Count the number of buckets we think will actually fit in the
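The hunk above replaces a bare (int) cast with a clamp. Casting a double larger than INT_MAX to int is undefined behavior in C, so the comparison has to happen while the value is still a double, and only then is the cast performed. A minimal standalone sketch of the pattern; clamp_double_to_int() is a hypothetical helper for illustration, not part of this commit:

#include <limits.h>
#include <math.h>

/* Saturate a non-negative double estimate into a positive int. */
static int
clamp_double_to_int(double dtmp)
{
	int			result;

	if (dtmp < INT_MAX)			/* compare as double, before the cast */
		result = (int) dtmp;
	else
		result = INT_MAX;		/* saturate instead of overflowing */
	if (result <= 0)			/* guard zero or rounded-down estimates */
		result = 1;
	return result;
}

For example, an estimate of 1e10 tuples gives ceil(1e10 * FUDGE_FAC / NTUP_PER_BUCKET), which does not fit in a 32-bit int; the helper returns INT_MAX instead of an undefined cast result.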
@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	 * that nbatch doesn't have to have anything to do with the ratio
 	 * totalbuckets/nbuckets; in fact, it is the number of groups we
 	 * will use for the part of the data that doesn't fall into the
-	 * first nbuckets hash buckets.
+	 * first nbuckets hash buckets.  We try to set it to make all the
+	 * batches the same size.  But we have to keep nbatch small
+	 * enough to avoid integer overflow in ExecHashJoinGetBatch().
 	 */
-	nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-						hash_table_bytes);
+	dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+				hash_table_bytes);
+	if (dtmp < INT_MAX / totalbuckets)
+		nbatch = (int) dtmp;
+	else
+		nbatch = INT_MAX / totalbuckets;
 	if (nbatch <= 0)
 		nbatch = 1;
 }
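The INT_MAX / totalbuckets cap exists because ExecHashJoinGetBatch() multiplies a virtual bucket number (bounded by totalbuckets) by nbatch. That function is not part of this diff; the sketch below assumes a proportional bucket-to-batch mapping of that general shape, consistent with the "make all the batches the same size" comment, to show where the product can overflow:

/*
 * Sketch of the batch-number computation this commit protects (assumed
 * shape; the real ExecHashJoinGetBatch() is not shown in this diff).
 * Buckets below nbuckets stay in memory (batch 0); the rest are spread
 * proportionally over batches 1..nbatch.
 */
static int
get_batch_sketch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
{
	if (bucketno < nbuckets || nbatch == 0)
		return 0;				/* in-memory bucket, no batching */

	/*
	 * (bucketno - nbuckets) * nbatch is bounded by totalbuckets * nbatch,
	 * so the nbatch <= INT_MAX / totalbuckets cap chosen in
	 * ExecChooseHashTableSize() keeps this product from overflowing int.
	 */
	return (bucketno - nbuckets) * nbatch / (totalbuckets - nbuckets) + 1;
}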