@@ -131,17 +131,7 @@ MultiExecHash(HashState *node)
 
 	/* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */
 	if (hashtable->nbuckets != hashtable->nbuckets_optimal)
-	{
-		/* We never decrease the number of buckets. */
-		Assert(hashtable->nbuckets_optimal > hashtable->nbuckets);
-
-#ifdef HJDEBUG
-		printf("Increasing nbuckets %d => %d\n",
-			   hashtable->nbuckets, hashtable->nbuckets_optimal);
-#endif
-
 		ExecHashIncreaseNumBuckets(hashtable);
-	}
 
 	/* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */
 	hashtable->spaceUsed += hashtable->nbuckets * sizeof(HashJoinTuple);
@@ -486,23 +476,31 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 
 	/*
 	 * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-	 * memory is filled, assuming a single batch. The Min() step limits the
-	 * results so that the pointer arrays we'll try to allocate do not exceed
-	 * work_mem.
+	 * memory is filled, assuming a single batch; but limit the value so that
+	 * the pointer arrays we'll try to allocate do not exceed work_mem nor
+	 * MaxAllocSize.
+	 *
+	 * Note that both nbuckets and nbatch must be powers of 2 to make
+	 * ExecHashGetBucketAndBatch fast.
 	 */
-	max_pointers = (work_mem * 1024L) / sizeof(void *);
+	max_pointers = (work_mem * 1024L) / sizeof(HashJoinTuple);
+	max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
 	/* also ensure we avoid integer overflow in nbatch and nbuckets */
+	/* (this step is redundant given the current value of MaxAllocSize) */
 	max_pointers = Min(max_pointers, INT_MAX / 2);
+
 	dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
 	dbuckets = Min(dbuckets, max_pointers);
+	/* don't let nbuckets be really small, though ... */
 	nbuckets = Max((int) dbuckets, 1024);
+	/* ... and force it to be a power of 2. */
 	nbuckets = 1 << my_log2(nbuckets);
-	bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 
 	/*
 	 * If there's not enough space to store the projected number of tuples and
 	 * the required bucket headers, we will need multiple batches.
 	 */
+	bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 	if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
 	{
 		/* We'll need multiple batches */
@@ -521,6 +519,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		lbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
 		lbuckets = Min(lbuckets, max_pointers);
 		nbuckets = (int) lbuckets;
+		nbuckets = 1 << my_log2(nbuckets);
 		bucket_bytes = nbuckets * sizeof(HashJoinTuple);
 
 		/*
@@ -760,21 +759,18 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
 	if (hashtable->nbuckets >= hashtable->nbuckets_optimal)
 		return;
 
-	/*
-	 * We already know the optimal number of buckets, so let's just compute
-	 * the log2_nbuckets for it.
-	 */
+#ifdef HJDEBUG
+	printf("Increasing nbuckets %d => %d\n",
+		   hashtable->nbuckets, hashtable->nbuckets_optimal);
+#endif
+
 	hashtable->nbuckets = hashtable->nbuckets_optimal;
-	hashtable->log2_nbuckets = my_log2(hashtable->nbuckets_optimal);
+	hashtable->log2_nbuckets = hashtable->log2_nbuckets_optimal;
 
 	Assert(hashtable->nbuckets > 1);
 	Assert(hashtable->nbuckets <= (INT_MAX / 2));
 	Assert(hashtable->nbuckets == (1 << hashtable->log2_nbuckets));
 
-#ifdef HJDEBUG
-	printf("Increasing nbuckets to %d\n", hashtable->nbuckets);
-#endif
-
 	/*
 	 * Just reallocate the proper number of buckets - we don't need to walk
 	 * through them - we can walk the dense-allocated chunks (just like in
@@ -785,7 +781,7 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
 		(HashJoinTuple *) repalloc(hashtable->buckets,
 								   hashtable->nbuckets * sizeof(HashJoinTuple));
 
-	memset(hashtable->buckets, 0, sizeof(void *) * hashtable->nbuckets);
+	memset(hashtable->buckets, 0, hashtable->nbuckets * sizeof(HashJoinTuple));
 
 	/* scan through all tuples in all chunks to rebuild the hash table */
 	for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next)
@@ -878,12 +874,16 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		 * NTUP_PER_BUCKET threshold, but only when there's still a single
 		 * batch.
 		 */
-		if ((hashtable->nbatch == 1) &&
-			(hashtable->nbuckets_optimal <= INT_MAX / 2) &&	/* overflow protection */
-			(ntuples >= (hashtable->nbuckets_optimal * NTUP_PER_BUCKET)))
+		if (hashtable->nbatch == 1 &&
+			ntuples > (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))
 		{
-			hashtable->nbuckets_optimal *= 2;
-			hashtable->log2_nbuckets_optimal += 1;
+			/* Guard against integer overflow and alloc size overflow */
+			if (hashtable->nbuckets_optimal <= INT_MAX / 2 &&
+				hashtable->nbuckets_optimal * 2 <= MaxAllocSize / sizeof(HashJoinTuple))
+			{
+				hashtable->nbuckets_optimal *= 2;
+				hashtable->log2_nbuckets_optimal += 1;
+			}
 		}
 
 		/* Account for space used, and back off if we've used too much */