Skip to content

Commit ca5b42d

Browse files
committed
Fix some issues in new hashtable size calculations in nodeHash.c.
Limit the size of the hashtable pointer array to not more than MaxAllocSize, per reports from Kouhei Kaigai and others of "invalid memory alloc request size" failures. There was discussion of allowing the array to get larger than that by using the "huge" palloc API, but so far no proof that that is actually a good idea, and at this point in the 9.5 cycle major changes from old behavior don't seem like the way to go. Fix a rather serious secondary bug in the new code, which was that it didn't ensure nbuckets remained a power of 2 when recomputing it for the multiple-batch case. Clean up sloppy division of labor between ExecHashIncreaseNumBuckets and its sole call site.
1 parent 544ccf6 commit ca5b42d

File tree

2 files changed

+31
-31
lines changed

2 files changed

+31
-31
lines changed

src/backend/executor/nodeHash.c

+30-30
Original file line number | Diff line number | Diff line change
@@ -131,17 +131,7 @@ MultiExecHash(HashState *node)
131131

132132
/* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */
133133
if (hashtable->nbuckets != hashtable->nbuckets_optimal)
134-
{
135-
/* We never decrease the number of buckets. */
136-
Assert(hashtable->nbuckets_optimal > hashtable->nbuckets);
137-
138-
#ifdef HJDEBUG
139-
printf("Increasing nbuckets %d => %d\n",
140-
hashtable->nbuckets, hashtable->nbuckets_optimal);
141-
#endif
142-
143134
ExecHashIncreaseNumBuckets(hashtable);
144-
}
145135

146136
/* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */
147137
hashtable->spaceUsed += hashtable->nbuckets * sizeof(HashJoinTuple);
@@ -486,23 +476,31 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
486476

487477
/*
488478
* Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
489-
* memory is filled, assuming a single batch. The Min() step limits the
490-
* results so that the pointer arrays we'll try to allocate do not exceed
491-
* work_mem.
479+
* memory is filled, assuming a single batch; but limit the value so that
480+
* the pointer arrays we'll try to allocate do not exceed work_mem nor
481+
* MaxAllocSize.
482+
*
483+
* Note that both nbuckets and nbatch must be powers of 2 to make
484+
* ExecHashGetBucketAndBatch fast.
492485
*/
493-
max_pointers = (work_mem * 1024L) / sizeof(void *);
486+
max_pointers = (work_mem * 1024L) / sizeof(HashJoinTuple);
487+
max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
494488
/* also ensure we avoid integer overflow in nbatch and nbuckets */
489+
/* (this step is redundant given the current value of MaxAllocSize) */
495490
max_pointers = Min(max_pointers, INT_MAX / 2);
491+
496492
dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
497493
dbuckets = Min(dbuckets, max_pointers);
494+
/* don't let nbuckets be really small, though ... */
498495
nbuckets = Max((int) dbuckets, 1024);
496+
/* ... and force it to be a power of 2. */
499497
nbuckets = 1 << my_log2(nbuckets);
500-
bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
501498

502499
/*
503500
* If there's not enough space to store the projected number of tuples and
504501
* the required bucket headers, we will need multiple batches.
505502
*/
503+
bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
506504
if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
507505
{
508506
/* We'll need multiple batches */
@@ -521,6 +519,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
521519
lbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
522520
lbuckets = Min(lbuckets, max_pointers);
523521
nbuckets = (int) lbuckets;
522+
nbuckets = 1 << my_log2(nbuckets);
524523
bucket_bytes = nbuckets * sizeof(HashJoinTuple);
525524

526525
/*
@@ -760,21 +759,18 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
760759
if (hashtable->nbuckets >= hashtable->nbuckets_optimal)
761760
return;
762761

763-
/*
764-
* We already know the optimal number of buckets, so let's just compute
765-
* the log2_nbuckets for it.
766-
*/
762+
#ifdef HJDEBUG
763+
printf("Increasing nbuckets %d => %d\n",
764+
hashtable->nbuckets, hashtable->nbuckets_optimal);
765+
#endif
766+
767767
hashtable->nbuckets = hashtable->nbuckets_optimal;
768-
hashtable->log2_nbuckets = my_log2(hashtable->nbuckets_optimal);
768+
hashtable->log2_nbuckets = hashtable->log2_nbuckets_optimal;
769769

770770
Assert(hashtable->nbuckets > 1);
771771
Assert(hashtable->nbuckets <= (INT_MAX / 2));
772772
Assert(hashtable->nbuckets == (1 << hashtable->log2_nbuckets));
773773

774-
#ifdef HJDEBUG
775-
printf("Increasing nbuckets to %d\n", hashtable->nbuckets);
776-
#endif
777-
778774
/*
779775
* Just reallocate the proper number of buckets - we don't need to walk
780776
* through them - we can walk the dense-allocated chunks (just like in
@@ -785,7 +781,7 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
785781
(HashJoinTuple *) repalloc(hashtable->buckets,
786782
hashtable->nbuckets * sizeof(HashJoinTuple));
787783

788-
memset(hashtable->buckets, 0, sizeof(void *) * hashtable->nbuckets);
784+
memset(hashtable->buckets, 0, hashtable->nbuckets * sizeof(HashJoinTuple));
789785

790786
/* scan through all tuples in all chunks to rebuild the hash table */
791787
for (chunk = hashtable->chunks; chunk != NULL; chunk = chunk->next)
@@ -878,12 +874,16 @@ ExecHashTableInsert(HashJoinTable hashtable,
878874
* NTUP_PER_BUCKET threshold, but only when there's still a single
879875
* batch.
880876
*/
881-
if ((hashtable->nbatch == 1) &&
882-
(hashtable->nbuckets_optimal <= INT_MAX / 2) && /* overflow protection */
883-
(ntuples >= (hashtable->nbuckets_optimal * NTUP_PER_BUCKET)))
877+
if (hashtable->nbatch == 1 &&
878+
ntuples > (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))
884879
{
885-
hashtable->nbuckets_optimal *= 2;
886-
hashtable->log2_nbuckets_optimal += 1;
880+
/* Guard against integer overflow and alloc size overflow */
881+
if (hashtable->nbuckets_optimal <= INT_MAX / 2 &&
882+
hashtable->nbuckets_optimal * 2 <= MaxAllocSize / sizeof(HashJoinTuple))
883+
{
884+
hashtable->nbuckets_optimal *= 2;
885+
hashtable->log2_nbuckets_optimal += 1;
886+
}
887887
}
888888

889889
/* Account for space used, and back off if we've used too much */

src/include/executor/hashjoin.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ typedef struct HashJoinTableData
131131
int nbuckets_original; /* # buckets when starting the first
132132
* hash */
133133
int nbuckets_optimal; /* optimal # buckets (per batch) */
134-
int log2_nbuckets_optimal; /* same as log2_nbuckets optimal */
134+
int log2_nbuckets_optimal; /* log2(nbuckets_optimal) */
135135

136136
/* buckets[i] is head of list of tuples in i'th in-memory bucket */
137137
struct HashJoinTupleData **buckets;

0 commit comments

Comments (0)