Skip to content

Commit 07f6913

Browse files
committed
Fix building of large (bigger than shared_buffers) hash indexes.
When the index is predicted to need more than NBuffers buckets, CREATE INDEX attempts to sort the index entries by hash key before insertion, so as to reduce thrashing. This code path got broken by commit 9f03ca9, which overlooked that _hash_form_tuple() is not just an alias for index_form_tuple(). The index got built anyway, but with garbage data, so that searches for pre-existing tuples always failed. Fix by refactoring to separate construction of the indexable data from calling index_form_tuple(). Per bug #14210 from Daniel Newman. Back-patch to 9.5 where the bug was introduced. Report: <20160623162507.17237.39471@wrigleys.postgresql.org>
1 parent b4e6123 commit 07f6913

File tree

3 files changed

+48
-35
lines changed

3 files changed

+48
-35
lines changed

src/backend/access/hash/hash.c

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -140,19 +140,25 @@ hashbuildCallback(Relation index,
140140
void *state)
141141
{
142142
HashBuildState *buildstate = (HashBuildState *) state;
143+
Datum index_values[1];
144+
bool index_isnull[1];
143145
IndexTuple itup;
144146

145-
/* Hash indexes don't index nulls, see notes in hashinsert */
146-
if (isnull[0])
147+
/* convert data to a hash key; on failure, do not insert anything */
148+
if (!_hash_convert_tuple(index,
149+
values, isnull,
150+
index_values, index_isnull))
147151
return;
148152

149153
/* Either spool the tuple for sorting, or just put it into the index */
150154
if (buildstate->spool)
151-
_h_spool(buildstate->spool, &htup->t_self, values, isnull);
155+
_h_spool(buildstate->spool, &htup->t_self,
156+
index_values, index_isnull);
152157
else
153158
{
154159
/* form an index tuple and point it at the heap tuple */
155-
itup = _hash_form_tuple(index, values, isnull);
160+
itup = index_form_tuple(RelationGetDescr(index),
161+
index_values, index_isnull);
156162
itup->t_tid = htup->t_self;
157163
_hash_doinsert(index, itup);
158164
pfree(itup);
@@ -179,22 +185,18 @@ hashinsert(PG_FUNCTION_ARGS)
179185
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
180186
IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
181187
#endif
188+
Datum index_values[1];
189+
bool index_isnull[1];
182190
IndexTuple itup;
183191

184-
/*
185-
* If the single index key is null, we don't insert it into the index.
186-
* Hash tables support scans on '='. Relational algebra says that A = B
187-
* returns null if either A or B is null. This means that no
188-
* qualification used in an index scan could ever return true on a null
189-
* attribute. It also means that indices can't be used by ISNULL or
190-
* NOTNULL scans, but that's an artifact of the strategy map architecture
191-
* chosen in 1986, not of the way nulls are handled here.
192-
*/
193-
if (isnull[0])
192+
/* convert data to a hash key; on failure, do not insert anything */
193+
if (!_hash_convert_tuple(rel,
194+
values, isnull,
195+
index_values, index_isnull))
194196
PG_RETURN_BOOL(false);
195197

196-
/* generate an index tuple */
197-
itup = _hash_form_tuple(rel, values, isnull);
198+
/* form an index tuple and point it at the heap tuple */
199+
itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
198200
itup->t_tid = *ht_ctid;
199201

200202
_hash_doinsert(rel, itup);

src/backend/access/hash/hashutil.c

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -248,27 +248,37 @@ _hash_get_indextuple_hashkey(IndexTuple itup)
248248
}
249249

250250
/*
251-
* _hash_form_tuple - form an index tuple containing hash code only
251+
* _hash_convert_tuple - convert raw index data to hash key
252+
*
253+
* Inputs: values and isnull arrays for the user data column(s)
254+
* Outputs: values and isnull arrays for the index tuple, suitable for
255+
* passing to index_form_tuple().
256+
*
257+
* Returns true if successful, false if not (because there are null values).
258+
* On a false result, the given data need not be indexed.
259+
*
260+
* Note: callers know that the index-column arrays are always of length 1.
261+
* In principle, there could be more than one input column, though we do not
262+
* currently support that.
252263
*/
253-
IndexTuple
254-
_hash_form_tuple(Relation index, Datum *values, bool *isnull)
264+
bool
265+
_hash_convert_tuple(Relation index,
266+
Datum *user_values, bool *user_isnull,
267+
Datum *index_values, bool *index_isnull)
255268
{
256-
IndexTuple itup;
257269
uint32 hashkey;
258-
Datum hashkeydatum;
259-
TupleDesc hashdesc;
260270

261-
if (isnull[0])
262-
hashkeydatum = (Datum) 0;
263-
else
264-
{
265-
hashkey = _hash_datum2hashkey(index, values[0]);
266-
hashkeydatum = UInt32GetDatum(hashkey);
267-
}
268-
hashdesc = RelationGetDescr(index);
269-
Assert(hashdesc->natts == 1);
270-
itup = index_form_tuple(hashdesc, &hashkeydatum, isnull);
271-
return itup;
271+
/*
272+
* We do not insert null values into hash indexes. This is okay because
273+
* the only supported search operator is '=', and we assume it is strict.
274+
*/
275+
if (user_isnull[0])
276+
return false;
277+
278+
hashkey = _hash_datum2hashkey(index, user_values[0]);
279+
index_values[0] = UInt32GetDatum(hashkey);
280+
index_isnull[0] = false;
281+
return true;
272282
}
273283

274284
/*

src/include/access/hash.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,9 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
350350
extern uint32 _hash_log2(uint32 num);
351351
extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
352352
extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup);
353-
extern IndexTuple _hash_form_tuple(Relation index,
354-
Datum *values, bool *isnull);
353+
extern bool _hash_convert_tuple(Relation index,
354+
Datum *user_values, bool *user_isnull,
355+
Datum *index_values, bool *index_isnull);
355356
extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
356357
extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
357358

0 commit comments

Comments
 (0)