Skip to content

Commit 4adc2f7

Browse files
committed
Change hash indexes to store only the hash code rather than the whole indexed
value. This means that hash index lookups are always lossy and have to be rechecked when the heap is visited; however, the gain in index compactness outweighs this when the indexed values are wide. Also, we only need to perform datatype comparisons when the hash codes match exactly, rather than for every entry in the hash bucket; so it could also win for datatypes that have expensive comparison functions. A small additional win is gained by keeping hash index pages sorted by hash code and using binary search to reduce the number of index tuples we have to look at. Xiao Meng This commit also incorporates Zdenek Kotala's patch to isolate hash metapages and hash bitmaps a bit better from the page header datastructures.
1 parent 440b338 commit 4adc2f7

File tree

13 files changed

+313
-129
lines changed

13 files changed

+313
-129
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.173 2008/09/10 18:09:19 alvherre Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.174 2008/09/15 18:43:41 tgl Exp $ -->
22
<!--
33
Documentation of the system catalogs, directed toward PostgreSQL developers
44
-->
@@ -451,6 +451,13 @@
451451
<entry>Can an index of this type be clustered on?</entry>
452452
</row>
453453

454+
<row>
455+
<entry><structfield>amkeytype</structfield></entry>
456+
<entry><type>oid</type></entry>
457+
<entry><literal><link linkend="catalog-pg-type"><structname>pg_type</structname></link>.oid</literal></entry>
458+
<entry>Type of data stored in index, or zero if not a fixed type</entry>
459+
</row>
460+
454461
<row>
455462
<entry><structfield>aminsert</structfield></entry>
456463
<entry><type>regproc</type></entry>
@@ -6424,7 +6431,7 @@
64246431
<row>
64256432
<entry><structfield>sourceline</structfield></entry>
64266433
<entry><type>text</type></entry>
6427-
<entry>Line number within the sourcefile the current value was set
6434+
<entry>Line number within the sourcefile the current value was set
64286435
from (NULL for values set in sources other than configuration files)
64296436
</entry>
64306437
</row>

src/backend/access/hash/hash.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.104 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.105 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -79,12 +79,12 @@ hashbuild(PG_FUNCTION_ARGS)
7979
* then we'll thrash horribly. To prevent that scenario, we can sort the
8080
* tuples by (expected) bucket number. However, such a sort is useless
8181
* overhead when the index does fit in RAM. We choose to sort if the
82-
* initial index size exceeds effective_cache_size.
82+
* initial index size exceeds NBuffers.
8383
*
8484
* NOTE: this test will need adjustment if a bucket is ever different
8585
* from one page.
8686
*/
87-
if (num_buckets >= (uint32) effective_cache_size)
87+
if (num_buckets >= (uint32) NBuffers)
8888
buildstate.spool = _h_spoolinit(index, num_buckets);
8989
else
9090
buildstate.spool = NULL;
@@ -129,7 +129,7 @@ hashbuildCallback(Relation index,
129129
IndexTuple itup;
130130

131131
/* form an index tuple and point it at the heap tuple */
132-
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
132+
itup = _hash_form_tuple(index, values, isnull);
133133
itup->t_tid = htup->t_self;
134134

135135
/* Hash indexes don't index nulls, see notes in hashinsert */
@@ -153,8 +153,8 @@ hashbuildCallback(Relation index,
153153
/*
154154
* hashinsert() -- insert an index tuple into a hash table.
155155
*
156-
* Hash on the index tuple's key, find the appropriate location
157-
* for the new tuple, and put it there.
156+
* Hash on the heap tuple's key, form an index tuple with hash code.
157+
* Find the appropriate location for the new tuple, and put it there.
158158
*/
159159
Datum
160160
hashinsert(PG_FUNCTION_ARGS)
@@ -171,7 +171,7 @@ hashinsert(PG_FUNCTION_ARGS)
171171
IndexTuple itup;
172172

173173
/* generate an index tuple */
174-
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
174+
itup = _hash_form_tuple(rel, values, isnull);
175175
itup->t_tid = *ht_ctid;
176176

177177
/*
@@ -211,8 +211,8 @@ hashgettuple(PG_FUNCTION_ARGS)
211211
OffsetNumber offnum;
212212
bool res;
213213

214-
/* Hash indexes are never lossy (at the moment anyway) */
215-
scan->xs_recheck = false;
214+
/* Hash indexes are always lossy since we store only the hash code */
215+
scan->xs_recheck = true;
216216

217217
/*
218218
* We hold pin but not lock on current buffer while outside the hash AM.
@@ -317,7 +317,8 @@ hashgetbitmap(PG_FUNCTION_ARGS)
317317
/* Save tuple ID, and continue scanning */
318318
if (add_tuple)
319319
{
320-
tbm_add_tuples(tbm, &scan->xs_ctup.t_self, 1, false);
320+
/* Note we mark the tuple ID as requiring recheck */
321+
tbm_add_tuples(tbm, &scan->xs_ctup.t_self, 1, true);
321322
ntids++;
322323
}
323324

@@ -527,7 +528,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
527528
* each bucket.
528529
*/
529530
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
530-
metap = (HashMetaPage) BufferGetPage(metabuf);
531+
metap = HashPageGetMeta(BufferGetPage(metabuf));
531532
orig_maxbucket = metap->hashm_maxbucket;
532533
orig_ntuples = metap->hashm_ntuples;
533534
memcpy(&local_metapage, metap, sizeof(local_metapage));
@@ -629,7 +630,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
629630

630631
/* Write-lock metapage and check for split since we started */
631632
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE, LH_META_PAGE);
632-
metap = (HashMetaPage) BufferGetPage(metabuf);
633+
metap = HashPageGetMeta(BufferGetPage(metabuf));
633634

634635
if (cur_maxbucket != metap->hashm_maxbucket)
635636
{

src/backend/access/hash/hashinsert.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.50 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.51 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -43,18 +43,11 @@ _hash_doinsert(Relation rel, IndexTuple itup)
4343
bool do_expand;
4444
uint32 hashkey;
4545
Bucket bucket;
46-
Datum datum;
47-
bool isnull;
4846

4947
/*
50-
* Compute the hash key for the item. We do this first so as not to need
51-
* to hold any locks while running the hash function.
48+
* Get the hash key for the item (it's stored in the index tuple itself).
5249
*/
53-
if (rel->rd_rel->relnatts != 1)
54-
elog(ERROR, "hash indexes support only one index key");
55-
datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull);
56-
Assert(!isnull);
57-
hashkey = _hash_datum2hashkey(rel, datum);
50+
hashkey = _hash_get_indextuple_hashkey(itup);
5851

5952
/* compute item size too */
6053
itemsz = IndexTupleDSize(*itup);
@@ -69,12 +62,14 @@ _hash_doinsert(Relation rel, IndexTuple itup)
6962

7063
/* Read the metapage */
7164
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
72-
metap = (HashMetaPage) BufferGetPage(metabuf);
65+
metap = HashPageGetMeta(BufferGetPage(metabuf));
7366

7467
/*
7568
* Check whether the item can fit on a hash page at all. (Eventually, we
7669
* ought to try to apply TOAST methods if not.) Note that at this point,
7770
* itemsz doesn't include the ItemId.
71+
*
72+
* XXX this is useless code if we are only storing hash keys.
7873
*/
7974
if (itemsz > HashMaxItemSize((Page) metap))
8075
ereport(ERROR,
@@ -197,11 +192,15 @@ _hash_pgaddtup(Relation rel,
197192
{
198193
OffsetNumber itup_off;
199194
Page page;
195+
uint32 hashkey;
200196

201197
_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
202198
page = BufferGetPage(buf);
203199

204-
itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
200+
/* Find where to insert the tuple (preserving page's hashkey ordering) */
201+
hashkey = _hash_get_indextuple_hashkey(itup);
202+
itup_off = _hash_binsearch(page, hashkey);
203+
205204
if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
206205
== InvalidOffsetNumber)
207206
elog(ERROR, "failed to add index item to \"%s\"",

src/backend/access/hash/hashovfl.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.64 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.65 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* Overflow pages look like ordinary relation pages.
@@ -187,7 +187,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
187187
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
188188

189189
_hash_checkpage(rel, metabuf, LH_META_PAGE);
190-
metap = (HashMetaPage) BufferGetPage(metabuf);
190+
metap = HashPageGetMeta(BufferGetPage(metabuf));
191191

192192
/* start search at hashm_firstfree */
193193
orig_firstfree = metap->hashm_firstfree;
@@ -450,7 +450,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
450450

451451
/* Read the metapage so we can determine which bitmap page to use */
452452
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
453-
metap = (HashMetaPage) BufferGetPage(metabuf);
453+
metap = HashPageGetMeta(BufferGetPage(metabuf));
454454

455455
/* Identify which bit to set */
456456
ovflbitno = blkno_to_bitno(metap, ovflblkno);

src/backend/access/hash/hashpage.c

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.77 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -348,11 +348,9 @@ _hash_metapinit(Relation rel, double num_tuples)
348348
* Determine the target fill factor (in tuples per bucket) for this index.
349349
* The idea is to make the fill factor correspond to pages about as full
350350
* as the user-settable fillfactor parameter says. We can compute it
351-
* exactly if the index datatype is fixed-width, but for var-width there's
352-
* some guessing involved.
351+
* exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
353352
*/
354-
data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
355-
RelationGetDescr(rel)->attrs[0]->atttypmod);
353+
data_width = sizeof(uint32);
356354
item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
357355
sizeof(ItemIdData); /* include the line pointer */
358356
ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
@@ -395,20 +393,18 @@ _hash_metapinit(Relation rel, double num_tuples)
395393
pageopaque->hasho_flag = LH_META_PAGE;
396394
pageopaque->hasho_page_id = HASHO_PAGE_ID;
397395

398-
metap = (HashMetaPage) pg;
396+
metap = HashPageGetMeta(pg);
399397

400398
metap->hashm_magic = HASH_MAGIC;
401399
metap->hashm_version = HASH_VERSION;
402400
metap->hashm_ntuples = 0;
403401
metap->hashm_nmaps = 0;
404402
metap->hashm_ffactor = ffactor;
405-
metap->hashm_bsize = BufferGetPageSize(metabuf);
403+
metap->hashm_bsize = HashGetMaxBitmapSize(pg);
406404
/* find largest bitmap array size that will fit in page size */
407405
for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
408406
{
409-
if ((1 << i) <= (metap->hashm_bsize -
410-
(MAXALIGN(sizeof(PageHeaderData)) +
411-
MAXALIGN(sizeof(HashPageOpaqueData)))))
407+
if ((1 << i) <= metap->hashm_bsize)
412408
break;
413409
}
414410
Assert(i > 0);
@@ -532,7 +528,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
532528
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
533529

534530
_hash_checkpage(rel, metabuf, LH_META_PAGE);
535-
metap = (HashMetaPage) BufferGetPage(metabuf);
531+
metap = HashPageGetMeta(BufferGetPage(metabuf));
536532

537533
/*
538534
* Check to see if split is still needed; someone else might have already
@@ -774,8 +770,6 @@ _hash_splitbucket(Relation rel,
774770
Buffer nbuf;
775771
BlockNumber oblkno;
776772
BlockNumber nblkno;
777-
bool null;
778-
Datum datum;
779773
HashPageOpaque oopaque;
780774
HashPageOpaque nopaque;
781775
IndexTuple itup;
@@ -785,7 +779,6 @@ _hash_splitbucket(Relation rel,
785779
OffsetNumber omaxoffnum;
786780
Page opage;
787781
Page npage;
788-
TupleDesc itupdesc = RelationGetDescr(rel);
789782

790783
/*
791784
* It should be okay to simultaneously write-lock pages from each bucket,
@@ -846,16 +839,11 @@ _hash_splitbucket(Relation rel,
846839
}
847840

848841
/*
849-
* Re-hash the tuple to determine which bucket it now belongs in.
850-
*
851-
* It is annoying to call the hash function while holding locks, but
852-
* releasing and relocking the page for each tuple is unappealing too.
842+
* Fetch the item's hash key (conveniently stored in the item)
843+
* and determine which bucket it now belongs in.
853844
*/
854845
itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
855-
datum = index_getattr(itup, 1, itupdesc, &null);
856-
Assert(!null);
857-
858-
bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
846+
bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
859847
maxbucket, highmask, lowmask);
860848

861849
if (bucket == nbucket)

0 commit comments

Comments (0)