Skip to content

Commit 4adc2f7

Browse files
committed
Change hash indexes to store only the hash code rather than the whole indexed
value. This means that hash index lookups are always lossy and have to be rechecked when the heap is visited; however, the gain in index compactness outweighs this when the indexed values are wide. Also, we only need to perform datatype comparisons when the hash codes match exactly, rather than for every entry in the hash bucket; so it could also win for datatypes that have expensive comparison functions. A small additional win is gained by keeping hash index pages sorted by hash code and using binary search to reduce the number of index tuples we have to look at. Xiao Meng This commit also incorporates Zdenek Kotala's patch to isolate hash metapages and hash bitmaps a bit better from the page header datastructures.
1 parent 440b338 commit 4adc2f7

File tree

13 files changed

+313
-129
lines changed

13 files changed

+313
-129
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.173 2008/09/10 18:09:19 alvherre Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.174 2008/09/15 18:43:41 tgl Exp $ -->
22
<!--
33
Documentation of the system catalogs, directed toward PostgreSQL developers
44
-->
@@ -451,6 +451,13 @@
451451
<entry>Can an index of this type be clustered on?</entry>
452452
</row>
453453

454+
<row>
455+
<entry><structfield>amkeytype</structfield></entry>
456+
<entry><type>oid</type></entry>
457+
<entry><literal><link linkend="catalog-pg-type"><structname>pg_type</structname></link>.oid</literal></entry>
458+
<entry>Type of data stored in index, or zero if not a fixed type</entry>
459+
</row>
460+
454461
<row>
455462
<entry><structfield>aminsert</structfield></entry>
456463
<entry><type>regproc</type></entry>
@@ -6424,7 +6431,7 @@
64246431
<row>
64256432
<entry><structfield>sourceline</structfield></entry>
64266433
<entry><type>text</type></entry>
6427-
<entry>Line number within the sourcefile the current value was set
6434+
<entry>Line number within the sourcefile the current value was set
64286435
from (NULL for values set in sources other than configuration files)
64296436
</entry>
64306437
</row>

src/backend/access/hash/hash.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.104 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.105 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -79,12 +79,12 @@ hashbuild(PG_FUNCTION_ARGS)
7979
* then we'll thrash horribly. To prevent that scenario, we can sort the
8080
* tuples by (expected) bucket number. However, such a sort is useless
8181
* overhead when the index does fit in RAM. We choose to sort if the
82-
* initial index size exceeds effective_cache_size.
82+
* initial index size exceeds NBuffers.
8383
*
8484
* NOTE: this test will need adjustment if a bucket is ever different
8585
* from one page.
8686
*/
87-
if (num_buckets >= (uint32) effective_cache_size)
87+
if (num_buckets >= (uint32) NBuffers)
8888
buildstate.spool = _h_spoolinit(index, num_buckets);
8989
else
9090
buildstate.spool = NULL;
@@ -129,7 +129,7 @@ hashbuildCallback(Relation index,
129129
IndexTuple itup;
130130

131131
/* form an index tuple and point it at the heap tuple */
132-
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
132+
itup = _hash_form_tuple(index, values, isnull);
133133
itup->t_tid = htup->t_self;
134134

135135
/* Hash indexes don't index nulls, see notes in hashinsert */
@@ -153,8 +153,8 @@ hashbuildCallback(Relation index,
153153
/*
154154
* hashinsert() -- insert an index tuple into a hash table.
155155
*
156-
* Hash on the index tuple's key, find the appropriate location
157-
* for the new tuple, and put it there.
156+
* Hash on the heap tuple's key, form an index tuple with hash code.
157+
* Find the appropriate location for the new tuple, and put it there.
158158
*/
159159
Datum
160160
hashinsert(PG_FUNCTION_ARGS)
@@ -171,7 +171,7 @@ hashinsert(PG_FUNCTION_ARGS)
171171
IndexTuple itup;
172172

173173
/* generate an index tuple */
174-
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
174+
itup = _hash_form_tuple(rel, values, isnull);
175175
itup->t_tid = *ht_ctid;
176176

177177
/*
@@ -211,8 +211,8 @@ hashgettuple(PG_FUNCTION_ARGS)
211211
OffsetNumber offnum;
212212
bool res;
213213

214-
/* Hash indexes are never lossy (at the moment anyway) */
215-
scan->xs_recheck = false;
214+
/* Hash indexes are always lossy since we store only the hash code */
215+
scan->xs_recheck = true;
216216

217217
/*
218218
* We hold pin but not lock on current buffer while outside the hash AM.
@@ -317,7 +317,8 @@ hashgetbitmap(PG_FUNCTION_ARGS)
317317
/* Save tuple ID, and continue scanning */
318318
if (add_tuple)
319319
{
320-
tbm_add_tuples(tbm, &scan->xs_ctup.t_self, 1, false);
320+
/* Note we mark the tuple ID as requiring recheck */
321+
tbm_add_tuples(tbm, &scan->xs_ctup.t_self, 1, true);
321322
ntids++;
322323
}
323324

@@ -527,7 +528,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
527528
* each bucket.
528529
*/
529530
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
530-
metap = (HashMetaPage) BufferGetPage(metabuf);
531+
metap = HashPageGetMeta(BufferGetPage(metabuf));
531532
orig_maxbucket = metap->hashm_maxbucket;
532533
orig_ntuples = metap->hashm_ntuples;
533534
memcpy(&local_metapage, metap, sizeof(local_metapage));
@@ -629,7 +630,7 @@ hashbulkdelete(PG_FUNCTION_ARGS)
629630

630631
/* Write-lock metapage and check for split since we started */
631632
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE, LH_META_PAGE);
632-
metap = (HashMetaPage) BufferGetPage(metabuf);
633+
metap = HashPageGetMeta(BufferGetPage(metabuf));
633634

634635
if (cur_maxbucket != metap->hashm_maxbucket)
635636
{

src/backend/access/hash/hashinsert.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.50 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashinsert.c,v 1.51 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -43,18 +43,11 @@ _hash_doinsert(Relation rel, IndexTuple itup)
4343
bool do_expand;
4444
uint32 hashkey;
4545
Bucket bucket;
46-
Datum datum;
47-
bool isnull;
4846

4947
/*
50-
* Compute the hash key for the item. We do this first so as not to need
51-
* to hold any locks while running the hash function.
48+
* Get the hash key for the item (it's stored in the index tuple itself).
5249
*/
53-
if (rel->rd_rel->relnatts != 1)
54-
elog(ERROR, "hash indexes support only one index key");
55-
datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull);
56-
Assert(!isnull);
57-
hashkey = _hash_datum2hashkey(rel, datum);
50+
hashkey = _hash_get_indextuple_hashkey(itup);
5851

5952
/* compute item size too */
6053
itemsz = IndexTupleDSize(*itup);
@@ -69,12 +62,14 @@ _hash_doinsert(Relation rel, IndexTuple itup)
6962

7063
/* Read the metapage */
7164
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
72-
metap = (HashMetaPage) BufferGetPage(metabuf);
65+
metap = HashPageGetMeta(BufferGetPage(metabuf));
7366

7467
/*
7568
* Check whether the item can fit on a hash page at all. (Eventually, we
7669
* ought to try to apply TOAST methods if not.) Note that at this point,
7770
* itemsz doesn't include the ItemId.
71+
*
72+
* XXX this is useless code if we are only storing hash keys.
7873
*/
7974
if (itemsz > HashMaxItemSize((Page) metap))
8075
ereport(ERROR,
@@ -197,11 +192,15 @@ _hash_pgaddtup(Relation rel,
197192
{
198193
OffsetNumber itup_off;
199194
Page page;
195+
uint32 hashkey;
200196

201197
_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
202198
page = BufferGetPage(buf);
203199

204-
itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
200+
/* Find where to insert the tuple (preserving page's hashkey ordering) */
201+
hashkey = _hash_get_indextuple_hashkey(itup);
202+
itup_off = _hash_binsearch(page, hashkey);
203+
205204
if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
206205
== InvalidOffsetNumber)
207206
elog(ERROR, "failed to add index item to \"%s\"",

src/backend/access/hash/hashovfl.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.64 2008/06/19 00:46:03 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.65 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* Overflow pages look like ordinary relation pages.
@@ -187,7 +187,7 @@ _hash_getovflpage(Relation rel, Buffer metabuf)
187187
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
188188

189189
_hash_checkpage(rel, metabuf, LH_META_PAGE);
190-
metap = (HashMetaPage) BufferGetPage(metabuf);
190+
metap = HashPageGetMeta(BufferGetPage(metabuf));
191191

192192
/* start search at hashm_firstfree */
193193
orig_firstfree = metap->hashm_firstfree;
@@ -450,7 +450,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf,
450450

451451
/* Read the metapage so we can determine which bitmap page to use */
452452
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
453-
metap = (HashMetaPage) BufferGetPage(metabuf);
453+
metap = HashPageGetMeta(BufferGetPage(metabuf));
454454

455455
/* Identify which bit to set */
456456
ovflbitno = blkno_to_bitno(metap, ovflblkno);

src/backend/access/hash/hashpage.c

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.77 2008/09/15 18:43:41 tgl Exp $
1212
*
1313
* NOTES
1414
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -348,11 +348,9 @@ _hash_metapinit(Relation rel, double num_tuples)
348348
* Determine the target fill factor (in tuples per bucket) for this index.
349349
* The idea is to make the fill factor correspond to pages about as full
350350
* as the user-settable fillfactor parameter says. We can compute it
351-
* exactly if the index datatype is fixed-width, but for var-width there's
352-
* some guessing involved.
351+
* exactly since the index datatype (i.e. uint32 hash key) is fixed-width.
353352
*/
354-
data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
355-
RelationGetDescr(rel)->attrs[0]->atttypmod);
353+
data_width = sizeof(uint32);
356354
item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) +
357355
sizeof(ItemIdData); /* include the line pointer */
358356
ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width;
@@ -395,20 +393,18 @@ _hash_metapinit(Relation rel, double num_tuples)
395393
pageopaque->hasho_flag = LH_META_PAGE;
396394
pageopaque->hasho_page_id = HASHO_PAGE_ID;
397395

398-
metap = (HashMetaPage) pg;
396+
metap = HashPageGetMeta(pg);
399397

400398
metap->hashm_magic = HASH_MAGIC;
401399
metap->hashm_version = HASH_VERSION;
402400
metap->hashm_ntuples = 0;
403401
metap->hashm_nmaps = 0;
404402
metap->hashm_ffactor = ffactor;
405-
metap->hashm_bsize = BufferGetPageSize(metabuf);
403+
metap->hashm_bsize = HashGetMaxBitmapSize(pg);
406404
/* find largest bitmap array size that will fit in page size */
407405
for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
408406
{
409-
if ((1 << i) <= (metap->hashm_bsize -
410-
(MAXALIGN(sizeof(PageHeaderData)) +
411-
MAXALIGN(sizeof(HashPageOpaqueData)))))
407+
if ((1 << i) <= metap->hashm_bsize)
412408
break;
413409
}
414410
Assert(i > 0);
@@ -532,7 +528,7 @@ _hash_expandtable(Relation rel, Buffer metabuf)
532528
_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
533529

534530
_hash_checkpage(rel, metabuf, LH_META_PAGE);
535-
metap = (HashMetaPage) BufferGetPage(metabuf);
531+
metap = HashPageGetMeta(BufferGetPage(metabuf));
536532

537533
/*
538534
* Check to see if split is still needed; someone else might have already
@@ -774,8 +770,6 @@ _hash_splitbucket(Relation rel,
774770
Buffer nbuf;
775771
BlockNumber oblkno;
776772
BlockNumber nblkno;
777-
bool null;
778-
Datum datum;
779773
HashPageOpaque oopaque;
780774
HashPageOpaque nopaque;
781775
IndexTuple itup;
@@ -785,7 +779,6 @@ _hash_splitbucket(Relation rel,
785779
OffsetNumber omaxoffnum;
786780
Page opage;
787781
Page npage;
788-
TupleDesc itupdesc = RelationGetDescr(rel);
789782

790783
/*
791784
* It should be okay to simultaneously write-lock pages from each bucket,
@@ -846,16 +839,11 @@ _hash_splitbucket(Relation rel,
846839
}
847840

848841
/*
849-
* Re-hash the tuple to determine which bucket it now belongs in.
850-
*
851-
* It is annoying to call the hash function while holding locks, but
852-
* releasing and relocking the page for each tuple is unappealing too.
842+
* Fetch the item's hash key (conveniently stored in the item)
843+
* and determine which bucket it now belongs in.
853844
*/
854845
itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
855-
datum = index_getattr(itup, 1, itupdesc, &null);
856-
Assert(!null);
857-
858-
bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
846+
bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
859847
maxbucket, highmask, lowmask);
860848

861849
if (bucket == nbucket)

0 commit comments

Comments (0)