postgres
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
Lines changed: 14 additions & 14 deletions
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
Lines changed: 60 additions & 25 deletions
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
Lines changed: 28 additions & 1 deletion
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
Lines changed: 70 additions & 1 deletion
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
Lines changed: 23 additions & 6 deletions
@@ -1,6 +1,6 @@
 <!--
  Documentation of the system catalogs, directed toward PostgreSQL developers
- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.96 2005/02/13 03:04:15 tgl Exp $
+ $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.97 2005/03/27 23:52:51 tgl Exp $
  -->
 
 <chapter id="catalogs">
@@ -316,13 +316,6 @@
       <entry>Name of the access method</entry>
      </row>
 
-     <row>
-      <entry><structfield>amowner</structfield></entry>
-      <entry><type>int4</type></entry>
-      <entry><literal><link linkend="catalog-pg-shadow"><structname>pg_shadow</structname></link>.usesysid</literal></entry>
-      <entry>User ID of the owner (currently not used)</entry>
-     </row>
-
      <row>
       <entry><structfield>amstrategies</structfield></entry>
       <entry><type>int2</type></entry>
@@ -374,24 +367,31 @@
      </row>
 
      <row>
-      <entry><structfield>amgettuple</structfield></entry>
+      <entry><structfield>aminsert</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Next valid tuple</quote> function</entry>
+      <entry><quote>Insert this tuple</quote> function</entry>
      </row>
 
      <row>
-      <entry><structfield>aminsert</structfield></entry>
+      <entry><structfield>ambeginscan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Insert this tuple</quote> function</entry>
+      <entry><quote>Start new scan</quote> function</entry>
      </row>
 
      <row>
-      <entry><structfield>ambeginscan</structfield></entry>
+      <entry><structfield>amgettuple</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Start new scan</quote> function</entry>
+      <entry><quote>Next valid tuple</quote> function</entry>
+     </row>
+
+     <row>
+      <entry><structfield>amgetmulti</structfield></entry>
+      <entry><type>regproc</type></entry>
+      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
+      <entry><quote>Fetch multiple tuples</quote> function</entry>
      </row>
 
      <row>
 
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.2 2005/03/21 01:23:55 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.3 2005/03/27 23:52:51 tgl Exp $
 -->
 
 <chapter id="indexam">
@@ -252,6 +252,28 @@ amgettuple (IndexScanDesc scan,
 
   <para>
 <programlisting>
+boolean
+amgetmulti (IndexScanDesc scan,
+            ItemPointer tids,
+            int32 max_tids,
+            int32 *returned_tids);
+</programlisting>
+   Fetch multiple tuples in the given scan.  Returns TRUE if the scan should
+   continue, FALSE if no matching tuples remain.  <literal>tids</> points to
+   a caller-supplied array of <literal>max_tids</>
+   <structname>ItemPointerData</> records, which the call fills with TIDs of
+   matching tuples.  <literal>*returned_tids</> is set to the number of TIDs
+   actually returned.  This can be less than <literal>max_tids</>, or even
+   zero, even when the return value is TRUE.  (This provision allows the
+   access method to choose the most efficient stopping points in its scan,
+   for example index page boundaries.)  <function>amgetmulti</> and
+   <function>amgettuple</> cannot be used in the same index scan; there
+   are other restrictions too when using <function>amgetmulti</>, as explained
+   in <xref linkend="index-scanning">.
+  </para>
+
+  <para>
+<programlisting>
 void
 amrescan (IndexScanDesc scan,
           ScanKey key);
@@ -297,7 +319,6 @@ amrestrpos (IndexScanDesc scan);
 <programlisting>
 void
 amcostestimate (Query *root,
-                RelOptInfo *rel,
                 IndexOptInfo *index,
                 List *indexQuals,
                 Cost *indexStartupCost,
@@ -407,6 +428,25 @@ amcostestimate (Query *root,
    true, insertions or deletions from other backends must be handled as well.)
   </para>
 
+  <para>
+   Instead of using <function>amgettuple</>, an index scan can be done with 
+   <function>amgetmulti</> to fetch multiple tuples per call.  This can be
+   noticeably more efficient than <function>amgettuple</> because it allows
+   avoiding lock/unlock cycles within the access method.  In principle
+   <function>amgetmulti</> should have the same effects as repeated
+   <function>amgettuple</> calls, but we impose several restrictions to
+   simplify matters.  In the first place, <function>amgetmulti</> does not
+   take a <literal>direction</> argument, and therefore it supports neither
+   backwards scan nor intrascan reversal of direction.  The access method
+   need not support marking or restoring scan positions during an
+   <function>amgetmulti</> scan, either.  (These restrictions cost little
+   since it would be difficult to use these features in an
+   <function>amgetmulti</> scan anyway: adjusting the caller's buffered
+   list of TIDs would be complex.)  Finally, <function>amgetmulti</> does
+   not guarantee any locking of the returned tuples, with implications
+   spelled out in <xref linkend="index-locking">.
+  </para>
+
  </sect1>
 
  <sect1 id="index-locking">
@@ -515,10 +555,15 @@ amcostestimate (Query *root,
    and only visit the heap tuples sometime later, requires much less index
    locking overhead and may allow a more efficient heap access pattern.
    Per the above analysis, we must use the synchronous approach for
-   non-MVCC-compliant snapshots, but an asynchronous scan would be safe
-   for a query using an MVCC snapshot.  This possibility is not exploited
-   as of <productname>PostgreSQL</productname> 8.0, but it is likely to be
-   investigated soon.
+   non-MVCC-compliant snapshots, but an asynchronous scan is workable
+   for a query using an MVCC snapshot.
+  </para>
+
+  <para>
+   In an <function>amgetmulti</> index scan, the access method need not
+   guarantee to keep an index pin on any of the returned tuples.  (It would be
+   impractical to pin more than the last one anyway.)  Therefore
+   it is only safe to use such scans with MVCC-compliant snapshots.
   </para>
 
  </sect1>
@@ -611,7 +656,6 @@ amcostestimate (Query *root,
 <programlisting>
 void
 amcostestimate (Query *root,
-                RelOptInfo *rel,
                 IndexOptInfo *index,
                 List *indexQuals,
                 Cost *indexStartupCost,
@@ -632,20 +676,11 @@ amcostestimate (Query *root,
      </listitem>
     </varlistentry>
 
-    <varlistentry>
-     <term>rel</term>
-     <listitem>
-      <para>
-       The relation the index is on.
-      </para>
-     </listitem>
-    </varlistentry>
-
     <varlistentry>
      <term>index</term>
      <listitem>
       <para>
-       The index itself.
+       The index being considered.
       </para>
      </listitem>
     </varlistentry>
@@ -714,19 +749,19 @@ amcostestimate (Query *root,
 
   <para>
    The index access costs should be computed in the units used by
-   <filename>src/backend/optimizer/path/costsize.c</filename>: a sequential disk block fetch
-   has cost 1.0, a nonsequential fetch has cost random_page_cost, and
-   the cost of processing one index row should usually be taken as
-   cpu_index_tuple_cost (which is a user-adjustable optimizer parameter).
-   In addition, an appropriate multiple of cpu_operator_cost should be charged
+   <filename>src/backend/optimizer/path/costsize.c</filename>: a sequential
+   disk block fetch has cost 1.0, a nonsequential fetch has cost
+   <varname>random_page_cost</>, and the cost of processing one index row
+   should usually be taken as <varname>cpu_index_tuple_cost</>.  In addition,
+   an appropriate multiple of <varname>cpu_operator_cost</> should be charged
    for any comparison operators invoked during index processing (especially
    evaluation of the indexQuals themselves).
   </para>
 
   <para>
    The access costs should include all disk and CPU costs associated with
-   scanning the index itself, but NOT the costs of retrieving or processing
-   the parent-table rows that are identified by the index.
+   scanning the index itself, but <emphasis>not</> the costs of retrieving or
+   processing the parent-table rows that are identified by the index.
   </para>
 
   <para>
@@ -764,7 +799,7 @@ amcostestimate (Query *root,
 
 <programlisting>
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           rel-&gt;relid, JOIN_INNER);
+                                           index-&gt;rel-&gt;relid, JOIN_INNER);
 </programlisting>
     </para>
    </step>
 
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.44 2005/02/05 19:38:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,6 +47,33 @@ gistgettuple(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(res);
 }
 
+Datum
+gistgetmulti(PG_FUNCTION_ARGS)
+{
+	IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);	/* arg 0: scan descriptor */
+	ItemPointer	tids = (ItemPointer) PG_GETARG_POINTER(1);	/* arg 1: caller-supplied output array */
+	int32		max_tids = PG_GETARG_INT32(2);	/* arg 2: maximum number of TIDs to store */
+	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);	/* arg 3: out, number of TIDs stored */
+	bool		res = true;	/* remains true if the loop body never runs */
+	int32		ntids = 0;	/* number of entries of tids[] filled so far */
+
+	/* XXX generic implementation: loop around guts of gistgettuple */
+	while (ntids < max_tids)
+	{
+		if (ItemPointerIsValid(&(s->currentItemData)))
+			res = gistnext(s, ForwardScanDirection);	/* current item is valid: advance from it */
+		else
+			res = gistfirst(s, ForwardScanDirection);	/* no valid current item: start the scan */
+		if (!res)
+			break;	/* nothing returned; stop and report what was collected */
+		tids[ntids] = s->xs_ctup.t_self;	/* save xs_ctup.t_self (presumably set by the call above) */
+		ntids++;
+	}
+
+	*returned_tids = ntids;
+	PG_RETURN_BOOL(res);	/* false only when gistfirst/gistnext returned false */
+}
+
 static bool
 gistfirst(IndexScanDesc s, ScanDirection dir)
 {
 
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.77 2005/03/21 01:23:57 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.78 2005/03/27 23:52:57 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -264,6 +264,75 @@ hashgettuple(PG_FUNCTION_ARGS)
 }
 
 
+/*
+ *	hashgetmulti() -- get multiple tuples at once
+ *
+ * This is a somewhat generic implementation: it avoids lock reacquisition
+ * overhead, but there's no smarts about picking especially good stopping
+ * points such as index page boundaries.
+ */
+Datum
+hashgetmulti(PG_FUNCTION_ARGS)
+{
+	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);	/* arg 0: scan descriptor */
+	ItemPointer	tids = (ItemPointer) PG_GETARG_POINTER(1);	/* arg 1: caller-supplied output array */
+	int32		max_tids = PG_GETARG_INT32(2);	/* arg 2: maximum number of TIDs to store */
+	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);	/* arg 3: out, number of TIDs stored */
+	HashScanOpaque so = (HashScanOpaque) scan->opaque;	/* hash-specific scan state */
+	Relation	rel = scan->indexRelation;
+	bool		res = true;	/* remains true if the loop body never runs */
+	int32		ntids = 0;	/* number of entries of tids[] filled so far */
+
+	/*
+	 * We hold pin but not lock on current buffer while outside the hash
+	 * AM. Reacquire the read lock here.
+	 */
+	if (BufferIsValid(so->hashso_curbuf))
+		_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);
+
+	while (ntids < max_tids)
+	{
+		/*
+		 * Start scan, or advance to next tuple.
+		 */
+		if (ItemPointerIsValid(&(scan->currentItemData)))
+			res = _hash_next(scan, ForwardScanDirection);
+		else
+			res = _hash_first(scan, ForwardScanDirection);
+		/*
+		 * Skip killed tuples if asked to.
+		 */
+		if (scan->ignore_killed_tuples)
+		{
+			while (res)
+			{
+				Page		page;
+				OffsetNumber offnum;
+
+				offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
+				page = BufferGetPage(so->hashso_curbuf);
+				if (!ItemIdDeleted(PageGetItemId(page, offnum)))
+					break;	/* item is not marked deleted: keep it */
+				res = _hash_next(scan, ForwardScanDirection);	/* skip it and try the next match */
+			}
+		}
+
+		if (!res)
+			break;	/* _hash_first/_hash_next returned false: stop collecting */
+		/* Save tuple ID, and continue scanning */
+		tids[ntids] = scan->xs_ctup.t_self;
+		ntids++;
+	}
+
+	/* Release read lock on current buffer, but keep it pinned */
+	if (BufferIsValid(so->hashso_curbuf))
+		_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);
+
+	*returned_tids = ntids;	/* may be fewer than max_tids, including zero */
+	PG_RETURN_BOOL(res);	/* false only when the scan returned no further tuple */
+}
+
+
 /*
  *	hashbeginscan() -- start a scan on a hash index
  */
 
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.184 2005/03/20 23:40:23 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.185 2005/03/27 23:52:58 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -933,18 +933,35 @@ heap_release_fetch(Relation relation,
 	 * Need share lock on buffer to examine tuple commit status.
 	 */
 	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	dp = (PageHeader) BufferGetPage(buffer);
 
 	/*
-	 * get the item line pointer corresponding to the requested tid
+	 * We'd better check for out-of-range offnum in case of VACUUM since
+	 * the TID was obtained.
 	 */
-	dp = (PageHeader) BufferGetPage(buffer);
 	offnum = ItemPointerGetOffsetNumber(tid);
+	if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
+	{
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+		if (keep_buf)
+			*userbuf = buffer;
+		else
+		{
+			ReleaseBuffer(buffer);
+			*userbuf = InvalidBuffer;
+		}
+		tuple->t_datamcxt = NULL;
+		tuple->t_data = NULL;
+		return false;
+	}
+
+	/*
+	 * get the item line pointer corresponding to the requested tid
+	 */
 	lp = PageGetItemId(dp, offnum);
 
 	/*
-	 * must check for deleted tuple (see for example analyze.c, which is
-	 * careful to pass an offnum in range, but doesn't know if the offnum
-	 * actually corresponds to an undeleted tuple).
+	 * Must check for deleted tuple.
 	 */
 	if (!ItemIdIsUsed(lp))
 	{