Skip to content

Commit bf3dbb5

Browse files
committed
First steps towards index scans with heap access decoupled from index
access: define new index access method functions 'amgetmulti' that can fetch multiple TIDs per call. (The functions exist but are totally untested as yet.) Since I was modifying pg_am anyway, remove the no-longer-needed 'rel' parameter from amcostestimate functions, and also remove the vestigial amowner column that was creating useless work for Alvaro's shared-object-dependencies project. Initdb forced due to changes in pg_am.
1 parent 351519a commit bf3dbb5

File tree

20 files changed

+537
-176
lines changed

20 files changed

+537
-176
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<!--
22
Documentation of the system catalogs, directed toward PostgreSQL developers
3-
$PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.96 2005/02/13 03:04:15 tgl Exp $
3+
$PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.97 2005/03/27 23:52:51 tgl Exp $
44
-->
55

66
<chapter id="catalogs">
@@ -316,13 +316,6 @@
316316
<entry>Name of the access method</entry>
317317
</row>
318318

319-
<row>
320-
<entry><structfield>amowner</structfield></entry>
321-
<entry><type>int4</type></entry>
322-
<entry><literal><link linkend="catalog-pg-shadow"><structname>pg_shadow</structname></link>.usesysid</literal></entry>
323-
<entry>User ID of the owner (currently not used)</entry>
324-
</row>
325-
326319
<row>
327320
<entry><structfield>amstrategies</structfield></entry>
328321
<entry><type>int2</type></entry>
@@ -374,24 +367,31 @@
374367
</row>
375368

376369
<row>
377-
<entry><structfield>amgettuple</structfield></entry>
370+
<entry><structfield>aminsert</structfield></entry>
378371
<entry><type>regproc</type></entry>
379372
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
380-
<entry><quote>Next valid tuple</quote> function</entry>
373+
<entry><quote>Insert this tuple</quote> function</entry>
381374
</row>
382375

383376
<row>
384-
<entry><structfield>aminsert</structfield></entry>
377+
<entry><structfield>ambeginscan</structfield></entry>
385378
<entry><type>regproc</type></entry>
386379
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
387-
<entry><quote>Insert this tuple</quote> function</entry>
380+
<entry><quote>Start new scan</quote> function</entry>
388381
</row>
389382

390383
<row>
391-
<entry><structfield>ambeginscan</structfield></entry>
384+
<entry><structfield>amgettuple</structfield></entry>
392385
<entry><type>regproc</type></entry>
393386
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
394-
<entry><quote>Start new scan</quote> function</entry>
387+
<entry><quote>Next valid tuple</quote> function</entry>
388+
</row>
389+
390+
<row>
391+
<entry><structfield>amgetmulti</structfield></entry>
392+
<entry><type>regproc</type></entry>
393+
<entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
394+
<entry><quote>Fetch multiple tuples</quote> function</entry>
395395
</row>
396396

397397
<row>

doc/src/sgml/indexam.sgml

Lines changed: 60 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.2 2005/03/21 01:23:55 tgl Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.3 2005/03/27 23:52:51 tgl Exp $
33
-->
44

55
<chapter id="indexam">
@@ -252,6 +252,28 @@ amgettuple (IndexScanDesc scan,
252252

253253
<para>
254254
<programlisting>
255+
boolean
256+
amgetmulti (IndexScanDesc scan,
257+
ItemPointer tids,
258+
int32 max_tids,
259+
int32 *returned_tids);
260+
</programlisting>
261+
Fetch multiple tuples in the given scan. Returns TRUE if the scan should
262+
continue, FALSE if no matching tuples remain. <literal>tids</> points to
263+
a caller-supplied array of <literal>max_tids</>
264+
<structname>ItemPointerData</> records, which the call fills with TIDs of
265+
matching tuples. <literal>*returned_tids</> is set to the number of TIDs
266+
actually returned. This can be less than <literal>max_tids</>, or even
267+
zero, even when the return value is TRUE. (This provision allows the
268+
access method to choose the most efficient stopping points in its scan,
269+
for example index page boundaries.) <function>amgetmulti</> and
270+
<function>amgettuple</> cannot be used in the same index scan; there
271+
are other restrictions too when using <function>amgetmulti</>, as explained
272+
in <xref linkend="index-scanning">.
273+
</para>
274+
275+
<para>
276+
<programlisting>
255277
void
256278
amrescan (IndexScanDesc scan,
257279
ScanKey key);
@@ -297,7 +319,6 @@ amrestrpos (IndexScanDesc scan);
297319
<programlisting>
298320
void
299321
amcostestimate (Query *root,
300-
RelOptInfo *rel,
301322
IndexOptInfo *index,
302323
List *indexQuals,
303324
Cost *indexStartupCost,
@@ -407,6 +428,25 @@ amcostestimate (Query *root,
407428
true, insertions or deletions from other backends must be handled as well.)
408429
</para>
409430

431+
<para>
432+
Instead of using <function>amgettuple</>, an index scan can be done with
433+
<function>amgetmulti</> to fetch multiple tuples per call. This can be
434+
noticeably more efficient than <function>amgettuple</> because it allows
435+
avoiding lock/unlock cycles within the access method. In principle
436+
<function>amgetmulti</> should have the same effects as repeated
437+
<function>amgettuple</> calls, but we impose several restrictions to
438+
simplify matters. In the first place, <function>amgetmulti</> does not
439+
take a <literal>direction</> argument, and therefore it does not support
440+
backwards scan nor intrascan reversal of direction. The access method
441+
need not support marking or restoring scan positions during an
442+
<function>amgetmulti</> scan, either. (These restrictions cost little
443+
since it would be difficult to use these features in an
444+
<function>amgetmulti</> scan anyway: adjusting the caller's buffered
445+
list of TIDs would be complex.) Finally, <function>amgetmulti</> does
446+
not guarantee any locking of the returned tuples, with implications
447+
spelled out in <xref linkend="index-locking">.
448+
</para>
449+
410450
</sect1>
411451

412452
<sect1 id="index-locking">
@@ -515,10 +555,15 @@ amcostestimate (Query *root,
515555
and only visit the heap tuples sometime later, requires much less index
516556
locking overhead and may allow a more efficient heap access pattern.
517557
Per the above analysis, we must use the synchronous approach for
518-
non-MVCC-compliant snapshots, but an asynchronous scan would be safe
519-
for a query using an MVCC snapshot. This possibility is not exploited
520-
as of <productname>PostgreSQL</productname> 8.0, but it is likely to be
521-
investigated soon.
558+
non-MVCC-compliant snapshots, but an asynchronous scan is workable
559+
for a query using an MVCC snapshot.
560+
</para>
561+
562+
<para>
563+
In an <function>amgetmulti</> index scan, the access method need not
564+
guarantee to keep an index pin on any of the returned tuples. (It would be
565+
impractical to pin more than the last one anyway.) Therefore
566+
it is only safe to use such scans with MVCC-compliant snapshots.
522567
</para>
523568

524569
</sect1>
@@ -611,7 +656,6 @@ amcostestimate (Query *root,
611656
<programlisting>
612657
void
613658
amcostestimate (Query *root,
614-
RelOptInfo *rel,
615659
IndexOptInfo *index,
616660
List *indexQuals,
617661
Cost *indexStartupCost,
@@ -632,20 +676,11 @@ amcostestimate (Query *root,
632676
</listitem>
633677
</varlistentry>
634678

635-
<varlistentry>
636-
<term>rel</term>
637-
<listitem>
638-
<para>
639-
The relation the index is on.
640-
</para>
641-
</listitem>
642-
</varlistentry>
643-
644679
<varlistentry>
645680
<term>index</term>
646681
<listitem>
647682
<para>
648-
The index itself.
683+
The index being considered.
649684
</para>
650685
</listitem>
651686
</varlistentry>
@@ -714,19 +749,19 @@ amcostestimate (Query *root,
714749

715750
<para>
716751
The index access costs should be computed in the units used by
717-
<filename>src/backend/optimizer/path/costsize.c</filename>: a sequential disk block fetch
718-
has cost 1.0, a nonsequential fetch has cost random_page_cost, and
719-
the cost of processing one index row should usually be taken as
720-
cpu_index_tuple_cost (which is a user-adjustable optimizer parameter).
721-
In addition, an appropriate multiple of cpu_operator_cost should be charged
752+
<filename>src/backend/optimizer/path/costsize.c</filename>: a sequential
753+
disk block fetch has cost 1.0, a nonsequential fetch has cost
754+
<varname>random_page_cost</>, and the cost of processing one index row
755+
should usually be taken as <varname>cpu_index_tuple_cost</>. In addition,
756+
an appropriate multiple of <varname>cpu_operator_cost</> should be charged
722757
for any comparison operators invoked during index processing (especially
723758
evaluation of the indexQuals themselves).
724759
</para>
725760

726761
<para>
727762
The access costs should include all disk and CPU costs associated with
728-
scanning the index itself, but NOT the costs of retrieving or processing
729-
the parent-table rows that are identified by the index.
763+
scanning the index itself, but <emphasis>not</> the costs of retrieving or
764+
processing the parent-table rows that are identified by the index.
730765
</para>
731766

732767
<para>
@@ -764,7 +799,7 @@ amcostestimate (Query *root,
764799

765800
<programlisting>
766801
*indexSelectivity = clauselist_selectivity(root, indexQuals,
767-
rel-&gt;relid, JOIN_INNER);
802+
index-&gt;rel-&gt;relid, JOIN_INNER);
768803
</programlisting>
769804
</para>
770805
</step>

src/backend/access/gist/gistget.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.44 2005/02/05 19:38:58 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -47,6 +47,33 @@ gistgettuple(PG_FUNCTION_ARGS)
4747
PG_RETURN_BOOL(res);
4848
}
4949

50+
Datum
51+
gistgetmulti(PG_FUNCTION_ARGS)
52+
{
53+
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
54+
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
55+
int32 max_tids = PG_GETARG_INT32(2);
56+
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
57+
bool res = true;
58+
int32 ntids = 0;
59+
60+
/* XXX generic implementation: loop around guts of gistgettuple */
61+
while (ntids < max_tids)
62+
{
63+
if (ItemPointerIsValid(&(s->currentItemData)))
64+
res = gistnext(s, ForwardScanDirection);
65+
else
66+
res = gistfirst(s, ForwardScanDirection);
67+
if (!res)
68+
break;
69+
tids[ntids] = s->xs_ctup.t_self;
70+
ntids++;
71+
}
72+
73+
*returned_tids = ntids;
74+
PG_RETURN_BOOL(res);
75+
}
76+
5077
static bool
5178
gistfirst(IndexScanDesc s, ScanDirection dir)
5279
{

src/backend/access/hash/hash.c

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.77 2005/03/21 01:23:57 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.78 2005/03/27 23:52:57 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -264,6 +264,75 @@ hashgettuple(PG_FUNCTION_ARGS)
264264
}
265265

266266

267+
/*
268+
* hashgetmulti() -- get multiple tuples at once
269+
*
270+
* This is a somewhat generic implementation: it avoids lock reacquisition
271+
* overhead, but there's no smarts about picking especially good stopping
272+
* points such as index page boundaries.
273+
*/
274+
Datum
275+
hashgetmulti(PG_FUNCTION_ARGS)
276+
{
277+
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
278+
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
279+
int32 max_tids = PG_GETARG_INT32(2);
280+
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
281+
HashScanOpaque so = (HashScanOpaque) scan->opaque;
282+
Relation rel = scan->indexRelation;
283+
bool res = true;
284+
int32 ntids = 0;
285+
286+
/*
287+
* We hold pin but not lock on current buffer while outside the hash
288+
* AM. Reacquire the read lock here.
289+
*/
290+
if (BufferIsValid(so->hashso_curbuf))
291+
_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);
292+
293+
while (ntids < max_tids)
294+
{
295+
/*
296+
* Start scan, or advance to next tuple.
297+
*/
298+
if (ItemPointerIsValid(&(scan->currentItemData)))
299+
res = _hash_next(scan, ForwardScanDirection);
300+
else
301+
res = _hash_first(scan, ForwardScanDirection);
302+
/*
303+
* Skip killed tuples if asked to.
304+
*/
305+
if (scan->ignore_killed_tuples)
306+
{
307+
while (res)
308+
{
309+
Page page;
310+
OffsetNumber offnum;
311+
312+
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
313+
page = BufferGetPage(so->hashso_curbuf);
314+
if (!ItemIdDeleted(PageGetItemId(page, offnum)))
315+
break;
316+
res = _hash_next(scan, ForwardScanDirection);
317+
}
318+
}
319+
320+
if (!res)
321+
break;
322+
/* Save tuple ID, and continue scanning */
323+
tids[ntids] = scan->xs_ctup.t_self;
324+
ntids++;
325+
}
326+
327+
/* Release read lock on current buffer, but keep it pinned */
328+
if (BufferIsValid(so->hashso_curbuf))
329+
_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);
330+
331+
*returned_tids = ntids;
332+
PG_RETURN_BOOL(res);
333+
}
334+
335+
267336
/*
268337
* hashbeginscan() -- start a scan on a hash index
269338
*/

src/backend/access/heap/heapam.c

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.184 2005/03/20 23:40:23 neilc Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.185 2005/03/27 23:52:58 tgl Exp $
1212
*
1313
*
1414
* INTERFACE ROUTINES
@@ -933,18 +933,35 @@ heap_release_fetch(Relation relation,
933933
* Need share lock on buffer to examine tuple commit status.
934934
*/
935935
LockBuffer(buffer, BUFFER_LOCK_SHARE);
936+
dp = (PageHeader) BufferGetPage(buffer);
936937

937938
/*
938-
* get the item line pointer corresponding to the requested tid
939+
* We'd better check for out-of-range offnum in case of VACUUM since
940+
* the TID was obtained.
939941
*/
940-
dp = (PageHeader) BufferGetPage(buffer);
941942
offnum = ItemPointerGetOffsetNumber(tid);
943+
if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
944+
{
945+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
946+
if (keep_buf)
947+
*userbuf = buffer;
948+
else
949+
{
950+
ReleaseBuffer(buffer);
951+
*userbuf = InvalidBuffer;
952+
}
953+
tuple->t_datamcxt = NULL;
954+
tuple->t_data = NULL;
955+
return false;
956+
}
957+
958+
/*
959+
* get the item line pointer corresponding to the requested tid
960+
*/
942961
lp = PageGetItemId(dp, offnum);
943962

944963
/*
945-
* must check for deleted tuple (see for example analyze.c, which is
946-
* careful to pass an offnum in range, but doesn't know if the offnum
947-
* actually corresponds to an undeleted tuple).
964+
* Must check for deleted tuple.
948965
*/
949966
if (!ItemIdIsUsed(lp))
950967
{

0 commit comments

Comments
 (0)