Skip to content

Commit b9b8831

Browse files
committed
Create a "relation mapping" infrastructure to support changing the relfilenodes
of shared or nailed system catalogs. This has two key benefits: * The new CLUSTER-based VACUUM FULL can be applied safely to all catalogs. * We no longer have to use an unsafe reindex-in-place approach for reindexing shared catalogs. CLUSTER on nailed catalogs now works too, although I left it disabled on shared catalogs because the resulting pg_index.indisclustered update would only be visible in one database. Since reindexing shared system catalogs is now fully transactional and crash-safe, the former special cases in REINDEX behavior have been removed; shared catalogs are treated the same as non-shared. This commit does not do anything about the recently-discussed problem of deadlocks between VACUUM FULL/CLUSTER on a system catalog and other concurrent queries; will address that in a separate patch. As a stopgap, parallel_schedule has been tweaked to run vacuum.sql by itself, to avoid such failures during the regression tests.
1 parent 7fc30c4 commit b9b8831

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+2315
-584
lines changed

contrib/oid2name/oid2name.c

+11-11
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Originally by
66
* B. Palmer, bpalmer@crimelabs.net 1-17-2001
77
*
8-
* $PostgreSQL: pgsql/contrib/oid2name/oid2name.c,v 1.36 2009/06/11 14:48:51 momjian Exp $
8+
* $PostgreSQL: pgsql/contrib/oid2name/oid2name.c,v 1.37 2010/02/07 20:48:08 tgl Exp $
99
*/
1010
#include "postgres_fe.h"
1111

@@ -440,7 +440,7 @@ sql_exec_dumpalldbs(PGconn *conn, struct options * opts)
440440
/* get the oid and database name from the system pg_database table */
441441
snprintf(todo, sizeof(todo),
442442
"SELECT d.oid AS \"Oid\", datname AS \"Database Name\", "
443-
"spcname AS \"Tablespace\" FROM pg_database d JOIN pg_tablespace t ON "
443+
"spcname AS \"Tablespace\" FROM pg_catalog.pg_database d JOIN pg_catalog.pg_tablespace t ON "
444444
"(dattablespace = t.oid) ORDER BY 2");
445445

446446
sql_exec(conn, todo, opts->quiet);
@@ -456,10 +456,10 @@ sql_exec_dumpalltables(PGconn *conn, struct options * opts)
456456
char *addfields = ",c.oid AS \"Oid\", nspname AS \"Schema\", spcname as \"Tablespace\" ";
457457

458458
snprintf(todo, sizeof(todo),
459-
"SELECT relfilenode as \"Filenode\", relname as \"Table Name\" %s "
459+
"SELECT pg_catalog.pg_relation_filenode(c.oid) as \"Filenode\", relname as \"Table Name\" %s "
460460
"FROM pg_class c "
461461
" LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
462-
" LEFT JOIN pg_catalog.pg_database d ON d.datname = current_database(),"
462+
" LEFT JOIN pg_catalog.pg_database d ON d.datname = pg_catalog.current_database(),"
463463
" pg_catalog.pg_tablespace t "
464464
"WHERE relkind IN ('r'%s%s) AND "
465465
" %s"
@@ -477,7 +477,7 @@ sql_exec_dumpalltables(PGconn *conn, struct options * opts)
477477
}
478478

479479
/*
480-
* Show oid, relfilenode, name, schema and tablespace for each of the
480+
* Show oid, filenode, name, schema and tablespace for each of the
481481
* given objects in the current database.
482482
*/
483483
void
@@ -492,7 +492,7 @@ sql_exec_searchtables(PGconn *conn, struct options * opts)
492492
bool written = false;
493493
char *addfields = ",c.oid AS \"Oid\", nspname AS \"Schema\", spcname as \"Tablespace\" ";
494494

495-
/* get tables qualifiers, whether names, relfilenodes, or OIDs */
495+
/* get tables qualifiers, whether names, filenodes, or OIDs */
496496
comma_oids = get_comma_elts(opts->oids);
497497
comma_tables = get_comma_elts(opts->tables);
498498
comma_filenodes = get_comma_elts(opts->filenodes);
@@ -511,7 +511,7 @@ sql_exec_searchtables(PGconn *conn, struct options * opts)
511511
{
512512
if (written)
513513
ptr += sprintf(ptr, " OR ");
514-
ptr += sprintf(ptr, "c.relfilenode IN (%s)", comma_filenodes);
514+
ptr += sprintf(ptr, "pg_catalog.pg_relation_filenode(c.oid) IN (%s)", comma_filenodes);
515515
written = true;
516516
}
517517
if (opts->tables->num > 0)
@@ -527,10 +527,10 @@ sql_exec_searchtables(PGconn *conn, struct options * opts)
527527
/* now build the query */
528528
todo = (char *) myalloc(650 + strlen(qualifiers));
529529
snprintf(todo, 650 + strlen(qualifiers),
530-
"SELECT relfilenode as \"Filenode\", relname as \"Table Name\" %s\n"
531-
"FROM pg_class c \n"
530+
"SELECT pg_catalog.pg_relation_filenode(c.oid) as \"Filenode\", relname as \"Table Name\" %s\n"
531+
"FROM pg_catalog.pg_class c \n"
532532
" LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace \n"
533-
" LEFT JOIN pg_catalog.pg_database d ON d.datname = current_database(),\n"
533+
" LEFT JOIN pg_catalog.pg_database d ON d.datname = pg_catalog.current_database(),\n"
534534
" pg_catalog.pg_tablespace t \n"
535535
"WHERE relkind IN ('r', 'i', 'S', 't') AND \n"
536536
" t.oid = CASE\n"
@@ -554,7 +554,7 @@ sql_exec_dumpalltbspc(PGconn *conn, struct options * opts)
554554

555555
snprintf(todo, sizeof(todo),
556556
"SELECT oid AS \"Oid\", spcname as \"Tablespace Name\"\n"
557-
"FROM pg_tablespace");
557+
"FROM pg_catalog.pg_tablespace");
558558

559559
sql_exec(conn, todo, opts->quiet);
560560
}

doc/src/sgml/catalogs.sgml

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.220 2010/02/03 17:25:05 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.221 2010/02/07 20:48:09 tgl Exp $ -->
22
<!--
33
Documentation of the system catalogs, directed toward PostgreSQL developers
44
-->
@@ -1473,7 +1473,9 @@
14731473
<entry><structfield>relfilenode</structfield></entry>
14741474
<entry><type>oid</type></entry>
14751475
<entry></entry>
1476-
<entry>Name of the on-disk file of this relation; 0 if none</entry>
1476+
<entry>Name of the on-disk file of this relation; zero means this
1477+
is a <quote>mapped</> relation whose disk file name is determined
1478+
by low-level state</entry>
14771479
</row>
14781480

14791481
<row>

doc/src/sgml/diskusage.sgml

+15-19
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/diskusage.sgml,v 1.19 2010/02/03 17:25:05 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/diskusage.sgml,v 1.20 2010/02/07 20:48:09 tgl Exp $ -->
22

33
<chapter id="diskusage">
44
<title>Monitoring Disk Usage</title>
@@ -29,30 +29,31 @@
2929
</para>
3030

3131
<para>
32-
You can monitor disk space three ways: using
33-
SQL functions listed in <xref linkend="functions-admin-dbsize">,
34-
using <command>VACUUM</> information, and from the command line
35-
using the tools in <filename>contrib/oid2name</>. The SQL functions
36-
are the easiest to use and report information about tables, tables with
37-
indexes and long value storage (TOAST), databases, and tablespaces.
32+
You can monitor disk space in three ways:
33+
using the SQL functions listed in <xref linkend="functions-admin-dbsize">,
34+
using the tools in <filename>contrib/oid2name</>, or
35+
using manual inspection of the system catalogs.
36+
The SQL functions are the easiest to use and are generally recommended.
37+
<filename>contrib/oid2name</> is described in <xref linkend="oid2name">.
38+
The remainder of this section shows how to do it by inspection of the
39+
system catalogs.
3840
</para>
3941

4042
<para>
4143
Using <application>psql</> on a recently vacuumed or analyzed database,
4244
you can issue queries to see the disk usage of any table:
4345
<programlisting>
44-
SELECT relfilenode, relpages FROM pg_class WHERE relname = 'customer';
46+
SELECT pg_relation_filepath(oid), relpages FROM pg_class WHERE relname = 'customer';
4547

46-
relfilenode | relpages
47-
-------------+----------
48-
16806 | 60
48+
pg_relation_filepath | relpages
49+
----------------------+----------
50+
base/16384/16806 | 60
4951
(1 row)
5052
</programlisting>
5153
Each page is typically 8 kilobytes. (Remember, <structfield>relpages</>
5254
is only updated by <command>VACUUM</>, <command>ANALYZE</>, and
53-
a few DDL commands such as <command>CREATE INDEX</>.) The
54-
<structfield>relfilenode</> value is of interest if you want to examine
55-
the table's disk file directly.
55+
a few DDL commands such as <command>CREATE INDEX</>.) The file pathname
56+
is of interest if you want to examine the table's disk file directly.
5657
</para>
5758

5859
<para>
@@ -107,11 +108,6 @@ ORDER BY relpages DESC;
107108
customer | 3144
108109
</programlisting>
109110
</para>
110-
111-
<para>
112-
You can also use <filename>contrib/oid2name</> to show disk usage; see
113-
<xref linkend="oid2name"> for more details and examples.
114-
</para>
115111
</sect1>
116112

117113
<sect1 id="disk-full">

doc/src/sgml/func.sgml

+63-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.500 2010/02/01 15:38:21 rhaas Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.501 2010/02/07 20:48:09 tgl Exp $ -->
22

33
<chapter id="functions">
44
<title>Functions and Operators</title>
@@ -13434,6 +13434,68 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
1343413434
appropriate.
1343513435
</para>
1343613436

13437+
<para>
13438+
The functions shown in <xref linkend="functions-admin-dblocation"> assist
13439+
in identifying the specific disk files associated with database objects.
13440+
</para>
13441+
13442+
<indexterm>
13443+
<primary>pg_relation_filenode</primary>
13444+
</indexterm>
13445+
<indexterm>
13446+
<primary>pg_relation_filepath</primary>
13447+
</indexterm>
13448+
13449+
<table id="functions-admin-dblocation">
13450+
<title>Database Object Location Functions</title>
13451+
<tgroup cols="3">
13452+
<thead>
13453+
<row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry>
13454+
</row>
13455+
</thead>
13456+
13457+
<tbody>
13458+
<row>
13459+
<entry>
13460+
<literal><function>pg_relation_filenode</function>(<parameter>relation</parameter> <type>regclass</type>)</literal>
13461+
</entry>
13462+
<entry><type>oid</type></entry>
13463+
<entry>
13464+
Filenode number of the relation with the specified OID or name
13465+
</entry>
13466+
</row>
13467+
<row>
13468+
<entry>
13469+
<literal><function>pg_relation_filepath</function>(<parameter>relation</parameter> <type>regclass</type>)</literal>
13470+
</entry>
13471+
<entry><type>text</type></entry>
13472+
<entry>
13473+
File path name of the relation with the specified OID or name
13474+
</entry>
13475+
</row>
13476+
</tbody>
13477+
</tgroup>
13478+
</table>
13479+
13480+
<para>
13481+
<function>pg_relation_filenode</> accepts the OID or name of a table,
13482+
index, sequence, or toast table, and returns the <quote>filenode</> number
13483+
currently assigned to it. The filenode is the base component of the file
13484+
name(s) used for the relation (see <xref linkend="storage-file-layout">
13485+
for more information). For most tables the result is the same as
13486+
<structname>pg_class</>.<structfield>relfilenode</>, but for certain
13487+
system catalogs <structfield>relfilenode</> is zero and this function must
13488+
be used to get the correct value. The function returns NULL if passed
13489+
a relation that does not have storage, such as a view.
13490+
</para>
13491+
13492+
<para>
13493+
<function>pg_relation_filepath</> is similar to
13494+
<function>pg_relation_filenode</>, but it returns the entire file pathname
13495+
(relative to the database cluster's data directory <varname>PGDATA</>) of
13496+
the relation.
13497+
</para>
13498+
1343713499
<para>
1343813500
The functions shown in <xref
1343913501
linkend="functions-admin-genfile"> provide native access to

doc/src/sgml/pgbuffercache.sgml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/pgbuffercache.sgml,v 2.5 2009/05/18 11:08:24 petere Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/pgbuffercache.sgml,v 2.6 2010/02/07 20:48:09 tgl Exp $ -->
22

33
<sect1 id="pgbuffercache">
44
<title>pg_buffercache</title>
@@ -56,7 +56,7 @@
5656
<entry><structfield>relfilenode</structfield></entry>
5757
<entry><type>oid</type></entry>
5858
<entry><literal>pg_class.relfilenode</literal></entry>
59-
<entry>Relfilenode of the relation</entry>
59+
<entry>Filenode number of the relation</entry>
6060
</row>
6161

6262
<row>
@@ -137,7 +137,7 @@
137137
<programlisting>
138138
regression=# SELECT c.relname, count(*) AS buffers
139139
FROM pg_buffercache b INNER JOIN pg_class c
140-
ON b.relfilenode = c.relfilenode AND
140+
ON b.relfilenode = pg_relation_filenode(c.oid) AND
141141
b.reldatabase IN (0, (SELECT oid FROM pg_database
142142
WHERE datname = current_database()))
143143
GROUP BY c.relname

doc/src/sgml/ref/cluster.sgml

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/ref/cluster.sgml,v 1.47 2009/09/19 10:23:26 petere Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/ref/cluster.sgml,v 1.48 2010/02/07 20:48:09 tgl Exp $
33
PostgreSQL documentation
44
-->
55

@@ -30,12 +30,12 @@ CLUSTER [VERBOSE]
3030
<title>Description</title>
3131

3232
<para>
33-
<command>CLUSTER</command> instructs <productname>PostgreSQL</productname>
33+
<command>CLUSTER</command> instructs <productname>PostgreSQL</productname>
3434
to cluster the table specified
3535
by <replaceable class="parameter">table_name</replaceable>
3636
based on the index specified by
3737
<replaceable class="parameter">index_name</replaceable>. The index must
38-
already have been defined on
38+
already have been defined on
3939
<replaceable class="parameter">table_name</replaceable>.
4040
</para>
4141

@@ -46,9 +46,9 @@ CLUSTER [VERBOSE]
4646
not clustered. That is, no attempt is made to store new or
4747
updated rows according to their index order. (If one wishes, one can
4848
periodically recluster by issuing the command again. Also, setting
49-
the table's <literal>FILLFACTOR</literal> storage parameter to less than 100% can aid
50-
in preserving cluster ordering during updates, since updated rows
51-
are preferentially kept on the same page.)
49+
the table's <literal>FILLFACTOR</literal> storage parameter to less than
50+
100% can aid in preserving cluster ordering during updates, since updated
51+
rows are kept on the same page if enough space is available there.)
5252
</para>
5353

5454
<para>

doc/src/sgml/ref/reindex.sgml

+10-34
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/ref/reindex.sgml,v 1.38 2008/11/14 10:22:47 petere Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/ref/reindex.sgml,v 1.39 2010/02/07 20:48:09 tgl Exp $
33
PostgreSQL documentation
44
-->
55

@@ -77,7 +77,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
7777
</itemizedlist>
7878
</para>
7979
</refsect1>
80-
80+
8181
<refsect1>
8282
<title>Parameters</title>
8383

@@ -106,9 +106,9 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
106106
<listitem>
107107
<para>
108108
Recreate all indexes within the current database.
109-
Indexes on shared system catalogs are skipped except in stand-alone mode
110-
(see below). This form of <command>REINDEX</command> cannot be executed
111-
inside a transaction block.
109+
Indexes on shared system catalogs are also processed.
110+
This form of <command>REINDEX</command> cannot be executed inside a
111+
transaction block.
112112
</para>
113113
</listitem>
114114
</varlistentry>
@@ -118,8 +118,8 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
118118
<listitem>
119119
<para>
120120
Recreate all indexes on system catalogs within the current database.
121-
Indexes on user tables are not processed. Also, indexes on shared
122-
system catalogs are skipped except in stand-alone mode (see below).
121+
Indexes on shared system catalogs are included.
122+
Indexes on user tables are not processed.
123123
This form of <command>REINDEX</command> cannot be executed inside a
124124
transaction block.
125125
</para>
@@ -134,7 +134,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
134134
reindexed. Index and table names can be schema-qualified.
135135
Presently, <command>REINDEX DATABASE</> and <command>REINDEX SYSTEM</>
136136
can only reindex the current database, so their parameter must match
137-
the current database's name.
137+
the current database's name.
138138
</para>
139139
</listitem>
140140
</varlistentry>
@@ -156,7 +156,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
156156
<para>
157157
If you suspect corruption of an index on a user table, you can
158158
simply rebuild that index, or all indexes on the table, using
159-
<command>REINDEX INDEX</command> or <command>REINDEX TABLE</command>.
159+
<command>REINDEX INDEX</command> or <command>REINDEX TABLE</command>.
160160
</para>
161161

162162
<para>
@@ -197,30 +197,6 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } <replaceable class="PARAMETER">nam
197197
have been completed.
198198
</para>
199199

200-
<para>
201-
If corruption is suspected in the indexes of any of the shared
202-
system catalogs (which are <structname>pg_authid</structname>,
203-
<structname>pg_auth_members</structname>,
204-
<structname>pg_database</structname>,
205-
<structname>pg_pltemplate</structname>,
206-
<structname>pg_shdepend</structname>,
207-
<structname>pg_shdescription</structname>, and
208-
<structname>pg_tablespace</structname>), then a standalone server
209-
must be used to repair it. <command>REINDEX</> will not process
210-
shared catalogs in multiuser mode.
211-
</para>
212-
213-
<para>
214-
For all indexes except the shared system catalogs, <command>REINDEX</>
215-
is crash-safe and transaction-safe. <command>REINDEX</> is not
216-
crash-safe for shared indexes, which is why this case is disallowed
217-
during normal operation. If a failure occurs while reindexing one
218-
of these catalogs in standalone mode, it will not be possible to
219-
restart the regular server until the problem is rectified. (The
220-
typical symptom of a partially rebuilt shared index is <quote>index is not
221-
a btree</> errors.)
222-
</para>
223-
224200
<para>
225201
<command>REINDEX</command> is similar to a drop and recreate of the index
226202
in that the index contents are rebuilt from scratch. However, the locking
@@ -290,7 +266,7 @@ broken_db=&gt; \q
290266
</programlisting>
291267
</para>
292268
</refsect1>
293-
269+
294270
<refsect1>
295271
<title>Compatibility</title>
296272

0 commit comments

Comments
 (0)