Skip to content

Commit cfdd03f

Browse files
committed
Allow CLUSTER on partitioned tables
This is essentially the same as applying VACUUM FULL to a partitioned table, which has been supported since commit 3c3bb99 (March 2017). While there's no great use case in applying CLUSTER to partitioned tables, we don't have any strong reason not to allow it either. For now, partitioned indexes cannot be marked clustered, so an index must always be specified. While at it, rename some variables that were RangeVars during the development that led to 8bc717c but never made it that way to the source tree; there's no need to perpetuate names that have always been more confusing than helpful. Author: Justin Pryzby <pryzby@telsasoft.com> Reviewed-by: Matthias van de Meent <boekewurm+postgres@gmail.com> Discussion: https://postgr.es/m/20201028003312.GU9241@telsasoft.com Discussion: https://postgr.es/m/20200611153502.GT14879@telsasoft.com
1 parent b7c485f commit cfdd03f

File tree

7 files changed

+249
-97
lines changed

7 files changed

+249
-97
lines changed

doc/src/sgml/ref/cluster.sgml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,13 @@ CLUSTER [VERBOSE]
196196
in the <structname>pg_stat_progress_cluster</structname> view. See
197197
<xref linkend="cluster-progress-reporting"/> for details.
198198
</para>
199+
200+
<para>
201+
Clustering a partitioned table clusters each of its partitions using the
202+
partition of the specified partitioned index. When clustering a partitioned
203+
table, the index may not be omitted.
204+
</para>
205+
199206
</refsect1>
200207

201208
<refsect1>

src/backend/commands/cluster.c

Lines changed: 165 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
#include "catalog/index.h"
3333
#include "catalog/namespace.h"
3434
#include "catalog/objectaccess.h"
35+
#include "catalog/partition.h"
3536
#include "catalog/pg_am.h"
37+
#include "catalog/pg_inherits.h"
3638
#include "catalog/toasting.h"
3739
#include "commands/cluster.h"
3840
#include "commands/defrem.h"
@@ -68,11 +70,14 @@ typedef struct
6870
} RelToCluster;
6971

7072

73+
static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
7174
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
7275
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
7376
bool verbose, bool *pSwapToastByContent,
7477
TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
7578
static List *get_tables_to_cluster(MemoryContext cluster_context);
79+
static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
80+
Oid indexOid);
7681

7782

7883
/*---------------------------------------------------------------------------
@@ -105,6 +110,10 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
105110
ListCell *lc;
106111
ClusterParams params = {0};
107112
bool verbose = false;
113+
Relation rel = NULL;
114+
Oid indexOid = InvalidOid;
115+
MemoryContext cluster_context;
116+
List *rtcs;
108117

109118
/* Parse option list */
110119
foreach(lc, stmt->params)
@@ -126,11 +135,13 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
126135
if (stmt->relation != NULL)
127136
{
128137
/* This is the single-relation case. */
129-
Oid tableOid,
130-
indexOid = InvalidOid;
131-
Relation rel;
138+
Oid tableOid;
132139

133-
/* Find, lock, and check permissions on the table */
140+
/*
141+
* Find, lock, and check permissions on the table. We obtain
142+
* AccessExclusiveLock right away to avoid lock-upgrade hazard in the
143+
* single-transaction case.
144+
*/
134145
tableOid = RangeVarGetRelidExtended(stmt->relation,
135146
AccessExclusiveLock,
136147
0,
@@ -146,14 +157,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
146157
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
147158
errmsg("cannot cluster temporary tables of other sessions")));
148159

149-
/*
150-
* Reject clustering a partitioned table.
151-
*/
152-
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
153-
ereport(ERROR,
154-
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
155-
errmsg("cannot cluster a partitioned table")));
156-
157160
if (stmt->indexname == NULL)
158161
{
159162
ListCell *index;
@@ -188,71 +191,101 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
188191
stmt->indexname, stmt->relation->relname)));
189192
}
190193

191-
/* close relation, keep lock till commit */
192-
table_close(rel, NoLock);
194+
if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
195+
{
196+
/* close relation, keep lock till commit */
197+
table_close(rel, NoLock);
193198

194-
/* Do the job. */
195-
cluster_rel(tableOid, indexOid, &params);
199+
/* Do the job. */
200+
cluster_rel(tableOid, indexOid, &params);
201+
202+
return;
203+
}
204+
}
205+
206+
/*
207+
* By here, we know we are in a multi-table situation. In order to avoid
208+
* holding locks for too long, we want to process each table in its own
209+
* transaction. This forces us to disallow running inside a user
210+
* transaction block.
211+
*/
212+
PreventInTransactionBlock(isTopLevel, "CLUSTER");
213+
214+
/* Also, we need a memory context to hold our list of relations */
215+
cluster_context = AllocSetContextCreate(PortalContext,
216+
"Cluster",
217+
ALLOCSET_DEFAULT_SIZES);
218+
219+
/*
220+
* Either we're processing a partitioned table, or we were not given any
221+
* table name at all. In either case, obtain a list of relations to
222+
* process.
223+
*
224+
* In the former case, an index name must have been given, so we don't
225+
* need to recheck its "indisclustered" bit, but we have to check that it
226+
* is an index that we can cluster on. In the latter case, we set the
227+
* option bit to have indisclustered verified.
228+
*
229+
* Rechecking the relation itself is necessary here in all cases.
230+
*/
231+
params.options |= CLUOPT_RECHECK;
232+
if (rel != NULL)
233+
{
234+
Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
235+
check_index_is_clusterable(rel, indexOid, true, AccessShareLock);
236+
rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
237+
238+
/* close relation, releasing lock on parent table */
239+
table_close(rel, AccessExclusiveLock);
196240
}
197241
else
198242
{
199-
/*
200-
* This is the "multi relation" case. We need to cluster all tables
201-
* that have some index with indisclustered set.
202-
*/
203-
MemoryContext cluster_context;
204-
List *rvs;
205-
ListCell *rv;
243+
rtcs = get_tables_to_cluster(cluster_context);
244+
params.options |= CLUOPT_RECHECK_ISCLUSTERED;
245+
}
206246

207-
/*
208-
* We cannot run this form of CLUSTER inside a user transaction block;
209-
* we'd be holding locks way too long.
210-
*/
211-
PreventInTransactionBlock(isTopLevel, "CLUSTER");
247+
/* Do the job. */
248+
cluster_multiple_rels(rtcs, &params);
212249

213-
/*
214-
* Create special memory context for cross-transaction storage.
215-
*
216-
* Since it is a child of PortalContext, it will go away even in case
217-
* of error.
218-
*/
219-
cluster_context = AllocSetContextCreate(PortalContext,
220-
"Cluster",
221-
ALLOCSET_DEFAULT_SIZES);
250+
/* Start a new transaction for the cleanup work. */
251+
StartTransactionCommand();
222252

223-
/*
224-
* Build the list of relations to cluster. Note that this lives in
225-
* cluster_context.
226-
*/
227-
rvs = get_tables_to_cluster(cluster_context);
253+
/* Clean up working storage */
254+
MemoryContextDelete(cluster_context);
255+
}
228256

229-
/* Commit to get out of starting transaction */
230-
PopActiveSnapshot();
231-
CommitTransactionCommand();
257+
/*
258+
* Given a list of relations to cluster, process each of them in a separate
259+
* transaction.
260+
*
261+
* We expect to be in a transaction at start, but there isn't one when we
262+
* return.
263+
*/
264+
static void
265+
cluster_multiple_rels(List *rtcs, ClusterParams *params)
266+
{
267+
ListCell *lc;
232268

233-
/* Ok, now that we've got them all, cluster them one by one */
234-
foreach(rv, rvs)
235-
{
236-
RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
237-
ClusterParams cluster_params = params;
269+
/* Commit to get out of starting transaction */
270+
PopActiveSnapshot();
271+
CommitTransactionCommand();
238272

239-
/* Start a new transaction for each relation. */
240-
StartTransactionCommand();
241-
/* functions in indexes may want a snapshot set */
242-
PushActiveSnapshot(GetTransactionSnapshot());
243-
/* Do the job. */
244-
cluster_params.options |= CLUOPT_RECHECK;
245-
cluster_rel(rvtc->tableOid, rvtc->indexOid,
246-
&cluster_params);
247-
PopActiveSnapshot();
248-
CommitTransactionCommand();
249-
}
273+
/* Cluster the tables, each in a separate transaction */
274+
foreach(lc, rtcs)
275+
{
276+
RelToCluster *rtc = (RelToCluster *) lfirst(lc);
250277

251-
/* Start a new transaction for the cleanup work. */
278+
/* Start a new transaction for each relation. */
252279
StartTransactionCommand();
253280

254-
/* Clean up working storage */
255-
MemoryContextDelete(cluster_context);
281+
/* functions in indexes may want a snapshot set */
282+
PushActiveSnapshot(GetTransactionSnapshot());
283+
284+
/* Do the job. */
285+
cluster_rel(rtc->tableOid, rtc->indexOid, params);
286+
287+
PopActiveSnapshot();
288+
CommitTransactionCommand();
256289
}
257290
}
258291

@@ -327,10 +360,11 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
327360
/*
328361
* Silently skip a temp table for a remote session. Only doing this
329362
* check in the "recheck" case is appropriate (which currently means
330-
* somebody is executing a database-wide CLUSTER), because there is
331-
* another check in cluster() which will stop any attempt to cluster
332-
* remote temp tables by name. There is another check in cluster_rel
333-
* which is redundant, but we leave it for extra safety.
363+
* somebody is executing a database-wide CLUSTER or on a partitioned
364+
* table), because there is another check in cluster() which will stop
365+
* any attempt to cluster remote temp tables by name. There is
366+
* another check in cluster_rel which is redundant, but we leave it
367+
* for extra safety.
334368
*/
335369
if (RELATION_IS_OTHER_TEMP(OldHeap))
336370
{
@@ -352,9 +386,11 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
352386
}
353387

354388
/*
355-
* Check that the index is still the one with indisclustered set.
389+
* Check that the index is still the one with indisclustered set,
390+
* if needed.
356391
*/
357-
if (!get_index_isclustered(indexOid))
392+
if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
393+
!get_index_isclustered(indexOid))
358394
{
359395
relation_close(OldHeap, AccessExclusiveLock);
360396
pgstat_progress_end_command();
@@ -415,6 +451,10 @@ cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
415451
return;
416452
}
417453

454+
Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
455+
OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
456+
OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
457+
418458
/*
419459
* All predicate locks on the tuples or pages are about to be made
420460
* invalid, because we move tuples around. Promote them to relation
@@ -585,8 +625,8 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
585625
TransactionId frozenXid;
586626
MultiXactId cutoffMulti;
587627

588-
/* Mark the correct index as clustered */
589628
if (OidIsValid(indexOid))
629+
/* Mark the correct index as clustered */
590630
mark_index_clustered(OldHeap, indexOid, true);
591631

592632
/* Remember info about rel before closing OldHeap */
@@ -1528,8 +1568,8 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
15281568

15291569
/*
15301570
* Reset the relrewrite for the toast. The command-counter
1531-
* increment is required here as we are about to update
1532-
* the tuple that is updated as part of RenameRelationInternal.
1571+
* increment is required here as we are about to update the tuple
1572+
* that is updated as part of RenameRelationInternal.
15331573
*/
15341574
CommandCounterIncrement();
15351575
ResetRelRewrite(newrel->rd_rel->reltoastrelid);
@@ -1564,8 +1604,7 @@ get_tables_to_cluster(MemoryContext cluster_context)
15641604
HeapTuple indexTuple;
15651605
Form_pg_index index;
15661606
MemoryContext old_context;
1567-
RelToCluster *rvtc;
1568-
List *rvs = NIL;
1607+
List *rtcs = NIL;
15691608

15701609
/*
15711610
* Get all indexes that have indisclustered set and are owned by
@@ -1579,27 +1618,69 @@ get_tables_to_cluster(MemoryContext cluster_context)
15791618
scan = table_beginscan_catalog(indRelation, 1, &entry);
15801619
while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
15811620
{
1621+
RelToCluster *rtc;
1622+
15821623
index = (Form_pg_index) GETSTRUCT(indexTuple);
15831624

15841625
if (!pg_class_ownercheck(index->indrelid, GetUserId()))
15851626
continue;
15861627

1587-
/*
1588-
* We have to build the list in a different memory context so it will
1589-
* survive the cross-transaction processing
1590-
*/
1628+
/* Use a permanent memory context for the result list */
15911629
old_context = MemoryContextSwitchTo(cluster_context);
15921630

1593-
rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1594-
rvtc->tableOid = index->indrelid;
1595-
rvtc->indexOid = index->indexrelid;
1596-
rvs = lappend(rvs, rvtc);
1631+
rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1632+
rtc->tableOid = index->indrelid;
1633+
rtc->indexOid = index->indexrelid;
1634+
rtcs = lappend(rtcs, rtc);
15971635

15981636
MemoryContextSwitchTo(old_context);
15991637
}
16001638
table_endscan(scan);
16011639

16021640
relation_close(indRelation, AccessShareLock);
16031641

1604-
return rvs;
1642+
return rtcs;
1643+
}
1644+
1645+
/*
1646+
* Given an index on a partitioned table, return a list of RelToCluster for
1647+
* all the children leaves tables/indexes.
1648+
*
1649+
* Caller must hold lock on the table containing the index.
1650+
*
1651+
* Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
1652+
* on the table already.
1653+
*/
1654+
static List *
1655+
get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
1656+
{
1657+
List *inhoids;
1658+
ListCell *lc;
1659+
List *rtcs = NIL;
1660+
MemoryContext old_context;
1661+
1662+
/* Do not lock the children until they're processed */
1663+
inhoids = find_all_inheritors(indexOid, NoLock, NULL);
1664+
1665+
/* Use a permanent memory context for the result list */
1666+
old_context = MemoryContextSwitchTo(cluster_context);
1667+
1668+
foreach(lc, inhoids)
1669+
{
1670+
Oid indexrelid = lfirst_oid(lc);
1671+
Oid relid = IndexGetRelation(indexrelid, false);
1672+
RelToCluster *rtc;
1673+
1674+
/* consider only leaf indexes */
1675+
if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
1676+
continue;
1677+
1678+
rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1679+
rtc->tableOid = relid;
1680+
rtc->indexOid = indexrelid;
1681+
rtcs = lappend(rtcs, rtc);
1682+
}
1683+
1684+
MemoryContextSwitchTo(old_context);
1685+
return rtcs;
16051686
}

0 commit comments

Comments
 (0)