Skip to content

Commit 30833ba

Browse files
committed
Expand partitioned tables in PartDesc order.
Previously, we expanded the inheritance hierarchy in the order in which find_all_inheritors had locked the tables, but that turns out to block quite a bit of useful optimization. For example, a partition-wise join can't count on two tables with matching bounds to get expanded in the same order. Where possible, this change results in expanding partitioned tables in *bound* order. Bound order isn't well-defined for a list-partitioned table with a null-accepting partition or for a list-partitioned table where the bounds for a single partition are interleaved with other partitions. However, when expansion in bound order is possible, it opens up further opportunities for optimization, such as strength-reducing MergeAppend to Append when the expansion order matches the desired sort order. Patch by me, with cosmetic revisions by Ashutosh Bapat. Discussion: http://postgr.es/m/CA+TgmoZrKj7kEzcMSum3aXV4eyvvbh9WD=c6m=002WMheDyE3A@mail.gmail.com
1 parent 6708e44 commit 30833ba

File tree

2 files changed

+220
-112
lines changed

2 files changed

+220
-112
lines changed

src/backend/optimizer/prep/prepunion.c

Lines changed: 218 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "access/heapam.h"
3434
#include "access/htup_details.h"
3535
#include "access/sysattr.h"
36+
#include "catalog/partition.h"
3637
#include "catalog/pg_inherits_fn.h"
3738
#include "catalog/pg_type.h"
3839
#include "miscadmin.h"
@@ -100,6 +101,19 @@ static List *generate_append_tlist(List *colTypes, List *colCollations,
100101
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
101102
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
102103
Index rti);
104+
static void expand_partitioned_rtentry(PlannerInfo *root,
105+
RangeTblEntry *parentrte,
106+
Index parentRTindex, Relation parentrel,
107+
PlanRowMark *parentrc, PartitionDesc partdesc,
108+
LOCKMODE lockmode,
109+
bool *has_child, List **appinfos,
110+
List **partitioned_child_rels);
111+
static void expand_single_inheritance_child(PlannerInfo *root,
112+
RangeTblEntry *parentrte,
113+
Index parentRTindex, Relation parentrel,
114+
PlanRowMark *parentrc, Relation childrel,
115+
bool *has_child, List **appinfos,
116+
List **partitioned_child_rels);
103117
static void make_inh_translation_list(Relation oldrelation,
104118
Relation newrelation,
105119
Index newvarno,
@@ -1455,131 +1469,62 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
14551469
/* Scan the inheritance set and expand it */
14561470
appinfos = NIL;
14571471
has_child = false;
1458-
foreach(l, inhOIDs)
1472+
if (RelationGetPartitionDesc(oldrelation) != NULL)
14591473
{
1460-
Oid childOID = lfirst_oid(l);
1461-
Relation newrelation;
1462-
RangeTblEntry *childrte;
1463-
Index childRTindex;
1464-
AppendRelInfo *appinfo;
1465-
1466-
/* Open rel if needed; we already have required locks */
1467-
if (childOID != parentOID)
1468-
newrelation = heap_open(childOID, NoLock);
1469-
else
1470-
newrelation = oldrelation;
1471-
1472-
/*
1473-
* It is possible that the parent table has children that are temp
1474-
* tables of other backends. We cannot safely access such tables
1475-
* (because of buffering issues), and the best thing to do seems to be
1476-
* to silently ignore them.
1477-
*/
1478-
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
1479-
{
1480-
heap_close(newrelation, lockmode);
1481-
continue;
1482-
}
1483-
14841474
/*
1485-
* Build an RTE for the child, and attach to query's rangetable list.
1486-
* We copy most fields of the parent's RTE, but replace relation OID
1487-
* and relkind, and set inh = false. Also, set requiredPerms to zero
1488-
* since all required permissions checks are done on the original RTE.
1489-
* Likewise, set the child's securityQuals to empty, because we only
1490-
* want to apply the parent's RLS conditions regardless of what RLS
1491-
* properties individual children may have. (This is an intentional
1492-
* choice to make inherited RLS work like regular permissions checks.)
1493-
* The parent securityQuals will be propagated to children along with
1494-
* other base restriction clauses, so we don't need to do it here.
1475+
* If this table has partitions, recursively expand them in the order
1476+
* in which they appear in the PartitionDesc. But first, expand the
1477+
* parent itself.
14951478
*/
1496-
childrte = copyObject(rte);
1497-
childrte->relid = childOID;
1498-
childrte->relkind = newrelation->rd_rel->relkind;
1499-
childrte->inh = false;
1500-
childrte->requiredPerms = 0;
1501-
childrte->securityQuals = NIL;
1502-
parse->rtable = lappend(parse->rtable, childrte);
1503-
childRTindex = list_length(parse->rtable);
1504-
1479+
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
1480+
oldrelation,
1481+
&has_child, &appinfos,
1482+
&partitioned_child_rels);
1483+
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
1484+
RelationGetPartitionDesc(oldrelation),
1485+
lockmode,
1486+
&has_child, &appinfos,
1487+
&partitioned_child_rels);
1488+
}
1489+
else
1490+
{
15051491
/*
1506-
* Build an AppendRelInfo for this parent and child, unless the child
1507-
* is a partitioned table.
1492+
* This table has no partitions. Expand any plain inheritance
1493+
* children in the order the OIDs were returned by
1494+
* find_all_inheritors.
15081495
*/
1509-
if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
1496+
foreach(l, inhOIDs)
15101497
{
1511-
/* Remember if we saw a real child. */
1498+
Oid childOID = lfirst_oid(l);
1499+
Relation newrelation;
1500+
1501+
/* Open rel if needed; we already have required locks */
15121502
if (childOID != parentOID)
1513-
has_child = true;
1514-
1515-
appinfo = makeNode(AppendRelInfo);
1516-
appinfo->parent_relid = rti;
1517-
appinfo->child_relid = childRTindex;
1518-
appinfo->parent_reltype = oldrelation->rd_rel->reltype;
1519-
appinfo->child_reltype = newrelation->rd_rel->reltype;
1520-
make_inh_translation_list(oldrelation, newrelation, childRTindex,
1521-
&appinfo->translated_vars);
1522-
appinfo->parent_reloid = parentOID;
1523-
appinfos = lappend(appinfos, appinfo);
1503+
newrelation = heap_open(childOID, NoLock);
1504+
else
1505+
newrelation = oldrelation;
15241506

15251507
/*
1526-
* Translate the column permissions bitmaps to the child's attnums
1527-
* (we have to build the translated_vars list before we can do
1528-
* this). But if this is the parent table, leave copyObject's
1529-
* result alone.
1530-
*
1531-
* Note: we need to do this even though the executor won't run any
1532-
* permissions checks on the child RTE. The
1533-
* insertedCols/updatedCols bitmaps may be examined for
1534-
* trigger-firing purposes.
1508+
* It is possible that the parent table has children that are temp
1509+
* tables of other backends. We cannot safely access such tables
1510+
* (because of buffering issues), and the best thing to do seems
1511+
* to be to silently ignore them.
15351512
*/
1536-
if (childOID != parentOID)
1513+
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
15371514
{
1538-
childrte->selectedCols = translate_col_privs(rte->selectedCols,
1539-
appinfo->translated_vars);
1540-
childrte->insertedCols = translate_col_privs(rte->insertedCols,
1541-
appinfo->translated_vars);
1542-
childrte->updatedCols = translate_col_privs(rte->updatedCols,
1543-
appinfo->translated_vars);
1515+
heap_close(newrelation, lockmode);
1516+
continue;
15441517
}
1545-
}
1546-
else
1547-
partitioned_child_rels = lappend_int(partitioned_child_rels,
1548-
childRTindex);
15491518

1550-
/*
1551-
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
1552-
*/
1553-
if (oldrc)
1554-
{
1555-
PlanRowMark *newrc = makeNode(PlanRowMark);
1556-
1557-
newrc->rti = childRTindex;
1558-
newrc->prti = rti;
1559-
newrc->rowmarkId = oldrc->rowmarkId;
1560-
/* Reselect rowmark type, because relkind might not match parent */
1561-
newrc->markType = select_rowmark_type(childrte, oldrc->strength);
1562-
newrc->allMarkTypes = (1 << newrc->markType);
1563-
newrc->strength = oldrc->strength;
1564-
newrc->waitPolicy = oldrc->waitPolicy;
1565-
1566-
/*
1567-
* We mark RowMarks for partitioned child tables as parent
1568-
* RowMarks so that the executor ignores them (except their
1569-
* existence means that the child tables be locked using
1570-
* appropriate mode).
1571-
*/
1572-
newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
1573-
1574-
/* Include child's rowmark type in parent's allMarkTypes */
1575-
oldrc->allMarkTypes |= newrc->allMarkTypes;
1519+
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
1520+
newrelation,
1521+
&has_child, &appinfos,
1522+
&partitioned_child_rels);
15761523

1577-
root->rowMarks = lappend(root->rowMarks, newrc);
1524+
/* Close child relations, but keep locks */
1525+
if (childOID != parentOID)
1526+
heap_close(newrelation, NoLock);
15781527
}
1579-
1580-
/* Close child relations, but keep locks */
1581-
if (childOID != parentOID)
1582-
heap_close(newrelation, NoLock);
15831528
}
15841529

15851530
heap_close(oldrelation, NoLock);
@@ -1620,6 +1565,169 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
16201565
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
16211566
}
16221567

1568+
static void
1569+
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
1570+
Index parentRTindex, Relation parentrel,
1571+
PlanRowMark *parentrc, PartitionDesc partdesc,
1572+
LOCKMODE lockmode,
1573+
bool *has_child, List **appinfos,
1574+
List **partitioned_child_rels)
1575+
{
1576+
int i;
1577+
1578+
check_stack_depth();
1579+
1580+
for (i = 0; i < partdesc->nparts; i++)
1581+
{
1582+
Oid childOID = partdesc->oids[i];
1583+
Relation childrel;
1584+
1585+
/* Open rel; we already have required locks */
1586+
childrel = heap_open(childOID, NoLock);
1587+
1588+
/* As in expand_inherited_rtentry, skip non-local temp tables */
1589+
if (RELATION_IS_OTHER_TEMP(childrel))
1590+
{
1591+
heap_close(childrel, lockmode);
1592+
continue;
1593+
}
1594+
1595+
expand_single_inheritance_child(root, parentrte, parentRTindex,
1596+
parentrel, parentrc, childrel,
1597+
has_child, appinfos,
1598+
partitioned_child_rels);
1599+
1600+
/* If this child is itself partitioned, recurse */
1601+
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1602+
expand_partitioned_rtentry(root, parentrte, parentRTindex,
1603+
parentrel, parentrc,
1604+
RelationGetPartitionDesc(childrel),
1605+
lockmode,
1606+
has_child, appinfos,
1607+
partitioned_child_rels);
1608+
1609+
/* Close child relation, but keep locks */
1610+
heap_close(childrel, NoLock);
1611+
}
1612+
}
1613+
1614+
/*
1615+
* expand_single_inheritance_child
1616+
* Expand a single inheritance child, if needed.
1617+
*
1618+
* If this is a temp table of another backend, we'll return without doing
1619+
* anything at all. Otherwise, we'll set "has_child" to true, build a
1620+
* RangeTblEntry and either a PartitionedChildRelInfo or AppendRelInfo as
1621+
* appropriate, plus maybe a PlanRowMark.
1622+
*/
1623+
static void
1624+
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
1625+
Index parentRTindex, Relation parentrel,
1626+
PlanRowMark *parentrc, Relation childrel,
1627+
bool *has_child, List **appinfos,
1628+
List **partitioned_child_rels)
1629+
{
1630+
Query *parse = root->parse;
1631+
Oid parentOID = RelationGetRelid(parentrel);
1632+
Oid childOID = RelationGetRelid(childrel);
1633+
RangeTblEntry *childrte;
1634+
Index childRTindex;
1635+
AppendRelInfo *appinfo;
1636+
1637+
/*
1638+
* Build an RTE for the child, and attach to query's rangetable list. We
1639+
* copy most fields of the parent's RTE, but replace relation OID and
1640+
* relkind, and set inh = false. Also, set requiredPerms to zero since
1641+
* all required permissions checks are done on the original RTE. Likewise,
1642+
* set the child's securityQuals to empty, because we only want to apply
1643+
* the parent's RLS conditions regardless of what RLS properties
1644+
* individual children may have. (This is an intentional choice to make
1645+
* inherited RLS work like regular permissions checks.) The parent
1646+
* securityQuals will be propagated to children along with other base
1647+
* restriction clauses, so we don't need to do it here.
1648+
*/
1649+
childrte = copyObject(parentrte);
1650+
childrte->relid = childOID;
1651+
childrte->relkind = childrel->rd_rel->relkind;
1652+
childrte->inh = false;
1653+
childrte->requiredPerms = 0;
1654+
childrte->securityQuals = NIL;
1655+
parse->rtable = lappend(parse->rtable, childrte);
1656+
childRTindex = list_length(parse->rtable);
1657+
1658+
/*
1659+
* Build an AppendRelInfo for this parent and child, unless the child is a
1660+
* partitioned table.
1661+
*/
1662+
if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
1663+
{
1664+
/* Remember if we saw a real child. */
1665+
if (childOID != parentOID)
1666+
*has_child = true;
1667+
1668+
appinfo = makeNode(AppendRelInfo);
1669+
appinfo->parent_relid = parentRTindex;
1670+
appinfo->child_relid = childRTindex;
1671+
appinfo->parent_reltype = parentrel->rd_rel->reltype;
1672+
appinfo->child_reltype = childrel->rd_rel->reltype;
1673+
make_inh_translation_list(parentrel, childrel, childRTindex,
1674+
&appinfo->translated_vars);
1675+
appinfo->parent_reloid = parentOID;
1676+
*appinfos = lappend(*appinfos, appinfo);
1677+
1678+
/*
1679+
* Translate the column permissions bitmaps to the child's attnums (we
1680+
* have to build the translated_vars list before we can do this). But
1681+
* if this is the parent table, leave copyObject's result alone.
1682+
*
1683+
* Note: we need to do this even though the executor won't run any
1684+
* permissions checks on the child RTE. The insertedCols/updatedCols
1685+
* bitmaps may be examined for trigger-firing purposes.
1686+
*/
1687+
if (childOID != parentOID)
1688+
{
1689+
childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
1690+
appinfo->translated_vars);
1691+
childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
1692+
appinfo->translated_vars);
1693+
childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
1694+
appinfo->translated_vars);
1695+
}
1696+
}
1697+
else
1698+
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
1699+
childRTindex);
1700+
1701+
/*
1702+
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
1703+
*/
1704+
if (parentrc)
1705+
{
1706+
PlanRowMark *childrc = makeNode(PlanRowMark);
1707+
1708+
childrc->rti = childRTindex;
1709+
childrc->prti = parentRTindex;
1710+
childrc->rowmarkId = parentrc->rowmarkId;
1711+
/* Reselect rowmark type, because relkind might not match parent */
1712+
childrc->markType = select_rowmark_type(childrte, parentrc->strength);
1713+
childrc->allMarkTypes = (1 << childrc->markType);
1714+
childrc->strength = parentrc->strength;
1715+
childrc->waitPolicy = parentrc->waitPolicy;
1716+
1717+
/*
1718+
* We mark RowMarks for partitioned child tables as parent RowMarks so
1719+
* that the executor ignores them (except their existence means that
1720+
* the child tables be locked using appropriate mode).
1721+
*/
1722+
childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
1723+
1724+
/* Include child's rowmark type in parent's allMarkTypes */
1725+
parentrc->allMarkTypes |= childrc->allMarkTypes;
1726+
1727+
root->rowMarks = lappend(root->rowMarks, childrc);
1728+
}
1729+
}
1730+
16231731
/*
16241732
* make_inh_translation_list
16251733
* Build the list of translations from parent Vars to child Vars for

src/test/regress/expected/insert.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,12 @@ select tableoid::regclass, * from list_parted;
278278
-------------+----+----
279279
part_aa_bb | aA |
280280
part_cc_dd | cC | 1
281-
part_null | | 0
282-
part_null | | 1
283281
part_ee_ff1 | ff | 1
284282
part_ee_ff1 | EE | 1
285283
part_ee_ff2 | ff | 11
286284
part_ee_ff2 | EE | 10
285+
part_null | | 0
286+
part_null | | 1
287287
(8 rows)
288288

289289
-- some more tests to exercise tuple-routing with multi-level partitioning

0 commit comments

Comments
 (0)