Skip to content

Commit e3faddf

Browse files
committed
Fix state reversal after partition tuple routing
We make some changes to ModifyTableState and the EState it uses whenever we route tuples to partitions; but we weren't restoring properly in all cases, possibly causing crashes when partitions with different tuple descriptors are targeted by tuples inserted in the same command. Refactor some code, creating ExecPrepareTupleRouting, to encapsulate the needed state changing logic, and have it invoked one level above its current place (ie. put it in ExecModifyTable instead of ExecInsert); this makes it all more readable. Add a test case to exercise this. We don't support having views as partitions; and since only views can have INSTEAD OF triggers, there is no point in testing for INSTEAD OF when processing insertions into a partitioned table. Remove code that appears to support this (but which is actually never relevant.) In passing, fix location of some very confusing comments in ModifyTableState. Reported-by: Amit Langote Author: Etsuro Fujita, Amit Langote Discussion: https://postgr/es/m/0473bf5c-57b1-f1f7-3d58-455c2230bc5f@lab.ntt.co.jp
1 parent ff30116 commit e3faddf

File tree

5 files changed

+189
-113
lines changed

5 files changed

+189
-113
lines changed

src/backend/commands/copy.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2656,13 +2656,12 @@ CopyFrom(CopyState cstate)
26562656
if (cstate->transition_capture != NULL)
26572657
{
26582658
if (resultRelInfo->ri_TrigDesc &&
2659-
(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
2660-
resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
2659+
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
26612660
{
26622661
/*
2663-
* If there are any BEFORE or INSTEAD triggers on the
2664-
* partition, we'll have to be ready to convert their
2665-
* result back to tuplestore format.
2662+
* If there are any BEFORE triggers on the partition,
2663+
* we'll have to be ready to convert their result back to
2664+
* tuplestore format.
26662665
*/
26672666
cstate->transition_capture->tcs_original_insert_tuple = NULL;
26682667
cstate->transition_capture->tcs_map =
@@ -2803,12 +2802,13 @@ CopyFrom(CopyState cstate)
28032802
* tuples inserted by an INSERT command.
28042803
*/
28052804
processed++;
2805+
}
28062806

2807-
if (saved_resultRelInfo)
2808-
{
2809-
resultRelInfo = saved_resultRelInfo;
2810-
estate->es_result_relation_info = resultRelInfo;
2811-
}
2807+
/* Restore the saved ResultRelInfo */
2808+
if (saved_resultRelInfo)
2809+
{
2810+
resultRelInfo = saved_resultRelInfo;
2811+
estate->es_result_relation_info = resultRelInfo;
28122812
}
28132813
}
28142814

src/backend/executor/nodeModifyTable.c

Lines changed: 117 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
6262
EState *estate,
6363
bool canSetTag,
6464
TupleTableSlot **returning);
65+
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
66+
EState *estate,
67+
ResultRelInfo *targetRelInfo,
68+
TupleTableSlot *slot);
6569

6670
/*
6771
* Verify that the tuples to be produced by INSERT or UPDATE match the
@@ -259,7 +263,6 @@ ExecInsert(ModifyTableState *mtstate,
259263
{
260264
HeapTuple tuple;
261265
ResultRelInfo *resultRelInfo;
262-
ResultRelInfo *saved_resultRelInfo = NULL;
263266
Relation resultRelationDesc;
264267
Oid newId;
265268
List *recheckIndexes = NIL;
@@ -275,100 +278,6 @@ ExecInsert(ModifyTableState *mtstate,
275278
* get information on the (current) result relation
276279
*/
277280
resultRelInfo = estate->es_result_relation_info;
278-
279-
/* Determine the partition to heap_insert the tuple into */
280-
if (mtstate->mt_partition_dispatch_info)
281-
{
282-
int leaf_part_index;
283-
TupleConversionMap *map;
284-
285-
/*
286-
* Away we go ... If we end up not finding a partition after all,
287-
* ExecFindPartition() does not return and errors out instead.
288-
* Otherwise, the returned value is to be used as an index into arrays
289-
* mt_partitions[] and mt_partition_tupconv_maps[] that will get us
290-
* the ResultRelInfo and TupleConversionMap for the partition,
291-
* respectively.
292-
*/
293-
leaf_part_index = ExecFindPartition(resultRelInfo,
294-
mtstate->mt_partition_dispatch_info,
295-
slot,
296-
estate);
297-
Assert(leaf_part_index >= 0 &&
298-
leaf_part_index < mtstate->mt_num_partitions);
299-
300-
/*
301-
* Save the old ResultRelInfo and switch to the one corresponding to
302-
* the selected partition.
303-
*/
304-
saved_resultRelInfo = resultRelInfo;
305-
resultRelInfo = mtstate->mt_partitions + leaf_part_index;
306-
307-
/* We do not yet have a way to insert into a foreign partition */
308-
if (resultRelInfo->ri_FdwRoutine)
309-
ereport(ERROR,
310-
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
311-
errmsg("cannot route inserted tuples to a foreign table")));
312-
313-
/* For ExecInsertIndexTuples() to work on the partition's indexes */
314-
estate->es_result_relation_info = resultRelInfo;
315-
316-
/*
317-
* If we're capturing transition tuples, we might need to convert from
318-
* the partition rowtype to parent rowtype.
319-
*/
320-
if (mtstate->mt_transition_capture != NULL)
321-
{
322-
if (resultRelInfo->ri_TrigDesc &&
323-
(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
324-
resultRelInfo->ri_TrigDesc->trig_insert_instead_row))
325-
{
326-
/*
327-
* If there are any BEFORE or INSTEAD triggers on the
328-
* partition, we'll have to be ready to convert their result
329-
* back to tuplestore format.
330-
*/
331-
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
332-
mtstate->mt_transition_capture->tcs_map =
333-
mtstate->mt_transition_tupconv_maps[leaf_part_index];
334-
}
335-
else
336-
{
337-
/*
338-
* Otherwise, just remember the original unconverted tuple, to
339-
* avoid a needless round trip conversion.
340-
*/
341-
mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple;
342-
mtstate->mt_transition_capture->tcs_map = NULL;
343-
}
344-
}
345-
if (mtstate->mt_oc_transition_capture != NULL)
346-
mtstate->mt_oc_transition_capture->tcs_map =
347-
mtstate->mt_transition_tupconv_maps[leaf_part_index];
348-
349-
/*
350-
* We might need to convert from the parent rowtype to the partition
351-
* rowtype.
352-
*/
353-
map = mtstate->mt_partition_tupconv_maps[leaf_part_index];
354-
if (map)
355-
{
356-
Relation partrel = resultRelInfo->ri_RelationDesc;
357-
358-
tuple = do_convert_tuple(tuple, map);
359-
360-
/*
361-
* We must use the partition's tuple descriptor from this point
362-
* on, until we're finished dealing with the partition. Use the
363-
* dedicated slot for that.
364-
*/
365-
slot = mtstate->mt_partition_tuple_slot;
366-
Assert(slot != NULL);
367-
ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
368-
ExecStoreTuple(tuple, slot, InvalidBuffer, true);
369-
}
370-
}
371-
372281
resultRelationDesc = resultRelInfo->ri_RelationDesc;
373282

374283
/*
@@ -477,7 +386,7 @@ ExecInsert(ModifyTableState *mtstate,
477386
* No need though if the tuple has been routed, and a BR trigger
478387
* doesn't exist.
479388
*/
480-
if (saved_resultRelInfo != NULL &&
389+
if (resultRelInfo->ri_PartitionRoot != NULL &&
481390
!(resultRelInfo->ri_TrigDesc &&
482391
resultRelInfo->ri_TrigDesc->trig_insert_before_row))
483392
check_partition_constr = false;
@@ -645,9 +554,6 @@ ExecInsert(ModifyTableState *mtstate,
645554
if (resultRelInfo->ri_projectReturning)
646555
result = ExecProcessReturning(resultRelInfo, slot, planSlot);
647556

648-
if (saved_resultRelInfo)
649-
estate->es_result_relation_info = saved_resultRelInfo;
650-
651557
return result;
652558
}
653559

@@ -1545,6 +1451,111 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
15451451
}
15461452
}
15471453

1454+
/*
1455+
* ExecPrepareTupleRouting --- prepare for routing one tuple
1456+
*
1457+
* Determine the partition in which the tuple in slot is to be inserted,
1458+
* and modify mtstate and estate to prepare for it.
1459+
*
1460+
* Caller must revert the estate changes after executing the insertion!
1461+
* In mtstate, transition capture changes may also need to be reverted.
1462+
*
1463+
* Returns a slot holding the tuple of the partition rowtype.
1464+
*/
1465+
static TupleTableSlot *
1466+
ExecPrepareTupleRouting(ModifyTableState *mtstate,
1467+
EState *estate,
1468+
ResultRelInfo *targetRelInfo,
1469+
TupleTableSlot *slot)
1470+
{
1471+
int partidx;
1472+
ResultRelInfo *partrel;
1473+
HeapTuple tuple;
1474+
TupleConversionMap *map;
1475+
1476+
/*
1477+
* Determine the target partition. If ExecFindPartition does not find
1478+
* a partition after all, it doesn't return here; otherwise, the returned
1479+
* value is to be used as an index into the arrays for the resultRelInfo
1480+
* and TupleConversionMap for the partition.
1481+
*/
1482+
partidx = ExecFindPartition(targetRelInfo,
1483+
mtstate->mt_partition_dispatch_info,
1484+
slot,
1485+
estate);
1486+
Assert(partidx >= 0 && partidx < mtstate->mt_num_partitions);
1487+
1488+
/* Get the ResultRelInfo corresponding to the selected partition. */
1489+
partrel = &mtstate->mt_partitions[partidx];
1490+
1491+
/* We do not yet have a way to insert into a foreign partition */
1492+
if (partrel->ri_FdwRoutine)
1493+
ereport(ERROR,
1494+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1495+
errmsg("cannot route inserted tuples to a foreign table")));
1496+
1497+
/*
1498+
* Make it look like we are inserting into the partition.
1499+
*/
1500+
estate->es_result_relation_info = partrel;
1501+
1502+
/* Get the heap tuple out of the given slot. */
1503+
tuple = ExecMaterializeSlot(slot);
1504+
1505+
/*
1506+
* If we're capturing transition tuples, we might need to convert from the
1507+
* partition rowtype to parent rowtype.
1508+
*/
1509+
if (mtstate->mt_transition_capture != NULL)
1510+
{
1511+
if (partrel->ri_TrigDesc &&
1512+
partrel->ri_TrigDesc->trig_insert_before_row)
1513+
{
1514+
/*
1515+
* If there are any BEFORE triggers on the partition, we'll have
1516+
* to be ready to convert their result back to tuplestore format.
1517+
*/
1518+
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1519+
mtstate->mt_transition_capture->tcs_map =
1520+
mtstate->mt_transition_tupconv_maps[partidx];
1521+
}
1522+
else
1523+
{
1524+
/*
1525+
* Otherwise, just remember the original unconverted tuple, to
1526+
* avoid a needless round trip conversion.
1527+
*/
1528+
mtstate->mt_transition_capture->tcs_original_insert_tuple = tuple;
1529+
mtstate->mt_transition_capture->tcs_map = NULL;
1530+
}
1531+
}
1532+
if (mtstate->mt_oc_transition_capture != NULL)
1533+
{
1534+
mtstate->mt_oc_transition_capture->tcs_map =
1535+
mtstate->mt_transition_tupconv_maps[partidx];
1536+
}
1537+
1538+
/*
1539+
* Convert the tuple, if necessary.
1540+
*/
1541+
map = mtstate->mt_partition_tupconv_maps[partidx];
1542+
if (map)
1543+
{
1544+
tuple = do_convert_tuple(tuple, map);
1545+
1546+
/*
1547+
* We must use the partition's tuple descriptor from this point on,
1548+
* until we're finished dealing with the partition. Use the
1549+
* dedicated slot for that.
1550+
*/
1551+
slot = mtstate->mt_partition_tuple_slot;
1552+
ExecSetSlotDescriptor(slot, RelationGetDescr(partrel->ri_RelationDesc));
1553+
ExecStoreTuple(tuple, slot, InvalidBuffer, true);
1554+
}
1555+
1556+
return slot;
1557+
}
1558+
15481559
/* ----------------------------------------------------------------
15491560
* ExecModifyTable
15501561
*
@@ -1763,9 +1774,16 @@ ExecModifyTable(PlanState *pstate)
17631774
switch (operation)
17641775
{
17651776
case CMD_INSERT:
1777+
/* Prepare for tuple routing, if needed. */
1778+
if (node->mt_partition_dispatch_info)
1779+
slot = ExecPrepareTupleRouting(node, estate,
1780+
resultRelInfo, slot);
17661781
slot = ExecInsert(node, slot, planSlot,
17671782
node->mt_arbiterindexes, node->mt_onconflict,
17681783
estate, node->canSetTag);
1784+
/* Revert ExecPrepareTupleRouting's node change. */
1785+
if (node->mt_partition_dispatch_info)
1786+
estate->es_result_relation_info = resultRelInfo;
17691787
break;
17701788
case CMD_UPDATE:
17711789
slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,

src/include/nodes/execnodes.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -981,15 +981,24 @@ typedef struct ModifyTableState
981981
int mt_num_partitions; /* Number of members in the following
982982
* arrays */
983983
ResultRelInfo *mt_partitions; /* Per partition result relation */
984-
TupleConversionMap **mt_partition_tupconv_maps;
984+
985985
/* Per partition tuple conversion map */
986+
TupleConversionMap **mt_partition_tupconv_maps;
987+
988+
/*
989+
* Used to manipulate any given leaf partition's rowtype after that
990+
* partition is chosen for insertion by tuple-routing.
991+
*/
986992
TupleTableSlot *mt_partition_tuple_slot;
987-
struct TransitionCaptureState *mt_transition_capture;
993+
988994
/* controls transition table population for specified operation */
989-
struct TransitionCaptureState *mt_oc_transition_capture;
995+
struct TransitionCaptureState *mt_transition_capture;
996+
990997
/* controls transition table population for INSERT...ON CONFLICT UPDATE */
991-
TupleConversionMap **mt_transition_tupconv_maps;
998+
struct TransitionCaptureState *mt_oc_transition_capture;
999+
9921000
/* Per plan/partition tuple conversion */
1001+
TupleConversionMap **mt_transition_tupconv_maps;
9931002
} ModifyTableState;
9941003

9951004
/* ----------------

src/test/regress/expected/insert.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,32 @@ drop role regress_coldesc_role;
554554
drop table inserttest3;
555555
drop table brtrigpartcon;
556556
drop function brtrigpartcon1trigf();
557+
-- check that "do nothing" BR triggers work with tuple-routing (this checks
558+
-- that estate->es_result_relation_info is appropriately set/reset for each
559+
-- routed tuple)
560+
create table donothingbrtrig_test (a int, b text) partition by list (a);
561+
create table donothingbrtrig_test1 (b text, a int);
562+
create table donothingbrtrig_test2 (c text, b text, a int);
563+
alter table donothingbrtrig_test2 drop column c;
564+
create or replace function donothingbrtrig_func() returns trigger as $$begin raise notice 'b: %', new.b; return NULL; end$$ language plpgsql;
565+
create trigger donothingbrtrig1 before insert on donothingbrtrig_test1 for each row execute procedure donothingbrtrig_func();
566+
create trigger donothingbrtrig2 before insert on donothingbrtrig_test2 for each row execute procedure donothingbrtrig_func();
567+
alter table donothingbrtrig_test attach partition donothingbrtrig_test1 for values in (1);
568+
alter table donothingbrtrig_test attach partition donothingbrtrig_test2 for values in (2);
569+
insert into donothingbrtrig_test values (1, 'foo'), (2, 'bar');
570+
NOTICE: b: foo
571+
NOTICE: b: bar
572+
copy donothingbrtrig_test from stdout;
573+
NOTICE: b: baz
574+
NOTICE: b: qux
575+
select tableoid::regclass, * from donothingbrtrig_test;
576+
tableoid | a | b
577+
----------+---+---
578+
(0 rows)
579+
580+
-- cleanup
581+
drop table donothingbrtrig_test;
582+
drop function donothingbrtrig_func();
557583
-- check multi-column range partitioning with minvalue/maxvalue constraints
558584
create table mcrparted (a text, b int) partition by range(a, b);
559585
create table mcrparted1_lt_b partition of mcrparted for values from (minvalue, minvalue) to ('b', minvalue);

src/test/regress/sql/insert.sql

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,29 @@ drop table inserttest3;
378378
drop table brtrigpartcon;
379379
drop function brtrigpartcon1trigf();
380380

381+
-- check that "do nothing" BR triggers work with tuple-routing (this checks
382+
-- that estate->es_result_relation_info is appropriately set/reset for each
383+
-- routed tuple)
384+
create table donothingbrtrig_test (a int, b text) partition by list (a);
385+
create table donothingbrtrig_test1 (b text, a int);
386+
create table donothingbrtrig_test2 (c text, b text, a int);
387+
alter table donothingbrtrig_test2 drop column c;
388+
create or replace function donothingbrtrig_func() returns trigger as $$begin raise notice 'b: %', new.b; return NULL; end$$ language plpgsql;
389+
create trigger donothingbrtrig1 before insert on donothingbrtrig_test1 for each row execute procedure donothingbrtrig_func();
390+
create trigger donothingbrtrig2 before insert on donothingbrtrig_test2 for each row execute procedure donothingbrtrig_func();
391+
alter table donothingbrtrig_test attach partition donothingbrtrig_test1 for values in (1);
392+
alter table donothingbrtrig_test attach partition donothingbrtrig_test2 for values in (2);
393+
insert into donothingbrtrig_test values (1, 'foo'), (2, 'bar');
394+
copy donothingbrtrig_test from stdout;
395+
1 baz
396+
2 qux
397+
\.
398+
select tableoid::regclass, * from donothingbrtrig_test;
399+
400+
-- cleanup
401+
drop table donothingbrtrig_test;
402+
drop function donothingbrtrig_func();
403+
381404
-- check multi-column range partitioning with minvalue/maxvalue constraints
382405
create table mcrparted (a text, b int) partition by range(a, b);
383406
create table mcrparted1_lt_b partition of mcrparted for values from (minvalue, minvalue) to ('b', minvalue);

0 commit comments

Comments
 (0)