Skip to content

Commit 677a1dc

Browse files
committed
Fix publish_as_relid with multiple publications
Commit 83fd453 allowed publishing of changes via ancestors, for publications defined with publish_via_partition_root. But the way the ancestor was determined in get_rel_sync_entry() was incorrect, simply updating the same variable. So with multiple publications, replicating different ancestors, the outcome depended on the order of publications in the list - the value from the last loop was used, even if it wasn't the top-most ancestor. This is a probably rare situation, as in most cases publications do not overlap, so each partition has exactly one candidate ancestor to replicate as and there's no ambiguity. Fixed by tracking the "ancestor level" for each publication, and picking the top-most ancestor. Adds a test case, verifying the correct ancestor is used for publishing the changes and that this does not depend on order of publications in the list. Older releases have another bug in this loop - once all actions are replicated, the loop is terminated, on the assumption that inspecting additional publications is unecessary. But that misses the fact that those additional applications may replicate different ancestors. Fixed by removal of this break condition. We might still terminate the loop in some cases (e.g. when replicating all actions and the ancestor is the partition root). Backpatch to 13, where publish_via_partition_root was introduced. Initial report and fix by me, test added by Hou zj. Reviews and improvements by Amit Kapila. Author: Tomas Vondra, Hou zj, Amit Kapila Reviewed-by: Amit Kapila, Hou zj Discussion: https://postgr.es/m/d26d24dd-2fab-3c48-0162-2b7f84a9c893%40enterprisedb.com
1 parent 7d30f59 commit 677a1dc

File tree

2 files changed

+95
-8
lines changed

2 files changed

+95
-8
lines changed

src/backend/replication/pgoutput/pgoutput.c

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,6 +1039,7 @@ get_rel_sync_entry(PGOutputData *data, Oid relid)
10391039
List *pubids = GetRelationPublications(relid);
10401040
ListCell *lc;
10411041
Oid publish_as_relid = relid;
1042+
int publish_ancestor_level = 0;
10421043
bool am_partition = get_rel_relispartition(relid);
10431044
char relkind = get_rel_relkind(relid);
10441045

@@ -1064,11 +1065,28 @@ get_rel_sync_entry(PGOutputData *data, Oid relid)
10641065
Publication *pub = lfirst(lc);
10651066
bool publish = false;
10661067

1068+
/*
1069+
* Under what relid should we publish changes in this publication?
1070+
* We'll use the top-most relid across all publications. Also track
1071+
* the ancestor level for this publication.
1072+
*/
1073+
Oid pub_relid = relid;
1074+
int ancestor_level = 0;
1075+
1076+
/*
1077+
* If this is a FOR ALL TABLES publication, pick the partition root
1078+
* and set the ancestor level accordingly.
1079+
*/
10671080
if (pub->alltables)
10681081
{
10691082
publish = true;
10701083
if (pub->pubviaroot && am_partition)
1071-
publish_as_relid = llast_oid(get_partition_ancestors(relid));
1084+
{
1085+
List *ancestors = get_partition_ancestors(relid);
1086+
1087+
pub_relid = llast_oid(ancestors);
1088+
ancestor_level = list_length(ancestors);
1089+
}
10721090
}
10731091

10741092
if (!publish)
@@ -1085,6 +1103,7 @@ get_rel_sync_entry(PGOutputData *data, Oid relid)
10851103
{
10861104
List *ancestors = get_partition_ancestors(relid);
10871105
ListCell *lc2;
1106+
int level = 0;
10881107

10891108
/*
10901109
* Find the "topmost" ancestor that is in this
@@ -1094,12 +1113,17 @@ get_rel_sync_entry(PGOutputData *data, Oid relid)
10941113
{
10951114
Oid ancestor = lfirst_oid(lc2);
10961115

1116+
level++;
1117+
10971118
if (list_member_oid(GetRelationPublications(ancestor),
10981119
pub->oid))
10991120
{
11001121
ancestor_published = true;
11011122
if (pub->pubviaroot)
1102-
publish_as_relid = ancestor;
1123+
{
1124+
pub_relid = ancestor;
1125+
ancestor_level = level;
1126+
}
11031127
}
11041128
}
11051129
}
@@ -1120,11 +1144,21 @@ get_rel_sync_entry(PGOutputData *data, Oid relid)
11201144
entry->pubactions.pubupdate |= pub->pubactions.pubupdate;
11211145
entry->pubactions.pubdelete |= pub->pubactions.pubdelete;
11221146
entry->pubactions.pubtruncate |= pub->pubactions.pubtruncate;
1123-
}
11241147

1125-
if (entry->pubactions.pubinsert && entry->pubactions.pubupdate &&
1126-
entry->pubactions.pubdelete && entry->pubactions.pubtruncate)
1127-
break;
1148+
/*
1149+
* We want to publish the changes as the top-most ancestor
1150+
* across all publications. So we need to check if the
1151+
* already calculated level is higher than the new one. If
1152+
* yes, we can ignore the new value (as it's a child).
1153+
* Otherwise the new value is an ancestor, so we keep it.
1154+
*/
1155+
if (publish_ancestor_level > ancestor_level)
1156+
continue;
1157+
1158+
/* The new value is an ancestor, so let's keep it. */
1159+
publish_as_relid = pub_relid;
1160+
publish_ancestor_level = ancestor_level;
1161+
}
11281162
}
11291163

11301164
list_free(pubids);

src/test/subscription/t/013_partition.pl

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
use warnings;
77
use PostgresNode;
88
use TestLib;
9-
use Test::More tests => 63;
9+
use Test::More tests => 67;
1010

1111
# setup
1212

@@ -409,6 +409,12 @@ BEGIN
409409
"CREATE TABLE tab3 (a int PRIMARY KEY, b text) PARTITION BY LIST (a)");
410410
$node_publisher->safe_psql('postgres',
411411
"CREATE TABLE tab3_1 PARTITION OF tab3 FOR VALUES IN (0, 1, 2, 3, 5, 6)");
412+
$node_publisher->safe_psql('postgres',
413+
"CREATE TABLE tab4 (a int PRIMARY KEY) PARTITION BY LIST (a)");
414+
$node_publisher->safe_psql('postgres',
415+
"CREATE TABLE tab4_1 PARTITION OF tab4 FOR VALUES IN (0, 1) PARTITION BY LIST (a)");
416+
$node_publisher->safe_psql('postgres',
417+
"CREATE TABLE tab4_1_1 PARTITION OF tab4_1 FOR VALUES IN (0, 1)");
412418
$node_publisher->safe_psql('postgres',
413419
"ALTER PUBLICATION pub_all SET (publish_via_partition_root = true)");
414420
# Note: tab3_1's parent is not in the publication, in which case its
@@ -418,6 +424,9 @@ BEGIN
418424
$node_publisher->safe_psql('postgres',
419425
"CREATE PUBLICATION pub_viaroot FOR TABLE tab2, tab2_1, tab3_1 WITH (publish_via_partition_root = true)"
420426
);
427+
$node_publisher->safe_psql('postgres',
428+
"CREATE PUBLICATION pub_lower_level FOR TABLE tab4_1 WITH (publish_via_partition_root = true)"
429+
);
421430

422431
# prepare data for the initial sync
423432
$node_publisher->safe_psql('postgres', "INSERT INTO tab2 VALUES (1)");
@@ -462,10 +471,16 @@ BEGIN
462471
$node_subscriber2->safe_psql('postgres',
463472
"CREATE TABLE tab3_1 (a int PRIMARY KEY, c text DEFAULT 'sub2_tab3_1', b text)"
464473
);
474+
$node_subscriber2->safe_psql('postgres',
475+
"CREATE TABLE tab4 (a int PRIMARY KEY)"
476+
);
477+
$node_subscriber2->safe_psql('postgres',
478+
"CREATE TABLE tab4_1 (a int PRIMARY KEY)"
479+
);
465480
# Publication that sub2 points to now publishes via root, so must update
466481
# subscription target relations.
467482
$node_subscriber2->safe_psql('postgres',
468-
"ALTER SUBSCRIPTION sub2 REFRESH PUBLICATION");
483+
"ALTER SUBSCRIPTION sub2 SET PUBLICATION pub_lower_level, pub_all");
469484

470485
# Wait for initial sync of all subscriptions
471486
$node_subscriber1->poll_query_until('postgres', $synced_query)
@@ -486,6 +501,8 @@ BEGIN
486501
"INSERT INTO tab2 VALUES (0), (3), (5)");
487502
$node_publisher->safe_psql('postgres',
488503
"INSERT INTO tab3 VALUES (1), (0), (3), (5)");
504+
$node_publisher->safe_psql('postgres',
505+
"INSERT INTO tab4 VALUES (0)");
489506

490507
$node_publisher->wait_for_catchup('sub_viaroot');
491508
$node_publisher->wait_for_catchup('sub2');
@@ -525,6 +542,42 @@ BEGIN
525542
sub2_tab3|3
526543
sub2_tab3|5), 'inserts into tab3 replicated');
527544

545+
$result = $node_subscriber2->safe_psql('postgres',
546+
"SELECT a FROM tab4 ORDER BY 1");
547+
is( $result, qq(0), 'inserts into tab4 replicated');
548+
549+
$result = $node_subscriber2->safe_psql('postgres',
550+
"SELECT a FROM tab4_1 ORDER BY 1");
551+
is( $result, qq(), 'inserts into tab4_1 replicated');
552+
553+
# now switch the order of publications in the list, try again, the result
554+
# should be the same (no dependence on order of pulications)
555+
$node_subscriber2->safe_psql('postgres',
556+
"ALTER SUBSCRIPTION sub2 SET PUBLICATION pub_all, pub_lower_level");
557+
558+
# make sure the subscription on the second subscriber is synced, before
559+
# continuing
560+
$node_subscriber2->poll_query_until('postgres', $synced_query)
561+
or die "Timed out while waiting for subscriber to synchronize data";
562+
563+
# Insert a change into the leaf partition, should be replicated through
564+
# the partition root (thanks to the FOR ALL TABLES partition).
565+
$node_publisher->safe_psql('postgres',
566+
"INSERT INTO tab4 VALUES (1)");
567+
568+
$node_publisher->wait_for_catchup('sub2');
569+
570+
# tab4 change should be replicated through the root partition, which
571+
# maps to the tab4 relation on subscriber.
572+
$result = $node_subscriber2->safe_psql('postgres',
573+
"SELECT a FROM tab4 ORDER BY 1");
574+
is( $result, qq(0
575+
1), 'inserts into tab4 replicated');
576+
577+
$result = $node_subscriber2->safe_psql('postgres',
578+
"SELECT a FROM tab4_1 ORDER BY 1");
579+
is( $result, qq(), 'inserts into tab4_1 replicated');
580+
528581
# update (replicated as update)
529582
$node_publisher->safe_psql('postgres', "UPDATE tab1 SET a = 6 WHERE a = 5");
530583
$node_publisher->safe_psql('postgres', "UPDATE tab2 SET a = 6 WHERE a = 5");

0 commit comments

Comments
 (0)