Skip to content

Commit 0fcd073

Browse files
committed
Merge branch 'PGPROEE9_6_MULTIMASTER' of https://gitlab.postgrespro.ru/pgpro-dev/postgrespro into PGPROEE9_6_MULTIMASTER
2 parents 13426e8 + 75ce47b commit 0fcd073

File tree

5 files changed

+35
-68
lines changed

5 files changed

+35
-68
lines changed

contrib/mmts/Cluster.pm

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package Cluster;
33
use strict;
44
use warnings;
55

6-
use Proc::ProcessTable;
76
use PostgresNode;
87
use TestLib;
98
use Test::More;
@@ -166,45 +165,6 @@ sub stopid
166165
return stopnode($self->{nodes}->[$idx]);
167166
}
168167

169-
sub killtree
170-
{
171-
my $root = shift;
172-
diag("killtree $root\n");
173-
174-
my $t = new Proc::ProcessTable;
175-
176-
my %parent = ();
177-
#my %cmd = ();
178-
foreach my $p (@{$t->table}) {
179-
$parent{$p->pid} = $p->ppid;
180-
# $cmd{$p->pid} = $p->cmndline;
181-
}
182-
183-
if (!defined $root) {
184-
return;
185-
}
186-
my @queue = ($root);
187-
my @killist = ();
188-
189-
while (scalar @queue) {
190-
my $victim = shift @queue;
191-
while (my ($pid, $ppid) = each %parent) {
192-
if ($ppid == $victim) {
193-
push @queue, $pid;
194-
}
195-
}
196-
diag("SIGSTOP to $victim");
197-
kill 'STOP', $victim;
198-
unshift @killist, $victim;
199-
}
200-
201-
diag("SIGKILL to " . join(' ', @killist));
202-
kill 'KILL', @killist;
203-
#foreach my $victim (@killist) {
204-
# print("kill $victim " . $cmd{$victim} . "\n");
205-
#}
206-
}
207-
208168
sub stop
209169
{
210170
my ($self, $mode) = @_;

contrib/mmts/Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ DATA = multimaster--1.0.sql
66

77
.PHONY: all
88

9+
EXTRA_INSTALL=contrib/mmts
10+
911
all: multimaster.so
1012

1113
PG_CPPFLAGS = -I$(libpq_srcdir)
@@ -21,3 +23,7 @@ top_builddir = ../..
2123
include $(top_builddir)/src/Makefile.global
2224
include $(top_srcdir)/contrib/contrib-global.mk
2325
endif
26+
27+
check: temp-install
28+
$(prove_check)
29+

contrib/mmts/arbiter.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ static void MtmCheckResponse(MtmArbiterMessage* resp)
336336
&& Mtm->status != MTM_RECOVERED
337337
&& Mtm->nodes[MtmNodeId-1].lastStatusChangeTime + MSEC_TO_USEC(MtmNodeDisableDelay) < MtmGetSystemTime())
338338
{
339-
MTM_ELOG(WARNING, "Node %d thinks that I am dead, while I am %s (message %s)", resp->node, MtmNodeStatusMnem[Mtm->status], MtmMessageKindMnem[resp->code]);
339+
MTM_ELOG(WARNING, "Node %d thinks that I'm dead, while I'm %s (message %s)", resp->node, MtmNodeStatusMnem[Mtm->status], MtmMessageKindMnem[resp->code]);
340340
BIT_SET(Mtm->disabledNodeMask, MtmNodeId-1);
341341
Mtm->nConfigChanges += 1;
342342
MtmSwitchClusterMode(MTM_RECOVERY);
@@ -411,8 +411,8 @@ static void MtmSendHeartbeat()
411411

412412
}
413413

414-
/* This function shoudl be called from all places where sender can be blocked.
415-
* It checks send_heartbeat flag set by timer and if it is set hthen sends heartbeats to all alive nodes
414+
/* This function should be called from all places where sender can be blocked.
415+
* It checks send_heartbeat flag set by timer and if it is set then sends heartbeats to all alive nodes
416416
*/
417417
void MtmCheckHeartbeat()
418418
{
@@ -577,8 +577,8 @@ static bool MtmSendToNode(int node, void const* buf, int size, time_t reconnectT
577577
BIT_SET(busy_mask, node);
578578
while (true) {
579579
#if 0
580-
/* Original intention was to reestablish connectect when reconnet mask is set to avoid hanged-up connection.
581-
* But reconnectMask is set not only when connection is broken, so breaking connection in all this cases cause avalunch of connection failures.
580+
/* Original intention was to reestablish connection when reconnect mask is set to avoid hanged-up connection.
581+
* But reconnectMask is set not only when connection is broken, so breaking connection in all these cases causes an avalanche of connection failures.
582582
*/
583583
if (sockets[node] >= 0 && BIT_CHECK(Mtm->reconnectMask, node)) {
584584
MTM_ELOG(WARNING, "Arbiter is forced to reconnect to node %d", node+1);
@@ -978,7 +978,7 @@ static void MtmReceiver(Datum arg)
978978
Assert(*msg->gid);
979979
tm = (MtmTransMap*)hash_search(MtmGid2State, msg->gid, HASH_FIND, NULL);
980980
if (tm == NULL || tm->state == NULL) {
981-
MTM_ELOG(WARNING, "Response for unexisted transaction %s from node %d", msg->gid, node);
981+
MTM_ELOG(WARNING, "Response for non-existing transaction %s from node %d", msg->gid, node);
982982
} else {
983983
ts = tm->state;
984984
BIT_SET(ts->votedMask, node-1);
@@ -1031,7 +1031,7 @@ static void MtmReceiver(Datum arg)
10311031
}
10321032
ts = (MtmTransState*)hash_search(MtmXid2State, &msg->dxid, HASH_FIND, NULL);
10331033
if (ts == NULL) {
1034-
MTM_ELOG(WARNING, "Ignore response for unexisted transaction %llu from node %d", (long64)msg->dxid, node);
1034+
MTM_ELOG(WARNING, "Ignore response for non-existing transaction %llu from node %d", (long64)msg->dxid, node);
10351035
continue;
10361036
}
10371037
Assert(msg->code == MSG_ABORTED || strcmp(msg->gid, ts->gid) == 0);
@@ -1130,7 +1130,7 @@ static void MtmReceiver(Datum arg)
11301130
} else {
11311131
Assert(ts->status == TRANSACTION_STATUS_ABORTED);
11321132
MTM_ELOG(WARNING, "Receive PRECOMMITTED response for aborted transaction %s (%llu) from node %d",
1133-
ts->gid, (long64)ts->xid, node); // How it can happen? SHould we use assert here?
1133+
ts->gid, (long64)ts->xid, node); // How it can happen? Should we use assert here?
11341134
if ((ts->participantsMask & ~Mtm->disabledNodeMask & ~ts->votedMask) == 0) {
11351135
MtmWakeUpBackend(ts);
11361136
}
@@ -1169,7 +1169,7 @@ static void MtmReceiver(Datum arg)
11691169
}
11701170
if (Mtm->status == MTM_ONLINE) {
11711171
now = MtmGetSystemTime();
1172-
/* Check for heartbeats only in case of timeout expiration: it means that we do not have unproceeded events.
1172+
/* Check for heartbeats only in case of timeout expiration: it means that we do not have non-processed events.
11731173
* It helps to avoid false node failure detection because of blocking receiver.
11741174
*/
11751175
if (n == 0) {

contrib/mmts/doc/configuration.md

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,29 @@
1-
# `Configuration parameters`
1+
# `GUC Variables`
22

3-
```multimaster.node_id``` Multimaster node ID, unique number identifying this node. Nodes should be numbered by natural numbers starting from 1 without gaps (e.g. 1, 2, 3, ...). node_id is also used as an offset in ```multimaster.conn_strings```, thus i-th node's connection string expected to be on i-th position in ```multimaster.conn_strings```. Mandatory.
3+
```multimaster.node_id``` Node ID - a unique natural number identifying the node of a multi-master cluster. You must start node numbering from 1 and cannot have any gaps in numbering. For example, for a cluster of five nodes, set node IDs to 1, 2, 3, 4, and 5.
44

5-
```multimaster.conn_strings``` Multimaster node connection strings separated by commas, i.e. 'dbname=mydb host=node1, dbname=mydb host=node2, dbname=mydb host=node3'. Order here is important and should be consistent with ```multimaster.node_id```. Multimaster allows to specify custom arbiter_port value for all connection strings. Also this parameter is expected to be identical on all nodes. Mandatory.
5+
```multimaster.conn_strings``` Connection strings for each node of a multi-master cluster, separated by commas. Each connection string must include the name of the database to replicate and the cluster node domain name. For example, 'dbname=mydb host=node1, dbname=mydb host=node2, dbname=mydb host=node3'. Connection strings must appear in the order of the node IDs specified in the ```multimaster.node_id``` variable. Connection string for the i-th node must be on the i-th position. This parameter must be identical on all nodes. You can specify a custom port for all connection strings using the `multimaster.arbiter_port` variable.
66

7-
```multimaster.arbiter_port``` Port for arbiter process to listen on. Default to 5433.
7+
```multimaster.arbiter_port``` Port for the arbiter process to listen on.
8+
Default: 5433
89

9-
```multimaster.heartbeat_send_timeout``` Period of broadcasting heartbeat messages by arbiter to all nodes. In milliseconds. Default to 1000.
10+
```multimaster.heartbeat_send_timeout``` Time interval between heartbeat messages, in milliseconds. An arbiter process broadcasts heartbeat messages to all nodes to detect connection problems. Default: 1000.
1011

11-
```multimaster.heartbeat_recv_timeout``` If no heartbeat message is received from node within this period, it assumed to be dead. In milliseconds. Default to 10000.
12+
```multimaster.heartbeat_recv_timeout``` Timeout, in milliseconds. If no heartbeat message is received from the node within this timeframe, the node is excluded from the cluster.
13+
Default: 10000
1214

13-
```multimaster.min_recovery_lag``` Minimal lag of WAL-sender performing recovery after which cluster is locked until recovery is completed. When wal-sender almost catch-up WAL current position we need to stop 'Achilles tortile competition' and temporary stop commit of new transactions until node will be completely repared. In bytes. Default to 100000.
1415

15-
```multimaster.max_recovery_lag``` Maximal lag of replication slot of failed node after which this slot is dropped to avoid transaction log overflow. Dropping slot makes it not possible to recover node using logical replication mechanism, it will be necessary to completely copy content of some alive node using pg_basebackup or similar tool. Zero value of parameter disable slot dropping. In bytes. Default to 100000000.
16+
```multimaster.min_recovery_lag``` Minimal WAL lag between the current cluster state and the node to be restored, in bytes. When this threshold is reached during node recovery, the cluster is locked for write transactions until the recovery is complete.
17+
Default: 100000
18+
19+
```multimaster.max_recovery_lag``` Maximal WAL lag size, in bytes. When a node is disconnected from the cluster, other nodes copy WALs for all new transactions into the replication slot of this node. Upon reaching the `multimaster.max_recovery_lag` value, the replication slot for the disconnected node is deleted to avoid overflow. At this point, automatic recovery of the node is no longer possible. In this case, you can restore the node manually by cloning the data from one of the alive nodes using `pg_basebackup` or a similar tool. If you set this variable to zero, the replication slot will not be deleted.
20+
Default: 10000000
21+
22+
```multimaster.ignore_tables_without_pk``` Boolean. This variable enables/disables replication of tables without primary keys. By default, replication of tables without primary keys is disabled because of the logical replication restrictions. To enable replication, you can set this variable to false. However, take into account that `multimaster` does not allow update operations on such tables. Default: true
23+
24+
```multimaster.cluster_name``` Name of the cluster. If you set this variable, `multimaster` checks that the cluster name is the same for all the cluster nodes.
1625

17-
```multimaster.ignore_tables_without_pk``` Do not replicate tables withpout primary key. Boolean.
1826

19-
```multimaster.cluster_name``` Name of the cluster, desn't affect anything. Just in case. If set that mmts will check name correspondence.
2027

2128
## Questionable
2229

@@ -38,8 +45,6 @@
3845

3946
```multimaster.gc_period``` Number of distributed transactions after which garbage collection is started. Multimaster is building xid->csn hash map which has to be cleaned to avoid hash overflow. This parameter specifies interval of invoking garbage collector for this map. default = MTM_HASH_SIZE/10
4047

41-
```multimaster.max_node``` Maximal number of cluster nodes. This parameters allows to add new nodes to the cluster, default value 0 restricts number of nodes to one specified in multimaster.conn_strings (May be just set that to 64 and allow user to add node when trey need without restart?) default = 0
42-
4348
```multimaster.node_disable_delay``` Minimal amount of time (msec) between node status changes. This delay is used to avoid false detection of node failure and to prevent flapping of the node status. default = 2000. (We can just increase heartbeat_recv_timeout)
4449

4550
```multimaster.connect_timeout``` Multimaster nodes connect timeout. Interval in milliseconds for establishing connection with cluster node. default = 10000, /* 10 seconds */

contrib/mmts/multimaster.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -456,13 +456,9 @@ csn_t MtmDistributedTransactionSnapshot(TransactionId xid, int nodeId, nodemask_
456456

457457
Snapshot MtmGetSnapshot(Snapshot snapshot)
458458
{
459-
snapshot = PgGetSnapshotData(snapshot);
460-
#if 0
461-
if (snapshot != &CatalogSnapshotData) {
462-
RecentGlobalDataXmin = RecentGlobalXmin = Mtm->oldestXid;
463-
}
464-
#endif
465-
return snapshot;
459+
snapshot = PgGetSnapshotData(snapshot);
460+
RecentGlobalDataXmin = RecentGlobalXmin = Mtm->oldestXid;
461+
return snapshot;
466462
}
467463

468464

0 commit comments

Comments
 (0)