@@ -205,7 +205,7 @@ multimaster.conn_strings = 'dbname=mydb user=myuser host=node1, dbname=mydb user
205
205
</listitem>
206
206
<listitem>
207
207
<para>
208
- Allow replication in <literal >pg_hba.conf</literal >:
208
+ Allow replication in <filename >pg_hba.conf</filename >:
209
209
</para>
210
210
<programlisting>
211
211
host myuser all node1 trust
@@ -517,7 +517,7 @@ pg_ctl -D ./datadir -l ./pg.log start
517
517
</listitem>
518
518
<listitem>
519
519
<para>
520
- Make sure the <literal >pg_hba.conf</literal > file allows
520
+ Make sure the <filename >pg_hba.conf</filename > file allows
521
521
replication to the new node.
522
522
<programlisting>host replication all node3 trust</programlisting>
523
523
</para>
@@ -866,78 +866,131 @@ pg_ctl -D ./datadir -l ./pg.log start
866
866
<itemizedlist>
867
867
<listitem>
868
868
<para>
869
- <literal>id</literal>, <type>integer</type>
869
+ <parameter>id</parameter>, <type>integer</type>
870
+ </para>
871
+ <para>Node ID.
872
+ </para>
873
+ </listitem>
874
+ <listitem>
875
+ <para>
876
+ <parameter>enabled</parameter>, <type>boolean</type>
877
+ </para>
878
+ <para>Shows whether the node is enabled, that is, not excluded from the cluster. The node can only be disabled if responses to heartbeats are not received within the <varname>heartbeat_recv_timeout</varname> time interval. When the node starts responding to heartbeats, <filename>multimaster</filename> can automatically restore the node and switch it back to the enabled state.
879
+ Automatic recovery is only possible if the replication slot is still active. Otherwise, you can <link linkend="multimaster-restoring-a-node-manually">restore the node manually</link>.</para>
880
+ </listitem>
881
+ <listitem>
882
+ <para>
883
+ <parameter>connected</parameter>, <type>boolean</type>
884
+ </para>
885
+ <para>
886
+ Shows whether the node is connected to the WAL sender.
870
887
</para>
871
888
</listitem>
872
889
<listitem>
873
890
<para>
874
- <literal>disabled</literal>, <type>boolean</type>
891
+ <parameter>slot_active</parameter>, <type>boolean</type>
892
+ </para>
893
+ <para>Shows whether the node has an active replication slot. For a disabled node, the slot remains active until the <varname>max_recovery_lag</varname> value is reached.
875
894
</para>
876
895
</listitem>
877
896
<listitem>
878
897
<para>
879
- <literal>disconnected</literal>, <type>boolean</type>
898
+ <parameter>stopped</parameter>, <type>boolean</type>
899
+ </para>
900
+ <para>Shows whether replication to this node was stopped by the <function>mtm.stop_node()</function> function. A stopped node acts as a disabled one, but cannot be automatically recovered. Call <function>mtm.recover_node()</function> to re-enable such a node.
880
901
</para>
881
902
</listitem>
882
903
<listitem>
883
904
<para>
884
- <literal>catchUp</literal>, <type>bool</type>
905
+ <parameter>catchUp</parameter>, <type>boolean</type>
906
+ </para>
907
+ <para>During the node recovery, shows whether the data is recovered up to the <varname>min_recovery_lag</varname> value.
885
908
</para>
886
909
</listitem>
887
910
<listitem>
888
911
<para>
889
- <literal>slotLag</literal>, <type>bigint</type>
912
+ <parameter>slotLag</parameter>, <type>bigint</type>
913
+ </para>
914
+ <para>The size of WAL data that the replication slot holds for a disabled/stopped node. The slot is dropped when <parameter>slotLag</parameter> reaches the <varname>max_recovery_lag</varname> value.
890
915
</para>
891
916
</listitem>
892
917
<listitem>
893
918
<para>
894
- <literal>avgTransDelay</literal>, <type>bigint</type>
919
+ <parameter>avgTransDelay</parameter>, <type>bigint</type>
920
+ </para>
921
+ <para>An average commit delay caused by this node, in microseconds.
895
922
</para>
896
923
</listitem>
897
924
<listitem>
898
925
<para>
899
- <literal >lastStatusChange</literal >, <type>timestamp</type>
926
+ <parameter >lastStatusChange</parameter >, <type>timestamp</type>
900
927
</para>
928
+ <para>Last time when the node changed its status (enabled/disabled).</para>
901
929
</listitem>
902
930
<listitem>
903
931
<para>
904
- <literal >oldestSnapshot</literal >, <type>bigint</type>
932
+ <parameter >oldestSnapshot</parameter >, <type>bigint</type>
905
933
</para>
934
+ <para>The oldest global snapshot existing on this node.</para>
906
935
</listitem>
907
936
<listitem>
908
937
<para>
909
- <literal >SenderPid</literal> <type>integer</type>
938
+ <parameter >SenderPid</parameter>, <type>integer</type>
910
939
</para>
940
+ <para>Process ID of the WAL sender.</para>
911
941
</listitem>
912
942
<listitem>
913
943
<para>
914
- <literal >SenderStartTime</literal> <type>timestamp</type>
944
+ <parameter >SenderStartTime</parameter>, <type>timestamp</type>
915
945
</para>
946
+ <para>WAL sender start time.</para>
916
947
</listitem>
917
948
<listitem>
918
949
<para>
919
- <literal >ReceiverPid</literal> <type>integer</type>
950
+ <parameter >ReceiverPid</parameter>, <type>integer</type>
920
951
</para>
952
+ <para>Process ID of the WAL receiver.</para>
921
953
</listitem>
922
954
<listitem>
923
955
<para>
924
- <literal >ReceiverStartTime</literal> <type>timestamp</type>
956
+ <parameter >ReceiverStartTime</parameter>, <type>timestamp</type>
925
957
</para>
958
+ <para>WAL receiver start time.</para>
926
959
</listitem>
927
960
<listitem>
928
961
<para>
929
- <literal >connStr</literal> <type>text</type>
962
+ <parameter >connStr</parameter>, <type>text</type>
930
963
</para>
964
+ <para>Connection string to this node.</para>
931
965
</listitem>
932
966
<listitem>
933
967
<para>
934
- <literal >connectivityMask</literal> <type>bigint</type>
968
+ <parameter >connectivityMask</parameter>, <type>bigint</type>
935
969
</para>
970
+ <para>Bitmask representing connectivity to neighbor nodes. Each bit represents a connection to a node.</para>
971
+ </listitem>
972
+ <listitem>
973
+ <para><parameter>nHeartbeats</parameter>, <type>integer</type></para>
974
+ <para>The number of heartbeat responses received from this node.</para>
936
975
</listitem>
937
976
</itemizedlist>
938
977
</para>
939
978
</listitem>
940
979
</varlistentry>
980
+
981
+ <varlistentry>
982
+ <term>
983
+ <function>mtm.collect_cluster_state()</function>
984
+ <indexterm>
985
+ <primary><function>mtm.collect_cluster_state</function></primary>
986
+ </indexterm>
987
+ </term>
988
+ <listitem>
989
+ <para>Collects the data returned by the <function>mtm.get_cluster_state()</function> function from all available nodes. For this function to work, in addition to replication connections, <filename>pg_hba.conf</filename> must allow ordinary connections to the node with the specified connection string.
990
+ </para>
991
+ </listitem>
992
+ </varlistentry>
993
+
941
994
<varlistentry>
942
995
<term>
943
996
<function>mtm.get_cluster_state()</function>
@@ -946,87 +999,124 @@ pg_ctl -D ./datadir -l ./pg.log start
946
999
</indexterm>
947
1000
</term>
948
1001
<listitem>
949
- <para>Shows the status of the whole cluster . Returns a tuple of the following values:
1002
+ <para>Shows the status of the <filename>multimaster</filename> extension. Returns a tuple of the following values:
950
1003
</para>
951
1004
<itemizedlist>
952
1005
<listitem>
953
1006
<para>
954
- <literal>status</literal>, <type>text</type>
1007
+ <parameter>status</parameter>, <type>text</type>
1008
+ </para>
1009
+ <para>Node status. Possible values are: <literal>Initialization</literal>, <literal>Offline</literal>, <literal>Connected</literal>, <literal>Online</literal>, <literal>Recovery</literal>, <literal>Recovered</literal>, <literal>InMinor</literal>, <literal>OutOfService</literal>.</para>
1010
+ </listitem>
1011
+ <listitem>
1012
+ <para>
1013
+ <parameter>disabledNodeMask</parameter>, <type>bigint</type>
955
1014
</para>
1015
+ <para>Bitmask of disabled nodes.</para>
956
1016
</listitem>
957
1017
<listitem>
958
1018
<para>
959
- <literal>disabledNodeMask</literal >, <type>bigint</type>
1019
+ <parameter>disconnectedNodeMask</parameter >, <type>bigint</type>
960
1020
</para>
1021
+ <para>Bitmask of disconnected nodes.</para>
961
1022
</listitem>
962
1023
<listitem>
963
1024
<para>
964
- <literal>disconnectedNodeMask</literal >, <type>bigint</type>
1025
+ <parameter>catchUpNodeMask</parameter >, <type>bigint</type>
965
1026
</para>
1027
+ <para>Bitmask of nodes that completed the recovery.</para>
966
1028
</listitem>
967
1029
<listitem>
968
1030
<para>
969
- <literal>catchUpNodeMask</literal >, <type>bigint </type>
1031
+ <parameter>liveNodes</parameter >, <type>integer</type>
970
1032
</para>
1033
+ <para>Number of enabled nodes.</para>
971
1034
</listitem>
972
1035
<listitem>
973
1036
<para>
974
- <literal>liveNodes</literal >, <type>integer</type>
1037
+ <parameter>allNodes</parameter >, <type>integer</type>
975
1038
</para>
1039
+ <para>Number of nodes in the cluster. The majority of alive nodes is calculated based on this parameter.</para>
976
1040
</listitem>
977
1041
<listitem>
978
1042
<para>
979
- <literal>allNodes</literal >, <type>integer</type>
1043
+ <parameter>nActiveQueries</parameter >, <type>integer</type>
980
1044
</para>
1045
+ <para>Number of queries being currently processed on this node.</para>
981
1046
</listitem>
982
1047
<listitem>
983
1048
<para>
984
- <literal>nActiveQueries</literal >, <type>integer</type>
1049
+ <parameter>nPendingQueries</parameter >, <type>integer</type>
985
1050
</para>
1051
+ <para>Number of queries waiting for execution on this node.</para>
986
1052
</listitem>
987
1053
<listitem>
988
1054
<para>
989
- <literal>nPendingQueries</literal >, <type>integer </type>
1055
+ <parameter>queueSize</parameter >, <type>bigint</type>
990
1056
</para>
1057
+ <para>Size of the pending query queue, in bytes.</para>
991
1058
</listitem>
992
1059
<listitem>
993
1060
<para>
994
- <literal>queueSize</literal >, <type>bigint</type>
1061
+ <parameter>transCount</parameter >, <type>bigint</type>
995
1062
</para>
1063
+ <para>The total number of replicated transactions processed by this node.</para>
996
1064
</listitem>
997
1065
<listitem>
998
1066
<para>
999
- <literal>transCount</literal >, <type>bigint</type>
1067
+ <parameter>timeShift</parameter >, <type>bigint</type>
1000
1068
</para>
1069
+ <para>Global snapshot shift caused by unsynchronized clocks on nodes, in microseconds.</para>
1001
1070
</listitem>
1002
1071
<listitem>
1003
1072
<para>
1004
- <literal>timeShift</literal >, <type>bigint </type>
1073
+ <parameter>recoverySlot</parameter >, <type>integer</type>
1005
1074
</para>
1075
+ <para>The node from which a failed node gets data updates during automatic recovery.</para>
1006
1076
</listitem>
1007
1077
<listitem>
1008
1078
<para>
1009
- <literal>recoverySlot</literal >, <type>integer </type>
1079
+ <parameter>xidHashSize</parameter >, <type>bigint</type>
1010
1080
</para>
1081
+ <para>Size of xid2state hash.</para>
1011
1082
</listitem>
1012
1083
<listitem>
1013
1084
<para>
1014
- <literal>xidHashSize</literal >, <type>bigint</type>
1085
+ <parameter>gidHashSize</parameter >, <type>bigint</type>
1015
1086
</para>
1087
+ <para>Size of gid2state hash.</para>
1016
1088
</listitem>
1017
1089
<listitem>
1018
1090
<para>
1019
- <literal>gidHashSize</literal >, <type>bigint</type>
1091
+ <parameter>oldestXid</parameter >, <type>bigint</type>
1020
1092
</para>
1093
+ <para>The oldest transaction ID on this node.</para>
1021
1094
</listitem>
1022
1095
<listitem>
1023
1096
<para>
1024
- <literal>oldestXid</literal >, <type>bigint </type>
1097
+ <parameter>configChanges</parameter >, <type>integer</type>
1025
1098
</para>
1099
+ <para>Number of state changes (enabled/disabled) since the last reboot.</para>
1026
1100
</listitem>
1027
1101
<listitem>
1028
1102
<para>
1029
- <literal>configChanges</literal>, <type>integer</type>
1103
+ <parameter>stalledNodeMask</parameter>, <type>bigint</type>
1104
+ </para>
1105
+ <para>Bitmask of nodes for which replication slots were dropped.
1106
+ </para>
1107
+ </listitem>
1108
+ <listitem>
1109
+ <para>
1110
+ <parameter>stoppedNodeMask</parameter>, <type>bigint</type>
1111
+ </para>
1112
+ <para>Bitmask of nodes that were stopped by <function>mtm.stop_node()</function>.
1113
+ </para>
1114
+ </listitem>
1115
+ <listitem>
1116
+ <para>
1117
+ <parameter>lastStatusChange</parameter>, <type>timestamp</type>
1118
+ </para>
1119
+ <para>Timestamp of the last state change.
1030
1120
</para>
1031
1121
</listitem>
1032
1122
</itemizedlist>
@@ -1153,7 +1243,7 @@ pg_ctl -D ./datadir -l ./pg.log start
1153
1243
<para>
1154
1244
The <filename>multimaster</filename> extension currently passes 162
1155
1245
of 166 <productname>PostgreSQL</productname> regression tests. We are working right now on
1156
- proving full compatibility with the standard <productname>PostgreSQL</productname>.
1246
+ providing full compatibility with the standard <productname>PostgreSQL</productname>.
1157
1247
</para>
1158
1248
</sect2>
1159
1249
<sect2 id="multimaster-authors">
0 commit comments