Skip to content

Commit 7ff623d

Browse files
knizhnikkelvich
authored andcommitted
Make reporting of errors in sockub less confusing
1 parent 2691039 commit 7ff623d

File tree

6 files changed

+40
-32
lines changed

6 files changed

+40
-32
lines changed

contrib/arbiter/sockhub/sockhub.c

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#define SOCKHUB_BUFFER_SIZE (1024*1024)
2323
#define ERR_BUF_SIZE 1024
2424

25+
#define SHUB_TRACE(fmt, ...)
26+
/* #define SHUB_TRACE(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__) */
27+
2528
void ShubAddSocket(Shub* shub, int fd);
2629

2730
inline void ShubAddSocket(Shub* shub, int fd)
@@ -63,21 +66,23 @@ void ShubInitParams(ShubParams* params)
6366
params->leader = NULL;
6467
}
6568

66-
void ShubParamsSetHosts(ShubParams* params, char* hoststring)
69+
int ShubParamsSetHosts(ShubParams* params, char* hoststring)
6770
{
6871
char *hstate, *pstate;
6972
char *hostport, *host, *portstr;
7073
int port;
74+
int ok = 1;
7175

7276
char *hosts = strdup(hoststring);
73-
fprintf(stderr, "sockhub parsing hosts = '%s'\n", hosts);
77+
SHUB_TRACE("sockhub parsing hosts = '%s'\n", hosts);
7478
hostport = strtok_r(hosts, ",", &hstate);
7579

7680
while (hostport) {
77-
fprintf(stderr, "hostport = '%s'\n", hostport);
81+
SHUB_TRACE("hostport = '%s'\n", hostport);
7882
host = strtok_r(hostport, ":", &pstate);
7983
if (!host) {
80-
fprintf(stderr, "wrong host in host list\n");
84+
SHUB_TRACE("wrong host in host list\n");
85+
ok = 0;
8186
break;
8287
}
8388

@@ -88,20 +93,20 @@ void ShubParamsSetHosts(ShubParams* params, char* hoststring)
8893
port = 5431;
8994
}
9095

91-
fprintf(stderr, "adding host %s:%d\n", host, port);
96+
SHUB_TRACE("adding host %s:%d\n", host, port);
9297
host_t *h = malloc(sizeof(host_t));
9398
h->host = strdup(host);
9499
h->port = port;
95100
if (params->leader) {
96-
// update pointers from
101+
/* update pointers from */
97102
h->prev = params->leader->prev;
98103
h->next = params->leader;
99104

100-
// update pointers to
105+
/* update pointers to */
101106
h->prev->next = h;
102107
h->next->prev = h;
103108
} else {
104-
// the list is empty
109+
/* the list is empty */
105110
params->leader = h;
106111
h->prev = h;
107112
h->next = h;
@@ -111,6 +116,7 @@ void ShubParamsSetHosts(ShubParams* params, char* hoststring)
111116
}
112117

113118
free(hosts);
119+
return ok;
114120
}
115121

116122
static int resolve_host_by_name(const char *hostname, unsigned* addrs, unsigned* n_addrs)
@@ -202,20 +208,20 @@ static void reconnect(Shub* shub)
202208
char *host = shub->params->leader->host;
203209
int port = shub->params->leader->port;
204210

205-
fprintf(stderr, "shub leader = %s:%d\n", host, port);
211+
SHUB_TRACE("shub leader = %s:%d\n", host, port);
206212

207213
shub->params->leader = shub->params->leader->next;
208214

209215
ShubErrorSeverity severity = SHUB_RECOVERABLE_ERROR;
210216
sock_inet.sin_port = htons(port);
211217

212218
if (!resolve_host_by_name(host, addrs, &n_addrs)) {
213-
shub->params->error_handler("Failed to resolve host by name", severity);
219+
shub->params->error_handler("Sockhub failed to resolve host by name", severity);
214220
continue;
215221
}
216222
shub->output = socket(AF_INET, SOCK_STREAM, 0);
217223
if (shub->output < 0) {
218-
shub->params->error_handler("Failed to create inet socket", severity);
224+
shub->params->error_handler("Sockhub failed to create inet socket", severity);
219225
continue;
220226
}
221227
while (1) {
@@ -231,16 +237,16 @@ static void reconnect(Shub* shub)
231237
}
232238
}
233239
if (rc < 0) {
234-
if (errno != ENOENT && errno != ECONNREFUSED && errno != EINPROGRESS) {
235-
shub->params->error_handler("Connection can not be establish", severity);
236-
continue;
237-
}
238-
if (max_attempts-- != 0) {
239-
sleep(1);
240-
} else {
241-
shub->params->error_handler("Failed to connect to host", severity);
242-
continue;
243-
}
240+
if ((errno != ENOENT && errno != ECONNREFUSED && errno != EINPROGRESS) || max_attempts == 0) {
241+
char buf[ERR_BUF_SIZE];
242+
sprintf(buf, "Sockhub failed to connect to %s:%d: %d", host, port, errno);
243+
shub->params->error_handler(buf, severity);
244+
max_attempts = shub->params->max_attempts;
245+
} else {
246+
max_attempts -= 1;
247+
sleep(1);
248+
}
249+
continue;
244250
} else {
245251
int optval = 1;
246252
setsockopt(shub->output, IPPROTO_TCP, TCP_NODELAY, (char const*)&optval, sizeof(optval));
@@ -493,7 +499,6 @@ void ShubLoop(Shub* shub)
493499
char buf[ERR_BUF_SIZE];
494500
sprintf(buf, "Failed to read local socket chan=%d, rc=%d, min requested=%ld, max requested=%d, errno=%d", chan, rc, sizeof(ShubMessageHdr) - available, buffer_size - pos - available, errno);
495501
shub->params->error_handler(buf, SHUB_RECOVERABLE_ERROR);
496-
//shub->params->error_handler("Failed to read local socket", SHUB_RECOVERABLE_ERROR);
497502
close_socket(shub, i);
498503
shub->in_buffer_used = pos;
499504
notify_disconnect(shub, i);
@@ -532,7 +537,6 @@ void ShubLoop(Shub* shub)
532537
char buf[ERR_BUF_SIZE];
533538
sprintf(buf, "Failed to read local socket rc=%d, len=%d, errno=%d", rc, n, errno);
534539
shub->params->error_handler(buf, SHUB_RECOVERABLE_ERROR);
535-
//shub->params->error_handler("Failed to read local socket", SHUB_RECOVERABLE_ERROR);
536540
close_socket(shub, chan);
537541
if (hdr != NULL) { /* if message header is not yet sent to the server... */
538542
/* ... then skip this message */
@@ -607,4 +611,3 @@ void ShubLoop(Shub* shub)
607611
}
608612
}
609613

610-
// vim: sts=4 ts=4 sw=4 expandtab

contrib/arbiter/sockhub/sockhub.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ int ShubReadSocket(int sd, void* buf, int size);
7777
int ShubWriteSocket(int sd, void const* buf, int size);
7878

7979
void ShubInitParams(ShubParams* params);
80-
void ShubParamsSetHosts(ShubParams* params, char* hoststring);
80+
int ShubParamsSetHosts(ShubParams* params, char* hoststring);
8181
void ShubInitialize(Shub* shub, ShubParams* params);
8282
void ShubLoop(Shub* shub);
8383

contrib/multimaster/multimaster.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,8 +1114,13 @@ void DtmBackgroundWorker(Datum arg)
11141114
snprintf(unix_sock_path, sizeof(unix_sock_path), "%s/sh.unix", Unix_socket_directories);
11151115

11161116
ShubInitParams(&params);
1117-
1118-
ShubParamsSetHosts(&params, ArbitersCopy);
1117+
1118+
if (!ShubParamsSetHosts(&params, ArbitersCopy))
1119+
{
1120+
ereport(ERROR,
1121+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1122+
errmsg("Invalid values of arbiters.multimaster parameter: %s", ArbitersCopy)));
1123+
}
11191124
params.file = unix_sock_path;
11201125
params.buffer_size = DtmBufferSize;
11211126

contrib/multimaster/multimaster.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "bytebuf.h"
55

66
#define XTM_TRACE(fmt, ...)
7-
//#define XTM_INFO(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__)
7+
/* #define XTM_INFO(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__) */
88
#define XTM_INFO(fmt, ...)
99

1010
extern int MMStartReceivers(char* nodes, int node_id);

contrib/multimaster/tests/deploy_layouts/cluster.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
- name: start dtm
3131
shell: >
3232
nohup ~/pg_cluster/src/contrib/arbiter/bin/arbiter
33-
-d ~/pg_cluster/dtm_data -r "0.0.0.0:5431" -i 0 -l ~/pg_cluster/dtm_data/log &
33+
-d ~/pg_cluster/dtm_data -r 0.0.0.0:5431 -i 0 -l ~/pg_cluster/dtm_data/log &
3434
3535
- name: wait until dtm is available
3636
wait_for: port=5431 delay=1
@@ -83,8 +83,8 @@
8383
- "max_replication_slots = 10"
8484
- "max_worker_processes = 100"
8585
- "shared_preload_libraries = 'multimaster'"
86-
- "multimaster.arbiter_host = '{{ groups['nodes'][0] }}'"
87-
- "multimaster.conn_strings = '{{ connections }}'"
86+
- "multimaster.arbiters = '{{groups['nodes'][0]}}:5431'"
87+
- "multimaster.conn_strings = '{{connections}}'"
8888
- "multimaster.node_id = {{ node_id }}"
8989
- "multimaster.queue_size = 1073741824"
9090
- "multimaster.workers = 32"

contrib/multimaster/tests/reinit-mm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ n_nodes=3
22
export PATH=~/postgres_cluster/dist/bin/:$PATH
33
ulimit -c unlimited
44
pkill -9 postgres
5-
pkill -9 dtmd
5+
pkill -9 arbiter
66
rm -fr node? *.log dtm
77
mkdir dtm
88
conn_str=""

0 commit comments

Comments
 (0)