Skip to content

Commit d25ee30

Browse files
committed
pg_upgrade: prevent check on live cluster from generating error
Previously an inaccurate but harmless error was generated when running --check on a live server before reporting the servers as compatible. The fix is to split error reporting and exit control in the exec_prog() API. Reported-by: Daniel Westermann Backpatch-through: 10
1 parent e35dba4 commit d25ee30

File tree

6 files changed

+35
-38
lines changed

6 files changed

+35
-38
lines changed

src/bin/pg_upgrade/dump.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ generate_old_dump(void)
2323
prep_status("Creating dump of global objects");
2424

2525
/* run new pg_dumpall binary for globals */
26-
exec_prog(UTILITY_LOG_FILE, NULL, true,
26+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
2727
"\"%s/pg_dumpall\" %s --globals-only --quote-all-identifiers "
2828
"--binary-upgrade %s -f %s",
2929
new_cluster.bindir, cluster_conn_opts(&old_cluster),

src/bin/pg_upgrade/exec.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,16 +71,14 @@ get_bin_version(ClusterInfo *cluster)
7171
* and attempts to execute that command. If the command executes
7272
* successfully, exec_prog() returns true.
7373
*
74-
* If the command fails, an error message is saved to the specified log_file.
75-
* If throw_error is true, this raises a PG_FATAL error and pg_upgrade
76-
* terminates; otherwise it is just reported as PG_REPORT and exec_prog()
77-
* returns false.
74+
* If the command fails, an error message is optionally written to the specified
75+
* log_file, and the program optionally exits.
7876
*
7977
* The code requires it be called first from the primary thread on Windows.
8078
*/
8179
bool
8280
exec_prog(const char *log_file, const char *opt_log_file,
83-
bool throw_error, const char *fmt,...)
81+
bool report_error, bool exit_on_error, const char *fmt,...)
8482
{
8583
int result = 0;
8684
int written;
@@ -173,20 +171,20 @@ exec_prog(const char *log_file, const char *opt_log_file,
173171
#endif
174172
result = system(cmd);
175173

176-
if (result != 0)
174+
if (result != 0 && report_error)
177175
{
178176
/* we might be in on a progress status line, so go to the next line */
179177
report_status(PG_REPORT, "\n*failure*");
180178
fflush(stdout);
181179

182180
pg_log(PG_VERBOSE, "There were problems executing \"%s\"\n", cmd);
183181
if (opt_log_file)
184-
pg_log(throw_error ? PG_FATAL : PG_REPORT,
182+
pg_log(exit_on_error ? PG_FATAL : PG_REPORT,
185183
"Consult the last few lines of \"%s\" or \"%s\" for\n"
186184
"the probable cause of the failure.\n",
187185
log_file, opt_log_file);
188186
else
189-
pg_log(throw_error ? PG_FATAL : PG_REPORT,
187+
pg_log(exit_on_error ? PG_FATAL : PG_REPORT,
190188
"Consult the last few lines of \"%s\" for\n"
191189
"the probable cause of the failure.\n",
192190
log_file);

src/bin/pg_upgrade/parallel.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ parallel_exec_prog(const char *log_file, const char *opt_log_file,
7878
va_end(args);
7979

8080
if (user_opts.jobs <= 1)
81-
/* throw_error must be true to allow jobs */
82-
exec_prog(log_file, opt_log_file, true, "%s", cmd);
81+
/* exit_on_error must be true to allow jobs */
82+
exec_prog(log_file, opt_log_file, true, true, "%s", cmd);
8383
else
8484
{
8585
/* parallel */
@@ -122,7 +122,7 @@ parallel_exec_prog(const char *log_file, const char *opt_log_file,
122122
child = fork();
123123
if (child == 0)
124124
/* use _exit to skip atexit() functions */
125-
_exit(!exec_prog(log_file, opt_log_file, true, "%s", cmd));
125+
_exit(!exec_prog(log_file, opt_log_file, true, true, "%s", cmd));
126126
else if (child < 0)
127127
/* fork failed */
128128
pg_fatal("could not create worker process: %s\n", strerror(errno));
@@ -160,7 +160,7 @@ win32_exec_prog(exec_thread_arg *args)
160160
{
161161
int ret;
162162

163-
ret = !exec_prog(args->log_file, args->opt_log_file, true, "%s", args->cmd);
163+
ret = !exec_prog(args->log_file, args->opt_log_file, true, true, "%s", args->cmd);
164164

165165
/* terminates thread */
166166
return ret;
@@ -187,7 +187,6 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
187187
#endif
188188

189189
if (user_opts.jobs <= 1)
190-
/* throw_error must be true to allow jobs */
191190
transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL);
192191
else
193192
{

src/bin/pg_upgrade/pg_upgrade.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,14 @@ main(int argc, char **argv)
149149
* because there is no need to have the schema load use new oids.
150150
*/
151151
prep_status("Setting next OID for new cluster");
152-
exec_prog(UTILITY_LOG_FILE, NULL, true,
152+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
153153
"\"%s/pg_resetwal\" -o %u \"%s\"",
154154
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtoid,
155155
new_cluster.pgdata);
156156
check_ok();
157157

158158
prep_status("Sync data directory to disk");
159-
exec_prog(UTILITY_LOG_FILE, NULL, true,
159+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
160160
"\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
161161
new_cluster.pgdata);
162162
check_ok();
@@ -249,7 +249,7 @@ prepare_new_cluster(void)
249249
* --analyze so autovacuum doesn't update statistics later
250250
*/
251251
prep_status("Analyzing all rows in the new cluster");
252-
exec_prog(UTILITY_LOG_FILE, NULL, true,
252+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
253253
"\"%s/vacuumdb\" %s --all --analyze %s",
254254
new_cluster.bindir, cluster_conn_opts(&new_cluster),
255255
log_opts.verbose ? "--verbose" : "");
@@ -262,7 +262,7 @@ prepare_new_cluster(void)
262262
* counter later.
263263
*/
264264
prep_status("Freezing all rows in the new cluster");
265-
exec_prog(UTILITY_LOG_FILE, NULL, true,
265+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
266266
"\"%s/vacuumdb\" %s --all --freeze %s",
267267
new_cluster.bindir, cluster_conn_opts(&new_cluster),
268268
log_opts.verbose ? "--verbose" : "");
@@ -289,7 +289,7 @@ prepare_new_databases(void)
289289
* support functions in template1 but pg_dumpall creates database using
290290
* the template0 template.
291291
*/
292-
exec_prog(UTILITY_LOG_FILE, NULL, true,
292+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
293293
"\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s\"",
294294
new_cluster.bindir, cluster_conn_opts(&new_cluster),
295295
GLOBALS_DUMP_FILE);
@@ -392,7 +392,7 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
392392

393393
prep_status("Copying old %s to new server", old_subdir);
394394

395-
exec_prog(UTILITY_LOG_FILE, NULL, true,
395+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
396396
#ifndef WIN32
397397
"cp -Rf \"%s\" \"%s\"",
398398
#else
@@ -418,16 +418,16 @@ copy_xact_xlog_xid(void)
418418

419419
/* set the next transaction id and epoch of the new cluster */
420420
prep_status("Setting next transaction ID and epoch for new cluster");
421-
exec_prog(UTILITY_LOG_FILE, NULL, true,
421+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
422422
"\"%s/pg_resetwal\" -f -x %u \"%s\"",
423423
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid,
424424
new_cluster.pgdata);
425-
exec_prog(UTILITY_LOG_FILE, NULL, true,
425+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
426426
"\"%s/pg_resetwal\" -f -e %u \"%s\"",
427427
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
428428
new_cluster.pgdata);
429429
/* must reset commit timestamp limits also */
430-
exec_prog(UTILITY_LOG_FILE, NULL, true,
430+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
431431
"\"%s/pg_resetwal\" -f -c %u,%u \"%s\"",
432432
new_cluster.bindir,
433433
old_cluster.controldata.chkpnt_nxtxid,
@@ -453,7 +453,7 @@ copy_xact_xlog_xid(void)
453453
* we preserve all files and contents, so we must preserve both "next"
454454
* counters here and the oldest multi present on system.
455455
*/
456-
exec_prog(UTILITY_LOG_FILE, NULL, true,
456+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
457457
"\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
458458
new_cluster.bindir,
459459
old_cluster.controldata.chkpnt_nxtmxoff,
@@ -481,7 +481,7 @@ copy_xact_xlog_xid(void)
481481
* might end up wrapped around (i.e. 0) if the old cluster had
482482
* next=MaxMultiXactId, but multixact.c can cope with that just fine.
483483
*/
484-
exec_prog(UTILITY_LOG_FILE, NULL, true,
484+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
485485
"\"%s/pg_resetwal\" -m %u,%u \"%s\"",
486486
new_cluster.bindir,
487487
old_cluster.controldata.chkpnt_nxtmulti + 1,
@@ -492,7 +492,7 @@ copy_xact_xlog_xid(void)
492492

493493
/* now reset the wal archives in the new cluster */
494494
prep_status("Resetting WAL archives");
495-
exec_prog(UTILITY_LOG_FILE, NULL, true,
495+
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
496496
/* use timeline 1 to match controldata and no WAL history file */
497497
"\"%s/pg_resetwal\" -l 00000001%s \"%s\"", new_cluster.bindir,
498498
old_cluster.controldata.nextxlogfile + 8,

src/bin/pg_upgrade/pg_upgrade.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ void generate_old_dump(void);
360360
#define EXEC_PSQL_ARGS "--echo-queries --set ON_ERROR_STOP=on --no-psqlrc --dbname=template1"
361361

362362
bool exec_prog(const char *log_file, const char *opt_log_file,
363-
bool throw_error, const char *fmt,...) pg_attribute_printf(4, 5);
363+
bool report_error, bool exit_on_error, const char *fmt,...) pg_attribute_printf(5, 6);
364364
void verify_directories(void);
365365
bool pid_lock_file_exists(const char *datadir);
366366

@@ -416,8 +416,8 @@ PGresult *executeQueryOrDie(PGconn *conn, const char *fmt,...) pg_attribute_pr
416416

417417
char *cluster_conn_opts(ClusterInfo *cluster);
418418

419-
bool start_postmaster(ClusterInfo *cluster, bool throw_error);
420-
void stop_postmaster(bool fast);
419+
bool start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error);
420+
void stop_postmaster(bool in_atexit);
421421
uint32 get_major_server_version(ClusterInfo *cluster);
422422
void check_pghost_envvar(void);
423423

src/bin/pg_upgrade/server.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ stop_postmaster_atexit(void)
191191

192192

193193
bool
194-
start_postmaster(ClusterInfo *cluster, bool throw_error)
194+
start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error)
195195
{
196196
char cmd[MAXPGPATH * 4 + 1000];
197197
PGconn *conn;
@@ -257,11 +257,11 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
257257
(strcmp(SERVER_LOG_FILE,
258258
SERVER_START_LOG_FILE) != 0) ?
259259
SERVER_LOG_FILE : NULL,
260-
false,
260+
report_and_exit_on_error, false,
261261
"%s", cmd);
262262

263263
/* Did it fail and we are just testing if the server could be started? */
264-
if (!pg_ctl_return && !throw_error)
264+
if (!pg_ctl_return && !report_and_exit_on_error)
265265
return false;
266266

267267
/*
@@ -305,9 +305,9 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
305305
PQfinish(conn);
306306

307307
/*
308-
* If pg_ctl failed, and the connection didn't fail, and throw_error is
309-
* enabled, fail now. This could happen if the server was already
310-
* running.
308+
* If pg_ctl failed, and the connection didn't fail, and
309+
* report_and_exit_on_error is enabled, fail now. This
310+
* could happen if the server was already running.
311311
*/
312312
if (!pg_ctl_return)
313313
{
@@ -322,7 +322,7 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
322322

323323

324324
void
325-
stop_postmaster(bool fast)
325+
stop_postmaster(bool in_atexit)
326326
{
327327
ClusterInfo *cluster;
328328

@@ -333,11 +333,11 @@ stop_postmaster(bool fast)
333333
else
334334
return; /* no cluster running */
335335

336-
exec_prog(SERVER_STOP_LOG_FILE, NULL, !fast,
336+
exec_prog(SERVER_STOP_LOG_FILE, NULL, !in_atexit, !in_atexit,
337337
"\"%s/pg_ctl\" -w -D \"%s\" -o \"%s\" %s stop",
338338
cluster->bindir, cluster->pgconfig,
339339
cluster->pgopts ? cluster->pgopts : "",
340-
fast ? "-m fast" : "-m smart");
340+
in_atexit ? "-m fast" : "-m smart");
341341

342342
os_info.running_cluster = NULL;
343343
}

0 commit comments

Comments
 (0)