Skip to content

Commit a326ca7

Browse files
committed
pg_upgrade: check for clean server shutdowns
Previously, pg_upgrade checked for the pid file and started/stopped the server to force a clean shutdown. However, "pg_ctl -m immediate" removes the pid file without performing a clean shutdown, so also check the pg_controldata output for a clean shutdown.

Diagnosed-by: Vimalraj A
Discussion: https://postgr.es/m/CAFKBAK5e4Q-oTUuPPJ56EU_d2Rzodq6GWKS3ncAk3xo7hAsOZg@mail.gmail.com
Backpatch-through: 9.3
1 parent 5f2c589 commit a326ca7

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

src/bin/pg_upgrade/controldata.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
5858
bool got_large_object = false;
5959
bool got_date_is_int = false;
6060
bool got_data_checksum_version = false;
61+
bool got_cluster_state = false;
6162
char *lc_collate = NULL;
6263
char *lc_ctype = NULL;
6364
char *lc_monetary = NULL;
@@ -416,6 +417,64 @@ get_control_data(ClusterInfo *cluster, bool live_check)
416417

417418
pclose(output);
418419

420+
/*
421+
* Check for clean shutdown
422+
*/
423+
424+
/* only pg_controldata outputs the cluster state */
425+
snprintf(cmd, sizeof(cmd), "\"%s/pg_controldata\" \"%s\"",
426+
cluster->bindir, cluster->pgdata);
427+
fflush(stdout);
428+
fflush(stderr);
429+
430+
if ((output = popen(cmd, "r")) == NULL)
431+
pg_fatal("could not get control data using %s: %s\n",
432+
cmd, strerror(errno));
433+
434+
/* we have the result of cmd in "output". so parse it line by line now */
435+
while (fgets(bufin, sizeof(bufin), output))
436+
{
437+
if ((!live_check || cluster == &new_cluster) &&
438+
(p = strstr(bufin, "Database cluster state:")) != NULL)
439+
{
440+
p = strchr(p, ':');
441+
442+
if (p == NULL || strlen(p) <= 1)
443+
pg_fatal("%d: database cluster state problem\n", __LINE__);
444+
445+
p++; /* remove ':' char */
446+
447+
/*
448+
* We checked earlier for a postmaster lock file, and if we found
449+
* one, we tried to start/stop the server to replay the WAL. However,
450+
* pg_ctl -m immediate doesn't leave a lock file, but does require
451+
* WAL replay, so we check here that the server was shut down cleanly,
452+
* from the controldata perspective.
453+
*/
454+
/* remove leading spaces */
455+
while (*p == ' ')
456+
p++;
457+
if (strcmp(p, "shut down\n") != 0)
458+
{
459+
if (cluster == &old_cluster)
460+
pg_fatal("The source cluster was not shut down cleanly.\n");
461+
else
462+
pg_fatal("The target cluster was not shut down cleanly.\n");
463+
}
464+
got_cluster_state = true;
465+
}
466+
}
467+
468+
pclose(output);
469+
470+
if (!got_cluster_state)
471+
{
472+
if (cluster == &old_cluster)
473+
pg_fatal("The source cluster lacks cluster state information:\n");
474+
else
475+
pg_fatal("The target cluster lacks cluster state information:\n");
476+
}
477+
419478
/*
420479
* Restore environment variables
421480
*/

src/bin/pg_upgrade/pg_upgrade.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ setup(char *argv0, bool *live_check)
202202
* start, assume the server is running. If the pid file is left over
203203
* from a server crash, this also allows any committed transactions
204204
* stored in the WAL to be replayed so they are not lost, because WAL
205-
* files are not transferred from old to new servers.
205+
* files are not transferred from old to new servers. We later check
206+
* for a clean shutdown.
206207
*/
207208
if (start_postmaster(&old_cluster, false))
208209
stop_postmaster(false);

0 commit comments

Comments
 (0)