Skip to content

Commit f878781

Browse files
committed
pg_upgrade: check for clean server shutdowns
Previously, pg_upgrade checked for the pid file and started/stopped the server to force a clean shutdown. However, "pg_ctl -m immediate" removes the pid file without performing a clean shutdown, so also check the pg_controldata output for a clean shutdown.

Diagnosed-by: Vimalraj A
Discussion: https://postgr.es/m/CAFKBAK5e4Q-oTUuPPJ56EU_d2Rzodq6GWKS3ncAk3xo7hAsOZg@mail.gmail.com
Backpatch-through: 9.3
1 parent 1cf1d2d commit f878781

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

contrib/pg_upgrade/controldata.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
5757
bool got_date_is_int = false;
5858
bool got_float8_pass_by_value = false;
5959
bool got_data_checksum_version = false;
60+
bool got_cluster_state = false;
6061
char *lc_collate = NULL;
6162
char *lc_ctype = NULL;
6263
char *lc_monetary = NULL;
@@ -451,6 +452,64 @@ get_control_data(ClusterInfo *cluster, bool live_check)
451452
if (output)
452453
pclose(output);
453454

455+
/*
456+
* Check for clean shutdown
457+
*/
458+
459+
/* only pg_controldata outputs the cluster state */
460+
snprintf(cmd, sizeof(cmd), "\"%s/pg_controldata\" \"%s\"",
461+
cluster->bindir, cluster->pgdata);
462+
fflush(stdout);
463+
fflush(stderr);
464+
465+
if ((output = popen(cmd, "r")) == NULL)
466+
pg_fatal("could not get control data using %s: %s\n",
467+
cmd, strerror(errno));
468+
469+
/* We have the result of cmd in "output", so parse it line by line now. */
470+
while (fgets(bufin, sizeof(bufin), output))
471+
{
472+
if ((!live_check || cluster == &new_cluster) &&
473+
(p = strstr(bufin, "Database cluster state:")) != NULL)
474+
{
475+
p = strchr(p, ':');
476+
477+
if (p == NULL || strlen(p) <= 1)
478+
pg_fatal("%d: database cluster state problem\n", __LINE__);
479+
480+
p++; /* remove ':' char */
481+
482+
/*
483+
* We checked earlier for a postmaster lock file, and if we found
484+
* one, we tried to start/stop the server to replay the WAL. However,
485+
* pg_ctl -m immediate doesn't leave a lock file, but does require
486+
* WAL replay, so we check here that the server was shut down cleanly,
487+
* from the controldata perspective.
488+
*/
489+
/* remove leading spaces */
490+
while (*p == ' ')
491+
p++;
492+
if (strcmp(p, "shut down\n") != 0)
493+
{
494+
if (cluster == &old_cluster)
495+
pg_fatal("The source cluster was not shut down cleanly.\n");
496+
else
497+
pg_fatal("The target cluster was not shut down cleanly.\n");
498+
}
499+
got_cluster_state = true;
500+
}
501+
}
502+
503+
pclose(output);
504+
505+
if (!got_cluster_state)
506+
{
507+
if (cluster == &old_cluster)
508+
pg_fatal("The source cluster lacks cluster state information:\n");
509+
else
510+
pg_fatal("The target cluster lacks cluster state information:\n");
511+
}
512+
454513
/*
455514
* Restore environment variables
456515
*/

contrib/pg_upgrade/pg_upgrade.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ setup(char *argv0, bool *live_check)
366366
* start, assume the server is running. If the pid file is left over
367367
* from a server crash, this also allows any committed transactions
368368
* stored in the WAL to be replayed so they are not lost, because WAL
369-
* files are not transfered from old to new servers.
369+
* files are not transferred from old to new servers. We later check
370+
* for a clean shutdown.
370371
*/
371372
if (start_postmaster(&old_cluster, false))
372373
stop_postmaster(false);

0 commit comments

Comments
 (0)