Skip to content

Commit e69f051

Browse files
committed
Remove recovery.signal from replicas created by pgBackRest
Patroni creates "standby.signal" before starting PostgreSQL, causing "recovery.signal" to remain even after recovery completes. This file does no harm on replicas nor when restoring, but it does cause the disk of the primary to enter recovery after a clean shutdown. Issue: [sc-14190]
1 parent 593f3cd commit e69f051

File tree

7 files changed

+118
-2
lines changed

7 files changed

+118
-2
lines changed

internal/pgbackrest/reconcile.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,13 @@ func ReplicaCreateCommand(
410410
"--stanza=" + DefaultStanzaName,
411411
"--repo=" + strings.TrimPrefix(repoName, "repo"),
412412
"--link-map=pg_wal=" + postgres.WALDirectory(cluster, instance),
413+
414+
// Do not create a recovery signal file on PostgreSQL v12 or later;
415+
// Patroni creates a standby signal file which takes precedence.
416+
// Patroni manages recovery.conf prior to PostgreSQL v12.
417+
// - https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/restore/restore.c#L1824
418+
// - https://www.postgresql.org/docs/12/runtime-config-wal.html
419+
"--type=standby",
413420
}
414421
}
415422

internal/pgbackrest/reconcile_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,7 @@ func TestReplicaCreateCommand(t *testing.T) {
803803

804804
assert.DeepEqual(t, ReplicaCreateCommand(cluster, instance), []string{
805805
"pgbackrest", "restore", "--delta", "--stanza=db", "--repo=2",
806-
"--link-map=pg_wal=/pgdata/pg0_wal",
806+
"--link-map=pg_wal=/pgdata/pg0_wal", "--type=standby",
807807
})
808808
})
809809

@@ -816,7 +816,7 @@ func TestReplicaCreateCommand(t *testing.T) {
816816

817817
assert.DeepEqual(t, ReplicaCreateCommand(cluster, instance), []string{
818818
"pgbackrest", "restore", "--delta", "--stanza=db", "--repo=7",
819-
"--link-map=pg_wal=/pgdata/pg0_wal",
819+
"--link-map=pg_wal=/pgdata/pg0_wal", "--type=standby",
820820
})
821821
})
822822
}

internal/postgres/config.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,15 @@ func startupCommand(
255255
// - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/bin/pg_basebackup/pg_basebackup.c;hb=REL_13_0#l2621
256256
`safelink "${pgwal_directory}" "${postgres_data_directory}/pg_wal"`,
257257
`results 'wal directory' "$(realpath "${postgres_data_directory}/pg_wal")"`,
258+
259+
// Early versions of PGO create replicas with a recovery signal file.
260+
// Patroni also creates a standby signal file before starting Postgres,
261+
// causing Postgres to remove only one, the standby. Remove the extra
262+
// signal file now, if it exists, and let Patroni manage the standby
263+
// signal file instead.
264+
// - https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/access/transam/xlog.c;hb=REL_12_0#l5318
265+
// TODO(cbandy): Remove this after 5.0 is EOL.
266+
`rm -f "${postgres_data_directory}/recovery.signal"`,
258267
}, "\n")
259268

260269
return append([]string{"bash", "-ceu", "--", script, "startup"}, args...)

internal/postgres/reconcile_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ initContainers:
221221
[ "${postgres_data_version}" = "${expected_major_version}" ]
222222
safelink "${pgwal_directory}" "${postgres_data_directory}/pg_wal"
223223
results 'wal directory' "$(realpath "${postgres_data_directory}/pg_wal")"
224+
rm -f "${postgres_data_directory}/recovery.signal"
224225
- startup
225226
- "11"
226227
- /pgdata/pg11_wal
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
---
2+
apiVersion: postgres-operator.crunchydata.com/v1beta1
3+
kind: PostgresCluster
4+
metadata:
5+
name: init-pgbackrest
6+
spec:
7+
postgresVersion: ${KUTTL_PG_VERSION}
8+
instances:
9+
- name: instance1
10+
replicas: 2
11+
dataVolumeClaimSpec:
12+
accessModes:
13+
- "ReadWriteOnce"
14+
resources:
15+
requests:
16+
storage: 1Gi
17+
backups:
18+
pgbackrest:
19+
manual:
20+
repoName: repo2
21+
options:
22+
- --type=full
23+
repos:
24+
- name: repo1
25+
volume:
26+
volumeClaimSpec:
27+
accessModes:
28+
- "ReadWriteOnce"
29+
resources:
30+
requests:
31+
storage: 1Gi
32+
# Adding a second PVC repo for testing, rather than test with S3/GCS/Azure
33+
- name: repo2
34+
volume:
35+
volumeClaimSpec:
36+
accessModes:
37+
- "ReadWriteOnce"
38+
resources:
39+
requests:
40+
storage: 1Gi
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
apiVersion: postgres-operator.crunchydata.com/v1beta1
2+
kind: PostgresCluster
3+
metadata:
4+
name: init-pgbackrest
5+
status:
6+
instances:
7+
- name: instance1
8+
readyReplicas: 2
9+
replicas: 2
10+
updatedReplicas: 2
11+
pgbackrest:
12+
repoHost:
13+
apiVersion: apps/v1
14+
kind: StatefulSet
15+
ready: true
16+
repos:
17+
# Assert that the status has the two repos, with only the first having the `replicaCreateBackupComplete` field
18+
- bound: true
19+
name: repo1
20+
replicaCreateBackupComplete: true
21+
stanzaCreated: true
22+
- bound: true
23+
name: repo2
24+
stanzaCreated: true
25+
---
26+
apiVersion: batch/v1
27+
kind: Job
28+
metadata:
29+
labels:
30+
postgres-operator.crunchydata.com/cluster: init-pgbackrest
31+
postgres-operator.crunchydata.com/pgbackrest-backup: replica-create
32+
postgres-operator.crunchydata.com/pgbackrest-repo: repo1
33+
status:
34+
succeeded: 1
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
commands:
4+
- script: |
5+
# Assumes the cluster only has a single replica
6+
NEW_REPLICA=$(
7+
kubectl get pod --namespace "${NAMESPACE}" \
8+
--output name --selector '
9+
postgres-operator.crunchydata.com/cluster=init-pgbackrest,
10+
postgres-operator.crunchydata.com/role=replica'
11+
)
12+
13+
LIST=$(
14+
kubectl exec --namespace "${NAMESPACE}" "${NEW_REPLICA}" -- \
15+
ls /pgdata/pg${KUTTL_PG_VERSION}/
16+
)
17+
18+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
19+
{
20+
!(contains "${LIST}" 'recovery.signal')
21+
} || {
22+
echo >&2 'Signal file(s) found'
23+
echo "${LIST}"
24+
exit 1
25+
}

0 commit comments

Comments
 (0)