Skip to content

Commit 540d58d

Browse files
authored
Adding the support for standby cluster
This sets up a continuous WAL-streaming cluster by adding the corresponding section to the postgres manifest. Instead of having a full-fledged standby cluster as in Patroni, here we use only the WAL path of the source cluster and stream from there. Since the standby cluster streams from the master and does not need to create or use databases of its own, it bypasses the creation of users and databases. A separate sample manifest is added for setting up a standby cluster.
1 parent 93bfed3 commit 540d58d

File tree

10 files changed

+129
-8
lines changed

10 files changed

+129
-8
lines changed

docs/reference/cluster_manifest.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ explanation of `ttl` and `loop_wait` parameters.
199199
automatically created by Patroni for cluster members and permanent replication
200200
slots. Optional.
201201

202+
* **standby**
203+
initializes the cluster as a standby, creating a cascading replication setup in which the standby leader streams from the specified remote location
204+
202205
## Postgres container resources
203206

204207
Those parameters define [CPU and memory requests and

docs/user.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,23 @@ spec:
281281
s3_force_path_style: true
282282
```
283283

284+
## Setting up a standby cluster
285+
286+
Standby clusters are like normal clusters, except that they stream from a remote cluster. In this first version of the feature, the only scenario covered by the operator is streaming from the WAL archive of the master. Following the popular practice of using Amazon S3 buckets, this path is referred to as `s3_wal_path` here. To make a cluster a standby, add a `standby` section to the YAML file as follows.
287+
288+
```yaml
289+
spec:
290+
standby:
291+
s3_wal_path: "s3 bucket path to the master"
292+
```
293+
294+
Things to note:
295+
296+
- Providing an empty string in the `s3_wal_path` field of the standby cluster will result in an error, and no statefulset will be created.
297+
- Only one pod can be deployed for a standby cluster.
298+
- To manually promote the standby cluster, use `patronictl` and remove the `standby_cluster` config entry.
299+
- There is no way to transform a non-standby cluster into a standby cluster through the operator. Hence, if a cluster is created without the standby section in the YAML and that section is added later, it will have no effect on the cluster. However, this can be done through Patroni by adding the [standby_cluster](https://github.com/zalando/patroni/blob/bd2c54581abb42a7d3a3da551edf0b8732eefd27/docs/replica_bootstrap.rst#standby-cluster) section using `patronictl edit-config`. Note that the transformed standby cluster will not do any streaming; it will simply be in standby mode and allow read-only transactions only.
300+
284301
## Sidecar Support
285302

286303
Each cluster can specify arbitrary sidecars to run. These containers could be used for

manifests/complete-postgres-manifest.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ spec:
6666
# cluster: "acid-batman"
6767
# timestamp: "2017-12-19T12:40:33+01:00" # timezone required (offset relative to UTC, see RFC 3339 section 5.6)
6868
# s3_wal_path: "s3://custom/path/to/bucket"
69-
69+
7070
# run periodic backups with k8s cron jobs
7171
# enableLogicalBackup: true
7272
# logicalBackupSchedule: "30 00 * * *"
@@ -86,4 +86,3 @@ spec:
8686
# env:
8787
# - name: "USEFUL_VAR"
8888
# value: "perhaps-true"
89-

manifests/standby-manifest.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
apiVersion: "acid.zalan.do/v1"
3+
kind: postgresql
4+
metadata:
5+
name: acid-standby-cluster
6+
namespace: default
7+
spec:
8+
teamId: "ACID"
9+
volume:
10+
size: 1Gi
11+
numberOfInstances: 1
12+
postgresql:
13+
version: "10"
14+
# Make this a standby cluster and provide the s3 bucket path of source cluster for continuous streaming.
15+
standby:
16+
s3_wal_path: "s3://path/to/bucket/containing/wal/of/source/cluster/"
17+
18+
maintenanceWindows:
19+
- 01:00-06:00 #UTC
20+
- Sat:00:00-04:00

pkg/apis/acid.zalan.do/v1/postgresql_type.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ type PostgresSpec struct {
5858
ShmVolume *bool `json:"enableShmVolume,omitempty"`
5959
EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"`
6060
LogicalBackupSchedule string `json:"logicalBackupSchedule,omitempty"`
61+
StandbyCluster *StandbyDescription `json:"standby"`
6162
}
6263

6364
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -114,6 +115,11 @@ type Patroni struct {
114115
Slots map[string]map[string]string `json:"slots"`
115116
}
116117

// StandbyDescription describes the remote primary that a standby cluster
// streams WAL from. Only an S3-hosted WAL archive is supported for now.
type StandbyDescription struct {
	// S3WalPath is the S3 bucket path to the WAL archive of the source cluster.
	S3WalPath string `json:"s3_wal_path,omitempty"`
}
122+
117123
// CloneDescription describes which cluster the new should clone and up to which point in time
118124
type CloneDescription struct {
119125
ClusterName string `json:"cluster,omitempty"`

pkg/apis/acid.zalan.do/v1/util_test.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ var unmarshalCluster = []struct {
330330
Status: PostgresStatus{PostgresClusterStatus: ClusterStatusInvalid},
331331
Error: errors.New("name must match {TEAM}-{NAME} format").Error(),
332332
},
333-
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":{"PostgresClusterStatus":"Invalid"}}`),
333+
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null} ,"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":{"PostgresClusterStatus":"Invalid"}}`),
334334
err: nil},
335335
// clone example
336336
{
@@ -354,6 +354,28 @@ var unmarshalCluster = []struct {
354354
},
355355
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{"cluster":"team-batman"}},"status":{"PostgresClusterStatus":""}}`),
356356
err: nil},
357+
// standby example
358+
{
359+
in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "standby": {"s3_wal_path": "s3://custom/path/to/bucket/"}}}`),
360+
out: Postgresql{
361+
TypeMeta: metav1.TypeMeta{
362+
Kind: "Postgresql",
363+
APIVersion: "acid.zalan.do/v1",
364+
},
365+
ObjectMeta: metav1.ObjectMeta{
366+
Name: "acid-testcluster1",
367+
},
368+
Spec: PostgresSpec{
369+
TeamID: "acid",
370+
StandbyCluster: &StandbyDescription{
371+
S3WalPath: "s3://custom/path/to/bucket/",
372+
},
373+
ClusterName: "testcluster1",
374+
},
375+
Error: "",
376+
},
377+
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"standby":{"s3_wal_path":"s3://custom/path/to/bucket/"}},"status":{"PostgresClusterStatus":""}}`),
378+
err: nil},
357379
// erroneous examples
358380
{
359381
in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1"`),

pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/cluster/cluster.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ func (c *Cluster) Create() error {
287287
c.logger.Infof("pods are ready")
288288

289289
// create database objects unless we are running without pods or disabled that feature explicitly
290-
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0) {
290+
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
291291
if err = c.createRoles(); err != nil {
292292
return fmt.Errorf("could not create users: %v", err)
293293
}
@@ -626,7 +626,7 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
626626
}()
627627

628628
// Roles and Databases
629-
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0) {
629+
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
630630
c.logger.Debugf("syncing roles")
631631
if err := c.syncRoles(); err != nil {
632632
c.logger.Errorf("could not sync roles: %v", err)

pkg/cluster/k8sres.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ func generatePodTemplate(
500500
}
501501

502502
// generatePodEnvVars generates environment variables for the Spilo Pod
503-
func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration string, cloneDescription *acidv1.CloneDescription, customPodEnvVarsList []v1.EnvVar) []v1.EnvVar {
503+
func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration string, cloneDescription *acidv1.CloneDescription, standbyDescription *acidv1.StandbyDescription, customPodEnvVarsList []v1.EnvVar) []v1.EnvVar {
504504
envVars := []v1.EnvVar{
505505
{
506506
Name: "SCOPE",
@@ -604,6 +604,10 @@ func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration stri
604604
envVars = append(envVars, c.generateCloneEnvironment(cloneDescription)...)
605605
}
606606

607+
if c.Spec.StandbyCluster != nil {
608+
envVars = append(envVars, c.generateStandbyEnvironment(standbyDescription)...)
609+
}
610+
607611
if len(customPodEnvVarsList) > 0 {
608612
envVars = append(envVars, customPodEnvVarsList...)
609613
}
@@ -793,6 +797,9 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
793797
sort.Slice(customPodEnvVarsList,
794798
func(i, j int) bool { return customPodEnvVarsList[i].Name < customPodEnvVarsList[j].Name })
795799
}
800+
if spec.StandbyCluster != nil && spec.StandbyCluster.S3WalPath == "" {
801+
return nil, fmt.Errorf("s3_wal_path is empty for standby cluster")
802+
}
796803

797804
spiloConfiguration, err := generateSpiloJSONConfiguration(&spec.PostgresqlParam, &spec.Patroni, c.OpConfig.PamRoleName, c.logger)
798805
if err != nil {
@@ -802,7 +809,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
802809
// generate environment variables for the spilo container
803810
spiloEnvVars := deduplicateEnvVars(
804811
c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone,
805-
customPodEnvVarsList), c.containerName(), c.logger)
812+
spec.StandbyCluster, customPodEnvVarsList), c.containerName(), c.logger)
806813

807814
// pickup the docker image for the spilo container
808815
effectiveDockerImage := util.Coalesce(spec.DockerImage, c.OpConfig.DockerImage)
@@ -982,6 +989,11 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
982989
cur := spec.NumberOfInstances
983990
newcur := cur
984991

992+
/* Limit the max number of pods to one, if this is standby-cluster */
993+
if spec.StandbyCluster != nil {
994+
c.logger.Info("Standby cluster can have maximum of 1 pod")
995+
max = 1
996+
}
985997
if max >= 0 && newcur > max {
986998
newcur = max
987999
}
@@ -1328,6 +1340,27 @@ func (c *Cluster) generateCloneEnvironment(description *acidv1.CloneDescription)
13281340
return result
13291341
}
13301342

1343+
func (c *Cluster) generateStandbyEnvironment(description *acidv1.StandbyDescription) []v1.EnvVar {
1344+
result := make([]v1.EnvVar, 0)
1345+
1346+
if description.S3WalPath == "" {
1347+
return nil
1348+
}
1349+
// standby with S3, find out the bucket to setup standby
1350+
msg := "Standby from S3 bucket using custom parsed S3WalPath from the manifest %s "
1351+
c.logger.Infof(msg, description.S3WalPath)
1352+
1353+
result = append(result, v1.EnvVar{
1354+
Name: "STANDBY_WALE_S3_PREFIX",
1355+
Value: description.S3WalPath,
1356+
})
1357+
1358+
result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"})
1359+
result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
1360+
1361+
return result
1362+
}
1363+
13311364
func (c *Cluster) generatePodDisruptionBudget() *policybeta1.PodDisruptionBudget {
13321365
minAvailable := intstr.FromInt(1)
13331366
pdbEnabled := c.OpConfig.EnablePodDisruptionBudget

pkg/cluster/sync.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
9090
}
9191

9292
// create database objects unless we are running without pods or disabled that feature explicitly
93-
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0) {
93+
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
9494
c.logger.Debugf("syncing roles")
9595
if err = c.syncRoles(); err != nil {
9696
err = fmt.Errorf("could not sync roles: %v", err)

0 commit comments

Comments
 (0)