Skip to content

Commit 6918394

Browse files
authored
Add PDB configuration toggle (zalando#583)
* Don't create an impossible disruption budget for smaller clusters. * sync PDB also on update
1 parent 3553144 commit 6918394

File tree

13 files changed

+205
-40
lines changed

13 files changed

+205
-40
lines changed

charts/postgres-operator/values.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ configKubernetesCRD:
114114
cluster_name_label: cluster-name
115115
enable_pod_antiaffinity: false
116116
pod_antiaffinity_topology_key: "kubernetes.io/hostname"
117+
enable_pod_disruption_budget: true
117118
secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}"
118119
# inherited_labels:
119120
# - application
@@ -161,7 +162,7 @@ serviceAccount:
161162
# The name of the ServiceAccount to use.
162163
# If not set and create is true, a name is generated using the fullname template
163164
# When relying solely on the OperatorConfiguration CRD, set this value to "operator"
164-
# Otherwise, the operator tries to use the "default" service account which is forbidden
165+
# Otherwise, the operator tries to use the "default" service account which is forbidden
165166
name: ""
166167

167168
priorityClassName: ""

docs/administrator.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,22 @@ data:
154154
pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone"
155155
```
156156

157+
### Pod Disruption Budget
158+
159+
By default the operator uses a PodDisruptionBudget (PDB) to protect the cluster
160+
from voluntarily disruptions and hence unwanted DB downtime. The `MinAvailable`
161+
parameter of the PDB is set to `1` which prevents killing masters in single-node
162+
clusters and/or the last remaining running instance in a multi-node cluster.
163+
164+
The PDB is only relaxed in two scenarios:
165+
* If a cluster is scaled down to `0` instances (e.g. for draining nodes)
166+
* If the PDB is disabled in the configuration (`enable_pod_disruption_budget`)
167+
168+
The PDB is still in place having `MinAvailable` set to `0`. If enabled it will
169+
be automatically set to `1` on scale up. Disabling PDBs helps avoiding blocking
170+
Kubernetes upgrades in managed K8s environments at the cost of prolonged DB
171+
downtime. See PR #384 for the use case.
172+
157173
### Add cluster-specific labels
158174

159175
In some cases, you might want to add `labels` that are specific to a given

docs/reference/operator_parameters.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,13 @@ configuration they are grouped under the `kubernetes` key.
161161
replaced by the cluster name. Only the `{cluster}` placeholders is allowed in
162162
the template.
163163

164+
* **enable_pod_disruption_budget**
165+
PDB is enabled by default to protect the cluster from voluntarily disruptions
166+
and hence unwanted DB downtime. However, on some cloud providers it could be
167+
necessary to temporarily disabled it, e.g. for node updates. See
168+
[admin docs](../administrator.md#pod-disruption-budget) for more information.
169+
Default is true.
170+
164171
* **secret_name_template**
165172
a template for the name of the database user secrets generated by the
166173
operator. `{username}` is replaced with name of the secret, `{cluster}` with

manifests/postgresql-operator-default-configuration.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ configuration:
2020
pod_service_account_name: operator
2121
pod_terminate_grace_period: 5m
2222
pdb_name_format: "postgres-{cluster}-pdb"
23+
enable_pod_disruption_budget: true
2324
secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}"
2425
cluster_domain: cluster.local
2526
oauth_token_secret_name: postgresql-operator

pkg/apis/acid.zalan.do/v1/operator_configuration_type.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ type KubernetesMetaConfiguration struct {
4949
SpiloFSGroup *int64 `json:"spilo_fsgroup,omitempty"`
5050
WatchedNamespace string `json:"watched_namespace,omitempty"`
5151
PDBNameFormat config.StringTemplate `json:"pdb_name_format,omitempty"`
52+
EnablePodDisruptionBudget *bool `json:"enable_pod_disruption_budget,omitempty"`
5253
SecretNameTemplate config.StringTemplate `json:"secret_name_template,omitempty"`
5354
ClusterDomain string `json:"cluster_domain"`
5455
OAuthTokenSecretName spec.NamespacedName `json:"oauth_token_secret_name,omitempty"`

pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/cluster/cluster.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,15 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
579579
}
580580
}()
581581

582+
// pod disruption budget
583+
if oldSpec.Spec.NumberOfInstances != newSpec.Spec.NumberOfInstances {
584+
c.logger.Debug("syncing pod disruption budgets")
585+
if err := c.syncPodDisruptionBudget(true); err != nil {
586+
c.logger.Errorf("could not sync pod disruption budget: %v", err)
587+
updateFailed = true
588+
}
589+
}
590+
582591
// logical backup job
583592
func() {
584593

pkg/cluster/k8sres.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,26 +1272,26 @@ func (c *Cluster) generateCloneEnvironment(description *acidv1.CloneDescription)
12721272
result = append(result, v1.EnvVar{Name: "CLONE_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
12731273

12741274
if description.S3Endpoint != "" {
1275-
result = append(result, v1.EnvVar{Name: "CLONE_AWS_ENDPOINT", Value: description.S3Endpoint})
1276-
result = append(result, v1.EnvVar{Name: "CLONE_WALE_S3_ENDPOINT", Value: description.S3Endpoint})
1275+
result = append(result, v1.EnvVar{Name: "CLONE_AWS_ENDPOINT", Value: description.S3Endpoint})
1276+
result = append(result, v1.EnvVar{Name: "CLONE_WALE_S3_ENDPOINT", Value: description.S3Endpoint})
12771277
}
12781278

12791279
if description.S3AccessKeyId != "" {
1280-
result = append(result, v1.EnvVar{Name: "CLONE_AWS_ACCESS_KEY_ID", Value: description.S3AccessKeyId})
1280+
result = append(result, v1.EnvVar{Name: "CLONE_AWS_ACCESS_KEY_ID", Value: description.S3AccessKeyId})
12811281
}
12821282

12831283
if description.S3SecretAccessKey != "" {
1284-
result = append(result, v1.EnvVar{Name: "CLONE_AWS_SECRET_ACCESS_KEY", Value: description.S3SecretAccessKey})
1284+
result = append(result, v1.EnvVar{Name: "CLONE_AWS_SECRET_ACCESS_KEY", Value: description.S3SecretAccessKey})
12851285
}
12861286

12871287
if description.S3ForcePathStyle != nil {
1288-
s3ForcePathStyle := "0"
1288+
s3ForcePathStyle := "0"
12891289

1290-
if *description.S3ForcePathStyle {
1291-
s3ForcePathStyle = "1"
1292-
}
1290+
if *description.S3ForcePathStyle {
1291+
s3ForcePathStyle = "1"
1292+
}
12931293

1294-
result = append(result, v1.EnvVar{Name: "CLONE_AWS_S3_FORCE_PATH_STYLE", Value: s3ForcePathStyle})
1294+
result = append(result, v1.EnvVar{Name: "CLONE_AWS_S3_FORCE_PATH_STYLE", Value: s3ForcePathStyle})
12951295
}
12961296
}
12971297

@@ -1300,6 +1300,12 @@ func (c *Cluster) generateCloneEnvironment(description *acidv1.CloneDescription)
13001300

13011301
func (c *Cluster) generatePodDisruptionBudget() *policybeta1.PodDisruptionBudget {
13021302
minAvailable := intstr.FromInt(1)
1303+
pdbEnabled := c.OpConfig.EnablePodDisruptionBudget
1304+
1305+
// if PodDisruptionBudget is disabled or if there are no DB pods, set the budget to 0.
1306+
if (pdbEnabled != nil && !*pdbEnabled) || c.Spec.NumberOfInstances <= 0 {
1307+
minAvailable = intstr.FromInt(0)
1308+
}
13031309

13041310
return &policybeta1.PodDisruptionBudget{
13051311
ObjectMeta: metav1.ObjectMeta{

pkg/cluster/k8sres_test.go

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package cluster
22

33
import (
4+
"reflect"
5+
46
"k8s.io/api/core/v1"
57

68
"testing"
@@ -9,6 +11,10 @@ import (
911
"github.com/zalando/postgres-operator/pkg/util/config"
1012
"github.com/zalando/postgres-operator/pkg/util/constants"
1113
"github.com/zalando/postgres-operator/pkg/util/k8sutil"
14+
15+
policyv1beta1 "k8s.io/api/policy/v1beta1"
16+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
17+
"k8s.io/apimachinery/pkg/util/intstr"
1218
)
1319

1420
func True() *bool {
@@ -21,6 +27,11 @@ func False() *bool {
2127
return &b
2228
}
2329

30+
func toIntStr(val int) *intstr.IntOrString {
31+
b := intstr.FromInt(val)
32+
return &b
33+
}
34+
2435
func TestGenerateSpiloJSONConfiguration(t *testing.T) {
2536
var cluster = New(
2637
Config{
@@ -143,6 +154,113 @@ func TestCreateLoadBalancerLogic(t *testing.T) {
143154
}
144155
}
145156

157+
func TestGeneratePodDisruptionBudget(t *testing.T) {
158+
tests := []struct {
159+
c *Cluster
160+
out policyv1beta1.PodDisruptionBudget
161+
}{
162+
// With multiple instances.
163+
{
164+
New(
165+
Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-pdb"}},
166+
k8sutil.KubernetesClient{},
167+
acidv1.Postgresql{
168+
ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
169+
Spec: acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 3}},
170+
logger),
171+
policyv1beta1.PodDisruptionBudget{
172+
ObjectMeta: metav1.ObjectMeta{
173+
Name: "postgres-myapp-database-pdb",
174+
Namespace: "myapp",
175+
Labels: map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
176+
},
177+
Spec: policyv1beta1.PodDisruptionBudgetSpec{
178+
MinAvailable: toIntStr(1),
179+
Selector: &metav1.LabelSelector{
180+
MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
181+
},
182+
},
183+
},
184+
},
185+
// With zero instances.
186+
{
187+
New(
188+
Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-pdb"}},
189+
k8sutil.KubernetesClient{},
190+
acidv1.Postgresql{
191+
ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
192+
Spec: acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 0}},
193+
logger),
194+
policyv1beta1.PodDisruptionBudget{
195+
ObjectMeta: metav1.ObjectMeta{
196+
Name: "postgres-myapp-database-pdb",
197+
Namespace: "myapp",
198+
Labels: map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
199+
},
200+
Spec: policyv1beta1.PodDisruptionBudgetSpec{
201+
MinAvailable: toIntStr(0),
202+
Selector: &metav1.LabelSelector{
203+
MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
204+
},
205+
},
206+
},
207+
},
208+
// With PodDisruptionBudget disabled.
209+
{
210+
New(
211+
Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-pdb", EnablePodDisruptionBudget: False()}},
212+
k8sutil.KubernetesClient{},
213+
acidv1.Postgresql{
214+
ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
215+
Spec: acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 3}},
216+
logger),
217+
policyv1beta1.PodDisruptionBudget{
218+
ObjectMeta: metav1.ObjectMeta{
219+
Name: "postgres-myapp-database-pdb",
220+
Namespace: "myapp",
221+
Labels: map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
222+
},
223+
Spec: policyv1beta1.PodDisruptionBudgetSpec{
224+
MinAvailable: toIntStr(0),
225+
Selector: &metav1.LabelSelector{
226+
MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
227+
},
228+
},
229+
},
230+
},
231+
// With non-default PDBNameFormat and PodDisruptionBudget explicitly enabled.
232+
{
233+
New(
234+
Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-databass-budget", EnablePodDisruptionBudget: True()}},
235+
k8sutil.KubernetesClient{},
236+
acidv1.Postgresql{
237+
ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
238+
Spec: acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 3}},
239+
logger),
240+
policyv1beta1.PodDisruptionBudget{
241+
ObjectMeta: metav1.ObjectMeta{
242+
Name: "postgres-myapp-database-databass-budget",
243+
Namespace: "myapp",
244+
Labels: map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
245+
},
246+
Spec: policyv1beta1.PodDisruptionBudgetSpec{
247+
MinAvailable: toIntStr(1),
248+
Selector: &metav1.LabelSelector{
249+
MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
250+
},
251+
},
252+
},
253+
},
254+
}
255+
256+
for _, tt := range tests {
257+
result := tt.c.generatePodDisruptionBudget()
258+
if !reflect.DeepEqual(*result, tt.out) {
259+
t.Errorf("Expected PodDisruptionBudget: %#v, got %#v", tt.out, *result)
260+
}
261+
}
262+
}
263+
146264
func TestShmVolume(t *testing.T) {
147265
testName := "TestShmVolume"
148266
tests := []struct {
@@ -269,6 +387,5 @@ func TestCloneEnv(t *testing.T) {
269387
t.Errorf("%s %s: Expected env value %s, have %s instead",
270388
testName, tt.subTest, tt.env.Value, env.Value)
271389
}
272-
273390
}
274391
}

pkg/cluster/sync.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -73,20 +73,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
7373
}
7474
}
7575

76-
// create database objects unless we are running without pods or disabled that feature explicitly
77-
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0) {
78-
c.logger.Debugf("syncing roles")
79-
if err = c.syncRoles(); err != nil {
80-
err = fmt.Errorf("could not sync roles: %v", err)
81-
return err
82-
}
83-
c.logger.Debugf("syncing databases")
84-
if err = c.syncDatabases(); err != nil {
85-
err = fmt.Errorf("could not sync databases: %v", err)
86-
return err
87-
}
88-
}
89-
9076
c.logger.Debug("syncing pod disruption budgets")
9177
if err = c.syncPodDisruptionBudget(false); err != nil {
9278
err = fmt.Errorf("could not sync pod disruption budget: %v", err)
@@ -103,6 +89,20 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
10389
}
10490
}
10591

92+
// create database objects unless we are running without pods or disabled that feature explicitly
93+
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0) {
94+
c.logger.Debugf("syncing roles")
95+
if err = c.syncRoles(); err != nil {
96+
err = fmt.Errorf("could not sync roles: %v", err)
97+
return err
98+
}
99+
c.logger.Debugf("syncing databases")
100+
if err = c.syncDatabases(); err != nil {
101+
err = fmt.Errorf("could not sync databases: %v", err)
102+
return err
103+
}
104+
}
105+
106106
return err
107107
}
108108

pkg/controller/operator_config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
4646
result.ClusterDomain = fromCRD.Kubernetes.ClusterDomain
4747
result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace
4848
result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat
49+
result.EnablePodDisruptionBudget = fromCRD.Kubernetes.EnablePodDisruptionBudget
4950
result.SecretNameTemplate = fromCRD.Kubernetes.SecretNameTemplate
5051
result.OAuthTokenSecretName = fromCRD.Kubernetes.OAuthTokenSecretName
5152
result.InfrastructureRolesSecretName = fromCRD.Kubernetes.InfrastructureRolesSecretName

pkg/util/config/config.go

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -110,20 +110,21 @@ type Config struct {
110110
EnablePodAntiAffinity bool `name:"enable_pod_antiaffinity" default:"false"`
111111
PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
112112
// deprecated and kept for backward compatibility
113-
EnableLoadBalancer *bool `name:"enable_load_balancer"`
114-
MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`
115-
ReplicaDNSNameFormat StringTemplate `name:"replica_dns_name_format" default:"{cluster}-repl.{team}.{hostedzone}"`
116-
PDBNameFormat StringTemplate `name:"pdb_name_format" default:"postgres-{cluster}-pdb"`
117-
Workers uint32 `name:"workers" default:"4"`
118-
APIPort int `name:"api_port" default:"8080"`
119-
RingLogLines int `name:"ring_log_lines" default:"100"`
120-
ClusterHistoryEntries int `name:"cluster_history_entries" default:"1000"`
121-
TeamAPIRoleConfiguration map[string]string `name:"team_api_role_configuration" default:"log_statement:all"`
122-
PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"`
123-
PodManagementPolicy string `name:"pod_management_policy" default:"ordered_ready"`
124-
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
125-
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
126-
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" defaults:"false"`
113+
EnableLoadBalancer *bool `name:"enable_load_balancer"`
114+
MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`
115+
ReplicaDNSNameFormat StringTemplate `name:"replica_dns_name_format" default:"{cluster}-repl.{team}.{hostedzone}"`
116+
PDBNameFormat StringTemplate `name:"pdb_name_format" default:"postgres-{cluster}-pdb"`
117+
EnablePodDisruptionBudget *bool `name:"enable_pod_disruption_budget" default:"true"`
118+
Workers uint32 `name:"workers" default:"4"`
119+
APIPort int `name:"api_port" default:"8080"`
120+
RingLogLines int `name:"ring_log_lines" default:"100"`
121+
ClusterHistoryEntries int `name:"cluster_history_entries" default:"1000"`
122+
TeamAPIRoleConfiguration map[string]string `name:"team_api_role_configuration" default:"log_statement:all"`
123+
PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"`
124+
PodManagementPolicy string `name:"pod_management_policy" default:"ordered_ready"`
125+
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
126+
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
127+
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`
127128
}
128129

129130
// MustMarshal marshals the config or panics

pkg/util/k8sutil/k8sutil.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ func SamePDB(cur, new *policybeta1.PodDisruptionBudget) (match bool, reason stri
158158
//TODO: improve comparison
159159
match = reflect.DeepEqual(new.Spec, cur.Spec)
160160
if !match {
161-
reason = "new service spec doesn't match the current one"
161+
reason = "new PDB spec doesn't match the current one"
162162
}
163163

164164
return

0 commit comments

Comments
 (0)