Skip to content

Commit 05b1b0f

Browse files
committed
Support keepers that cannot become master/sync
Keeper new flags: --can-be-master (when set to false, prevents the keeper from being elected as master) and --can-be-synchronous-replica (when set to false, prevents the keeper from being chosen as synchronous replica). Updates sentinel to support keepers with the new flags: - findBestNewMasters: ignore keepers that cannot become master - updateKeepersStatus: update `KeeperStatus` to have `CanBeMaster` and `CanBeSynchronousReplica` properties - updateCluster: ignore standbys that cannot be synchronous standbys
1 parent 5374939 commit 05b1b0f

File tree

6 files changed

+452
-4
lines changed

6 files changed

+452
-4
lines changed

cmd/keeper/cmd/keeper.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ type config struct {
112112
pgSUUsername string
113113
pgSUPassword string
114114
pgSUPasswordFile string
115+
116+
canBeMaster bool
117+
canBeSynchronousReplica bool
115118
}
116119

117120
var cfg config
@@ -137,6 +140,9 @@ func init() {
137140
CmdKeeper.PersistentFlags().StringVar(&cfg.pgSUPasswordFile, "pg-su-passwordfile", "", "postgres superuser password file. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers)")
138141
CmdKeeper.PersistentFlags().BoolVar(&cfg.debug, "debug", false, "enable debug logging")
139142

143+
CmdKeeper.PersistentFlags().BoolVar(&cfg.canBeMaster, "can-be-master", true, "prevent keeper from being elected as master")
144+
CmdKeeper.PersistentFlags().BoolVar(&cfg.canBeSynchronousReplica, "can-be-synchronous-replica", true, "prevent keeper from being chosen as synchronous replica")
145+
140146
if err := CmdKeeper.PersistentFlags().MarkDeprecated("id", "please use --uid"); err != nil {
141147
log.Fatal(err)
142148
}
@@ -461,6 +467,9 @@ type PostgresKeeper struct {
461467
lastPGState *cluster.PostgresState
462468

463469
waitSyncStandbysSynced bool
470+
471+
canBeMaster *bool
472+
canBeSynchronousReplica *bool
464473
}
465474

466475
func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
@@ -500,6 +509,9 @@ func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
500509
keeperLocalState: &KeeperLocalState{},
501510
dbLocalState: &DBLocalState{},
502511

512+
canBeMaster: &cfg.canBeMaster,
513+
canBeSynchronousReplica: &cfg.canBeSynchronousReplica,
514+
503515
e: e,
504516
end: end,
505517
}
@@ -567,6 +579,9 @@ func (p *PostgresKeeper) updateKeeperInfo() error {
567579
Min: min,
568580
},
569581
PostgresState: p.getLastPGState(),
582+
583+
CanBeMaster: p.canBeMaster,
584+
CanBeSynchronousReplica: p.canBeSynchronousReplica,
570585
}
571586

572587
// The time to live is just to automatically remove old entries, it's

cmd/sentinel/cmd/sentinel.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,16 @@ func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo clus
225225
}
226226
}
227227

228+
// Keepers support several command line arguments that should be populated in the
229+
// KeeperStatus by the sentinel. This allows us to make decisions about how to arrange
230+
// the cluster that take into consideration the configuration of each keeper.
231+
for keeperUID, k := range cd.Keepers {
232+
if ki, ok := keepersInfo[keeperUID]; ok {
233+
k.Status.CanBeMaster = ki.CanBeMaster
234+
k.Status.CanBeSynchronousReplica = ki.CanBeSynchronousReplica
235+
}
236+
}
237+
228238
// Mark keepers without a keeperInfo (cleaned up above from not updated
229239
// ones) as in error
230240
for keeperUID, k := range cd.Keepers {
@@ -720,20 +730,35 @@ func (s *Sentinel) findBestStandbys(cd *cluster.ClusterData, masterDB *cluster.D
720730
return bestDBs
721731
}
722732

733+
// findBestNewMasters identifies the DBs that are eligible to become a new master. We do
734+
// this by selecting from valid standbys (those keepers that follow the same timeline as
735+
// our master, and have an acceptable replication lag) and also selecting from those nodes
736+
// that are valid to become master by their status.
723737
func (s *Sentinel) findBestNewMasters(cd *cluster.ClusterData, masterDB *cluster.DB) []*cluster.DB {
724-
bestNewMasters := s.findBestStandbys(cd, masterDB)
738+
bestNewMasters := []*cluster.DB{}
739+
for _, db := range s.findBestStandbys(cd, masterDB) {
740+
if k, ok := cd.Keepers[db.Spec.KeeperUID]; ok && (k.Status.CanBeMaster != nil && !*k.Status.CanBeMaster) {
741+
log.Infow("ignoring keeper since it cannot be master (--can-be-master=false)", "db", db.UID, "keeper", db.Spec.KeeperUID)
742+
continue
743+
}
744+
745+
bestNewMasters = append(bestNewMasters, db)
746+
}
747+
725748
// Add the previous masters to the best standbys (if valid and in good state)
726-
goodMasters, _, _ := s.validMastersByStatus(cd)
727-
log.Debugf("goodMasters: %s", spew.Sdump(goodMasters))
728-
for _, db := range goodMasters {
749+
validMastersByStatus, _, _ := s.validMastersByStatus(cd)
750+
log.Debugf("validMastersByStatus: %s", spew.Sdump(validMastersByStatus))
751+
for _, db := range validMastersByStatus {
729752
if db.UID == masterDB.UID {
730753
log.Debugw("ignoring db since it's the current master", "db", db.UID, "keeper", db.Spec.KeeperUID)
731754
continue
732755
}
756+
733757
if db.Status.TimelineID != masterDB.Status.TimelineID {
734758
log.Debugw("ignoring keeper since its pg timeline is different than master timeline", "db", db.UID, "dbTimeline", db.Status.TimelineID, "masterTimeline", masterDB.Status.TimelineID)
735759
continue
736760
}
761+
737762
// do this only when not using synchronous replication since in sync repl we
738763
// have to ignore the last reported xlogpos or valid sync standby will be
739764
// skipped
@@ -743,8 +768,10 @@ func (s *Sentinel) findBestNewMasters(cd *cluster.ClusterData, masterDB *cluster
743768
continue
744769
}
745770
}
771+
746772
bestNewMasters = append(bestNewMasters, db)
747773
}
774+
748775
// Sort by XLogPos
749776
sort.Sort(dbSlice(bestNewMasters))
750777
log.Debugf("bestNewMasters: %s", spew.Sdump(bestNewMasters))
@@ -1302,6 +1329,15 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
13021329
if _, ok := synchronousStandbys[bestStandby.UID]; ok {
13031330
continue
13041331
}
1332+
1333+
// ignore standbys that cannot be synchronous standbys
1334+
if db, ok := newcd.DBs[bestStandby.UID]; ok {
1335+
if keeper, ok := newcd.Keepers[db.Spec.KeeperUID]; ok && (keeper.Status.CanBeSynchronousReplica != nil && !*keeper.Status.CanBeSynchronousReplica) {
1336+
log.Infow("cannot choose standby as synchronous (--can-be-synchronous-replica=false)", "db", db.UID, "keeper", keeper.UID)
1337+
continue
1338+
}
1339+
}
1340+
13051341
log.Infow("adding new synchronous standby in good state trying to reach MaxSynchronousStandbys", "masterDB", masterDB.UID, "synchronousStandbyDB", bestStandby.UID, "keeper", bestStandby.Spec.KeeperUID)
13061342
synchronousStandbys[bestStandby.UID] = struct{}{}
13071343
addedCount++

0 commit comments

Comments
 (0)