Skip to content

Commit e9b56d9

Browse files
committed
WIP: adding unit-tests for reconciliation loop
Signed-off-by: Danny Kopping <danny@coder.com>
1 parent c88c04c commit e9b56d9

File tree

4 files changed

+428
-136
lines changed

4 files changed

+428
-136
lines changed

enterprise/coderd/prebuilds/controller.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,22 @@ func (c *Controller) ReconcileTemplate(templateID uuid.UUID) {
9393
c.nudgeCh <- &templateID
9494
}
9595

96+
// reconcile will attempt to resolve the desired vs actual state of all templates which have presets with prebuilds configured.
97+
//
98+
// NOTE:
99+
//
100+
// This function will kick of n provisioner jobs, based on the calculated state modifications.
101+
//
102+
// These provisioning jobs are fire-and-forget. We DO NOT wait for the prebuilt workspaces to complete their
103+
// provisioning. As a consequence, it's possible that another reconciliation run will occur, which will mean that
104+
// multiple preset versions could be reconciling at once. This may mean some temporary over-provisioning, but the
105+
// reconciliation loop will bring these resources back into their desired numbers in an EVENTUALLY-consistent way.
106+
//
107+
// For example: we could decide to provision 1 new instance in this reconciliation.
108+
// While that workspace is being provisioned, another template version is created which means this same preset will
109+
// be reconciled again, leading to another workspace being provisioned. Two workspace builds will be occurring
110+
// simultaneously for the same preset, but once both jobs have completed the reconciliation loop will notice the
111+
// extraneous instance and delete it.
96112
func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
97113
var logger slog.Logger
98114
if templateID == nil {
@@ -121,7 +137,7 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
121137
err := c.store.InTx(func(db database.Store) error {
122138
start := time.Now()
123139

124-
// TODO: give up after some time waiting on this?
140+
// TODO: use TryAcquireLock here and bail out early.
125141
err := db.AcquireLock(ctx, database.LockIDReconcileTemplatePrebuilds)
126142
if err != nil {
127143
logger.Warn(ctx, "failed to acquire top-level prebuilds reconciliation lock; likely running on another coderd replica", slog.Error(err))
@@ -183,7 +199,7 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
183199
}
184200

185201
// determineState determines the current state of prebuilds & the presets which define them.
186-
// This function MUST be called within
202+
// An application-level lock is used
187203
func (c *Controller) determineState(ctx context.Context, store database.Store, id uuid.NullUUID) (*reconciliationState, error) {
188204
if err := ctx.Err(); err != nil {
189205
return nil, err
@@ -259,14 +275,15 @@ func (c *Controller) reconcilePrebuildsForPreset(ctx context.Context, ps *preset
259275
levelFn = vlogger.Info
260276
}
261277
levelFn(ctx, "template prebuild state retrieved",
262-
slog.F("to_create", actions.create), slog.F("to_delete", len(actions.deleteIDs)),
278+
slog.F("create_count", actions.create), slog.F("delete_count", len(actions.deleteIDs)),
279+
slog.F("to_delete", actions.deleteIDs),
263280
slog.F("desired", actions.desired), slog.F("actual", actions.actual),
264281
slog.F("outdated", actions.outdated), slog.F("extraneous", actions.extraneous),
265282
slog.F("starting", actions.starting), slog.F("stopping", actions.stopping),
266283
slog.F("deleting", actions.deleting), slog.F("eligible", actions.eligible))
267284

268285
// Provision workspaces within the same tx so we don't get any timing issues here.
269-
// i.e. we hold the advisory lock until all reconciliatory actions have been taken.
286+
// i.e. we hold the advisory lock until all "reconciliatory" actions have been taken.
270287
// TODO: max per reconciliation iteration?
271288

272289
// TODO: i've removed the surrounding tx, but if we restore it then we need to pass down the store to these funcs.

enterprise/coderd/prebuilds/controller_test.go

Lines changed: 0 additions & 130 deletions
This file was deleted.

enterprise/coderd/prebuilds/reconciliation.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func (s reconciliationState) filterByPreset(presetID uuid.UUID) (*presetState, e
4747
}
4848

4949
running := slice.Filter(s.runningPrebuilds, func(prebuild database.GetRunningPrebuildsRow) bool {
50-
if !prebuild.DesiredPresetID.Valid && !prebuild.CurrentPresetID.Valid {
50+
if !prebuild.CurrentPresetID.Valid {
5151
return false
5252
}
5353
return prebuild.CurrentPresetID.UUID == preset.PresetID &&
@@ -57,8 +57,10 @@ func (s reconciliationState) filterByPreset(presetID uuid.UUID) (*presetState, e
5757
// These aren't preset-specific, but they need to inhibit all presets of this template from operating since they could
5858
// be in-progress builds which might impact another preset. For example, if a template goes from no defined prebuilds to defined prebuilds
5959
// and back, or a template is updated from one version to another.
60+
// We group by the template so that all prebuilds being provisioned for a prebuild are inhibited if any prebuild for
61+
// any preset in that template are in progress, to prevent clobbering.
6062
inProgress := slice.Filter(s.prebuildsInProgress, func(prebuild database.GetPrebuildsInProgressRow) bool {
61-
return prebuild.TemplateVersionID == preset.TemplateVersionID
63+
return prebuild.TemplateID == preset.TemplateID
6264
})
6365

6466
return &presetState{
@@ -103,6 +105,8 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
103105
}
104106
}
105107

108+
// In-progress builds are common across all presets belonging to a given template.
109+
// In other words: these values will be identical across all presets belonging to this template.
106110
for _, progress := range p.inProgress {
107111
switch progress.Transition {
108112
case database.WorkspaceTransitionStart:
@@ -138,6 +142,7 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
138142
)
139143

140144
// Bail early to avoid scheduling new prebuilds while operations are in progress.
145+
// TODO: optimization: we should probably be able to create prebuilds while others are deleting for a given preset.
141146
if (toCreate+toDelete) > 0 && (starting+stopping+deleting) > 0 {
142147
// TODO: move up
143148
//c.logger.Warn(ctx, "prebuild operations in progress, skipping reconciliation",

0 commit comments

Comments
 (0)