@@ -93,6 +93,22 @@ func (c *Controller) ReconcileTemplate(templateID uuid.UUID) {
93
93
c .nudgeCh <- & templateID
94
94
}
95
95
96
+ // reconcile will attempt to resolve the desired vs actual state of all templates which have presets with prebuilds configured.
97
+ //
98
+ // NOTE:
99
+ //
100
+ // This function will kick off n provisioner jobs, based on the calculated state modifications.
101
+ //
102
+ // These provisioning jobs are fire-and-forget. We DO NOT wait for the prebuilt workspaces to complete their
103
+ // provisioning. As a consequence, it's possible that another reconciliation run will occur, which will mean that
104
+ // multiple preset versions could be reconciling at once. This may mean some temporary over-provisioning, but the
105
+ // reconciliation loop will bring these resources back into their desired numbers in an EVENTUALLY-consistent way.
106
+ //
107
+ // For example: we could decide to provision 1 new instance in this reconciliation.
108
+ // While that workspace is being provisioned, another template version is created which means this same preset will
109
+ // be reconciled again, leading to another workspace being provisioned. Two workspace builds will be occurring
110
+ // simultaneously for the same preset, but once both jobs have completed the reconciliation loop will notice the
111
+ // extraneous instance and delete it.
96
112
func (c * Controller ) reconcile (ctx context.Context , templateID * uuid.UUID ) {
97
113
var logger slog.Logger
98
114
if templateID == nil {
@@ -121,7 +137,7 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
121
137
err := c .store .InTx (func (db database.Store ) error {
122
138
start := time .Now ()
123
139
124
- // TODO: give up after some time waiting on this?
140
+ // TODO: use TryAcquireLock here and bail out early.
125
141
err := db .AcquireLock (ctx , database .LockIDReconcileTemplatePrebuilds )
126
142
if err != nil {
127
143
logger .Warn (ctx , "failed to acquire top-level prebuilds reconciliation lock; likely running on another coderd replica" , slog .Error (err ))
@@ -183,7 +199,7 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
183
199
}
184
200
185
201
// determineState determines the current state of prebuilds & the presets which define them.
186
- // This function MUST be called within
202
+ // An application-level lock is used
187
203
func (c * Controller ) determineState (ctx context.Context , store database.Store , id uuid.NullUUID ) (* reconciliationState , error ) {
188
204
if err := ctx .Err (); err != nil {
189
205
return nil , err
@@ -259,14 +275,15 @@ func (c *Controller) reconcilePrebuildsForPreset(ctx context.Context, ps *preset
259
275
levelFn = vlogger .Info
260
276
}
261
277
levelFn (ctx , "template prebuild state retrieved" ,
262
- slog .F ("to_create" , actions .create ), slog .F ("to_delete" , len (actions .deleteIDs )),
278
+ slog .F ("create_count" , actions .create ), slog .F ("delete_count" , len (actions .deleteIDs )),
279
+ slog .F ("to_delete" , actions .deleteIDs ),
263
280
slog .F ("desired" , actions .desired ), slog .F ("actual" , actions .actual ),
264
281
slog .F ("outdated" , actions .outdated ), slog .F ("extraneous" , actions .extraneous ),
265
282
slog .F ("starting" , actions .starting ), slog .F ("stopping" , actions .stopping ),
266
283
slog .F ("deleting" , actions .deleting ), slog .F ("eligible" , actions .eligible ))
267
284
268
285
// Provision workspaces within the same tx so we don't get any timing issues here.
269
- // i.e. we hold the advisory lock until all reconciliatory actions have been taken.
286
+ // i.e. we hold the advisory lock until all "reconciliatory" actions have been taken.
270
287
// TODO: max per reconciliation iteration?
271
288
272
289
// TODO: i've removed the surrounding tx, but if we restore it then we need to pass down the store to these funcs.
0 commit comments