Skip to content

feat: implement expiration policy logic for prebuilds #17996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
chore: update Actual state count to include both expired and non-expi…
…red prebuilds
  • Loading branch information
ssncferreira committed May 26, 2025
commit 04c0e7cf18d75a743de3db9ecf8101e0918d1edc
19 changes: 11 additions & 8 deletions coderd/prebuilds/preset_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ type PresetSnapshot struct {
// For example, it calculates how many prebuilds are expired, eligible,
// how many are extraneous, and how many are in various transition states.
type ReconciliationState struct {
Actual int32 // Number of currently valid running prebuilds, i.e., non-expired prebuilds
Actual int32 // Number of currently running prebuilds, i.e., non-expired, expired and extraneous prebuilds
Expired int32 // Number of currently running prebuilds that exceeded their allowed time-to-live (TTL)
Desired int32 // Number of prebuilds desired as defined in the preset
Eligible int32 // Number of prebuilds that are ready to be claimed
Expand Down Expand Up @@ -84,7 +84,7 @@ func (ra *ReconciliationActions) IsNoop() bool {
}

// CalculateState computes the current state of prebuilds for a preset, including:
// - Actual: Number of currently valid running prebuilds, i.e., non-expired prebuilds
// - Actual: Number of currently running prebuilds, i.e., non-expired and expired prebuilds
// - Expired: Number of currently running expired prebuilds
// - Desired: Number of prebuilds desired as defined in the preset
// - Eligible: Number of prebuilds that are ready to be claimed
Expand All @@ -104,16 +104,16 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
extraneous int32
)

// #nosec G115 - Safe conversion as p.Running slice length is expected to be within int32 range
actual = int32(len(p.Running))
// #nosec G115 - Safe conversion as p.Running and p.Expired slice length is expected to be within int32 range
actual = int32(len(p.Running) + len(p.Expired))

// #nosec G115 - Safe conversion as p.Expired slice length is expected to be within int32 range
expired = int32(len(p.Expired))

if p.isActive() {
desired = p.Preset.DesiredInstances.Int32
eligible = p.countEligible()
extraneous = max(actual-desired, 0)
extraneous = max(actual-expired-desired, 0)
}

starting, stopping, deleting := p.countInProgress()
Expand All @@ -137,7 +137,7 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
// 2. If the preset is inactive (template version is not active), it will delete all running prebuilds
// 3. For active presets, it calculates the number of prebuilds to create or delete based on:
// - The desired number of instances
// - Currently running non-expired prebuilds
// - Currently running prebuilds
// - Currently running expired prebuilds
// - Prebuilds in transition states (starting/stopping/deleting)
// - Any extraneous prebuilds that need to be removed
Expand Down Expand Up @@ -176,7 +176,7 @@ func (p PresetSnapshot) isActive() bool {
//
// The reconciliation follows this order:
// 1. Delete expired prebuilds: These are no longer valid and must be removed first.
// 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running prebuilds (excluding expired)
// 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running non-expired prebuilds
// still exceeds the desired count, the oldest prebuilds are deleted to reduce excess.
// 3. Create missing prebuilds: If the number of non-expired, non-starting prebuilds is still below the desired count,
// create the necessary number of prebuilds to reach the target.
Expand Down Expand Up @@ -207,9 +207,12 @@ func (p PresetSnapshot) handleActiveTemplateVersion() (actions []*Reconciliation
})
}

// Number of running prebuilds excluding the recently deleted Expired
runningValid := state.Actual - state.Expired

// Calculate how many new prebuilds we need to create
// We subtract starting prebuilds since they're already being created
prebuildsToCreate := max(state.Desired-state.Actual-state.Starting, 0)
prebuildsToCreate := max(state.Desired-runningValid-state.Starting, 0)
if prebuildsToCreate > 0 {
actions = append(actions,
&ReconciliationActions{
Expand Down
8 changes: 4 additions & 4 deletions coderd/prebuilds/preset_snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ func TestExpiredPrebuilds(t *testing.T) {
desired: 2,
expired: 1,
checkFn: func(runningPrebuilds []database.GetRunningPrebuiltWorkspacesRow, state prebuilds.ReconciliationState, actions []*prebuilds.ReconciliationActions) {
expectedState := prebuilds.ReconciliationState{Actual: 1, Desired: 2, Expired: 1}
expectedState := prebuilds.ReconciliationState{Actual: 2, Desired: 2, Expired: 1}
expectedActions := []*prebuilds.ReconciliationActions{
{
ActionType: prebuilds.ActionTypeDelete,
Expand All @@ -579,7 +579,7 @@ func TestExpiredPrebuilds(t *testing.T) {
desired: 2,
expired: 2,
checkFn: func(runningPrebuilds []database.GetRunningPrebuiltWorkspacesRow, state prebuilds.ReconciliationState, actions []*prebuilds.ReconciliationActions) {
expectedState := prebuilds.ReconciliationState{Actual: 0, Desired: 2, Expired: 2}
expectedState := prebuilds.ReconciliationState{Actual: 2, Desired: 2, Expired: 2}
expectedActions := []*prebuilds.ReconciliationActions{
{
ActionType: prebuilds.ActionTypeDelete,
Expand All @@ -604,7 +604,7 @@ func TestExpiredPrebuilds(t *testing.T) {
desired: 2,
expired: 2,
checkFn: func(runningPrebuilds []database.GetRunningPrebuiltWorkspacesRow, state prebuilds.ReconciliationState, actions []*prebuilds.ReconciliationActions) {
expectedState := prebuilds.ReconciliationState{Actual: 2, Desired: 2, Expired: 2, Extraneous: 0}
expectedState := prebuilds.ReconciliationState{Actual: 4, Desired: 2, Expired: 2, Extraneous: 0}
expectedActions := []*prebuilds.ReconciliationActions{
{
ActionType: prebuilds.ActionTypeDelete,
Expand All @@ -626,7 +626,7 @@ func TestExpiredPrebuilds(t *testing.T) {
desired: 2,
expired: 1,
checkFn: func(runningPrebuilds []database.GetRunningPrebuiltWorkspacesRow, state prebuilds.ReconciliationState, actions []*prebuilds.ReconciliationActions) {
expectedState := prebuilds.ReconciliationState{Actual: 3, Desired: 2, Expired: 1, Extraneous: 1}
expectedState := prebuilds.ReconciliationState{Actual: 4, Desired: 2, Expired: 1, Extraneous: 1}
expectedActions := []*prebuilds.ReconciliationActions{
// First action correspond to deleting the expired prebuild,
// and the second action corresponds to deleting the extraneous prebuild
Expand Down
20 changes: 9 additions & 11 deletions enterprise/coderd/prebuilds/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,12 +393,11 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
state := ps.CalculateState()
actions, err := c.CalculateActions(ctx, ps)
if err != nil {
logger.Error(ctx, "failed to calculate actions for preset", slog.F("preset_id", ps.Preset.ID), slog.Error(err))
logger.Error(ctx, "failed to calculate actions for preset", slog.Error(err))
return err
}

fields := []any{
slog.F("preset_id", ps.Preset.ID),
slog.F("desired", state.Desired), slog.F("actual", state.Actual),
slog.F("extraneous", state.Extraneous), slog.F("starting", state.Starting),
slog.F("stopping", state.Stopping), slog.F("deleting", state.Deleting),
Expand Down Expand Up @@ -520,6 +519,8 @@ func (c *StoreReconciler) WithReconciliationLock(
// This method handles logging at appropriate levels and performs the necessary operations
// according to the action type. It returns an error if any part of the action fails.
func (c *StoreReconciler) executeReconciliationAction(ctx context.Context, logger slog.Logger, ps prebuilds.PresetSnapshot, action *prebuilds.ReconciliationActions) error {
levelFn := logger.Debug

// Nothing has to be done.
if !ps.Preset.UsingActiveVersion && action.IsNoop() {
logger.Debug(ctx, "skipping reconciliation for preset - nothing has to be done",
Expand All @@ -532,7 +533,12 @@ func (c *StoreReconciler) executeReconciliationAction(ctx context.Context, logge
// nolint:gocritic // ReconcilePreset needs Prebuilds Orchestrator permissions.
prebuildsCtx := dbauthz.AsPrebuildsOrchestrator(ctx)

levelFn := logger.Debug
fields := []any{
slog.F("action_type", action.ActionType), slog.F("create_count", action.Create),
slog.F("delete_count", len(action.DeleteIDs)), slog.F("to_delete", action.DeleteIDs),
}
levelFn(ctx, "calculated reconciliation action for preset", fields...)

switch {
case action.ActionType == prebuilds.ActionTypeBackoff:
levelFn = logger.Warn
Expand All @@ -543,14 +549,6 @@ func (c *StoreReconciler) executeReconciliationAction(ctx context.Context, logge
levelFn = logger.Info
}

fields := []any{
slog.F("preset_id", ps.Preset.ID), slog.F("action_type", action.ActionType),
slog.F("create_count", action.Create), slog.F("delete_count", len(action.DeleteIDs)),
slog.F("to_delete", action.DeleteIDs),
}

levelFn(ctx, "calculated reconciliation action for preset", fields...)

switch action.ActionType {
case prebuilds.ActionTypeBackoff:
// If there is anything to backoff for (usually a cycle of failed prebuilds), then log and bail out.
Expand Down
Loading