Skip to content

[pull] main from coder:main #58

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions coderd/database/queries/prebuilds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ SELECT
tvp.id,
tvp.name,
tvp.desired_instances AS desired_instances,
tvp.invalidate_after_secs AS ttl,
tvp.prebuild_status,
t.deleted,
t.deprecated != '' AS deprecated
Expand Down
32 changes: 31 additions & 1 deletion coderd/prebuilds/global_snapshot.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package prebuilds

import (
"time"

"github.com/google/uuid"
"golang.org/x/xerrors"

Expand Down Expand Up @@ -41,13 +43,17 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err
return nil, xerrors.Errorf("no preset found with ID %q", presetID)
}

// Only include workspaces that have successfully started
running := slice.Filter(s.RunningPrebuilds, func(prebuild database.GetRunningPrebuiltWorkspacesRow) bool {
if !prebuild.CurrentPresetID.Valid {
return false
}
return prebuild.CurrentPresetID.UUID == preset.ID
})

// Separate running workspaces into non-expired and expired based on the preset's TTL
nonExpired, expired := filterExpiredWorkspaces(preset, running)

inProgress := slice.Filter(s.PrebuildsInProgress, func(prebuild database.CountInProgressPrebuildsRow) bool {
return prebuild.PresetID.UUID == preset.ID
})
Expand All @@ -66,9 +72,33 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err

return &PresetSnapshot{
Preset: preset,
Running: running,
Running: nonExpired,
Expired: expired,
InProgress: inProgress,
Backoff: backoffPtr,
IsHardLimited: isHardLimited,
}, nil
}

// filterExpiredWorkspaces splits running workspaces into expired and non-expired
// based on the preset's TTL.
// If TTL is missing or zero, all workspaces are considered non-expired.
func filterExpiredWorkspaces(preset database.GetTemplatePresetsWithPrebuildsRow, runningWorkspaces []database.GetRunningPrebuiltWorkspacesRow) (nonExpired []database.GetRunningPrebuiltWorkspacesRow, expired []database.GetRunningPrebuiltWorkspacesRow) {
if !preset.Ttl.Valid {
return runningWorkspaces, expired
}

ttl := time.Duration(preset.Ttl.Int32) * time.Second
if ttl <= 0 {
return runningWorkspaces, expired
}

for _, prebuild := range runningWorkspaces {
if time.Since(prebuild.CreatedAt) > ttl {
expired = append(expired, prebuild)
} else {
nonExpired = append(nonExpired, prebuild)
}
}
return nonExpired, expired
}
105 changes: 76 additions & 29 deletions coderd/prebuilds/preset_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,14 @@ const (
// PresetSnapshot is a filtered view of GlobalSnapshot focused on a single preset.
// It contains the raw data needed to calculate the current state of a preset's prebuilds,
// including running prebuilds, in-progress builds, and backoff information.
// - Running: prebuilds running and non-expired
// - Expired: prebuilds running and expired due to the preset's TTL
// - InProgress: prebuilds currently in progress
// - Backoff: holds failure info to decide if prebuild creation should be backed off
type PresetSnapshot struct {
Preset database.GetTemplatePresetsWithPrebuildsRow
Running []database.GetRunningPrebuiltWorkspacesRow
Expired []database.GetRunningPrebuiltWorkspacesRow
InProgress []database.CountInProgressPrebuildsRow
Backoff *database.GetPresetsBackoffRow
IsHardLimited bool
Expand All @@ -43,10 +48,11 @@ type PresetSnapshot struct {
// calculated from a PresetSnapshot. While PresetSnapshot contains raw data,
// ReconciliationState contains derived metrics that are directly used to
// determine what actions are needed (create, delete, or backoff).
// For example, it calculates how many prebuilds are eligible, how many are
// extraneous, and how many are in various transition states.
// For example, it calculates how many prebuilds are expired, eligible,
// how many are extraneous, and how many are in various transition states.
type ReconciliationState struct {
Actual int32 // Number of currently running prebuilds
Actual int32 // Number of currently running prebuilds, i.e., non-expired, expired and extraneous prebuilds
Expired int32 // Number of currently running prebuilds that exceeded their allowed time-to-live (TTL)
Desired int32 // Number of prebuilds desired as defined in the preset
Eligible int32 // Number of prebuilds that are ready to be claimed
Extraneous int32 // Number of extra running prebuilds beyond the desired count
Expand Down Expand Up @@ -78,7 +84,8 @@ func (ra *ReconciliationActions) IsNoop() bool {
}

// CalculateState computes the current state of prebuilds for a preset, including:
// - Actual: Number of currently running prebuilds
// - Actual: Number of currently running prebuilds, i.e., non-expired and expired prebuilds
// - Expired: Number of currently running expired prebuilds
// - Desired: Number of prebuilds desired as defined in the preset
// - Eligible: Number of prebuilds that are ready to be claimed
// - Extraneous: Number of extra running prebuilds beyond the desired count
Expand All @@ -92,23 +99,28 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
var (
actual int32
desired int32
expired int32
eligible int32
extraneous int32
)

// #nosec G115 - Safe conversion as p.Running slice length is expected to be within int32 range
actual = int32(len(p.Running))
// #nosec G115 - Safe conversion as p.Running and p.Expired slice length is expected to be within int32 range
actual = int32(len(p.Running) + len(p.Expired))

// #nosec G115 - Safe conversion as p.Expired slice length is expected to be within int32 range
expired = int32(len(p.Expired))

if p.isActive() {
desired = p.Preset.DesiredInstances.Int32
eligible = p.countEligible()
extraneous = max(actual-desired, 0)
extraneous = max(actual-expired-desired, 0)
}

starting, stopping, deleting := p.countInProgress()

return &ReconciliationState{
Actual: actual,
Expired: expired,
Desired: desired,
Eligible: eligible,
Extraneous: extraneous,
Expand All @@ -126,14 +138,15 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
// 3. For active presets, it calculates the number of prebuilds to create or delete based on:
// - The desired number of instances
// - Currently running prebuilds
// - Currently running expired prebuilds
// - Prebuilds in transition states (starting/stopping/deleting)
// - Any extraneous prebuilds that need to be removed
//
// The function returns a ReconciliationActions struct that will have exactly one action type set:
// - ActionTypeBackoff: Only BackoffUntil is set, indicating when to retry
// - ActionTypeCreate: Only Create is set, indicating how many prebuilds to create
// - ActionTypeDelete: Only DeleteIDs is set, containing IDs of prebuilds to delete
func (p PresetSnapshot) CalculateActions(clock quartz.Clock, backoffInterval time.Duration) (*ReconciliationActions, error) {
func (p PresetSnapshot) CalculateActions(clock quartz.Clock, backoffInterval time.Duration) ([]*ReconciliationActions, error) {
// TODO: align workspace states with how we represent them on the FE and the CLI
// right now there's some slight differences which can lead to additional prebuilds being created

Expand All @@ -158,45 +171,77 @@ func (p PresetSnapshot) isActive() bool {
return p.Preset.UsingActiveVersion && !p.Preset.Deleted && !p.Preset.Deprecated
}

// handleActiveTemplateVersion deletes excess prebuilds if there are too many,
// otherwise creates new ones to reach the desired count.
func (p PresetSnapshot) handleActiveTemplateVersion() (*ReconciliationActions, error) {
// handleActiveTemplateVersion determines the reconciliation actions for a preset with an active template version.
// It ensures the system moves towards the desired number of healthy prebuilds.
//
// The reconciliation follows this order:
// 1. Delete expired prebuilds: These are no longer valid and must be removed first.
// 2. Delete extraneous prebuilds: After expired ones are removed, if the number of running non-expired prebuilds
// still exceeds the desired count, the oldest prebuilds are deleted to reduce excess.
// 3. Create missing prebuilds: If the number of non-expired, non-starting prebuilds is still below the desired count,
// create the necessary number of prebuilds to reach the target.
//
// The function returns a list of actions to be executed to achieve the desired state.
func (p PresetSnapshot) handleActiveTemplateVersion() (actions []*ReconciliationActions, err error) {
state := p.CalculateState()

// If we have more prebuilds than desired, delete the oldest ones
// If we have expired prebuilds, delete them
if state.Expired > 0 {
var deleteIDs []uuid.UUID
for _, expired := range p.Expired {
deleteIDs = append(deleteIDs, expired.ID)
}
actions = append(actions,
&ReconciliationActions{
ActionType: ActionTypeDelete,
DeleteIDs: deleteIDs,
})
}

// If we still have more prebuilds than desired, delete the oldest ones
if state.Extraneous > 0 {
return &ReconciliationActions{
ActionType: ActionTypeDelete,
DeleteIDs: p.getOldestPrebuildIDs(int(state.Extraneous)),
}, nil
actions = append(actions,
&ReconciliationActions{
ActionType: ActionTypeDelete,
DeleteIDs: p.getOldestPrebuildIDs(int(state.Extraneous)),
})
}

// Number of running prebuilds excluding the recently deleted Expired
runningValid := state.Actual - state.Expired

// Calculate how many new prebuilds we need to create
// We subtract starting prebuilds since they're already being created
prebuildsToCreate := max(state.Desired-state.Actual-state.Starting, 0)
prebuildsToCreate := max(state.Desired-runningValid-state.Starting, 0)
if prebuildsToCreate > 0 {
actions = append(actions,
&ReconciliationActions{
ActionType: ActionTypeCreate,
Create: prebuildsToCreate,
})
}

return &ReconciliationActions{
ActionType: ActionTypeCreate,
Create: prebuildsToCreate,
}, nil
return actions, nil
}

// handleInactiveTemplateVersion deletes all running prebuilds except those already being deleted
// to avoid duplicate deletion attempts.
func (p PresetSnapshot) handleInactiveTemplateVersion() (*ReconciliationActions, error) {
func (p PresetSnapshot) handleInactiveTemplateVersion() ([]*ReconciliationActions, error) {
prebuildsToDelete := len(p.Running)
deleteIDs := p.getOldestPrebuildIDs(prebuildsToDelete)

return &ReconciliationActions{
ActionType: ActionTypeDelete,
DeleteIDs: deleteIDs,
return []*ReconciliationActions{
{
ActionType: ActionTypeDelete,
DeleteIDs: deleteIDs,
},
}, nil
}

// needsBackoffPeriod checks if we should delay prebuild creation due to recent failures.
// If there were failures, it calculates a backoff period based on the number of failures
// and returns true if we're still within that period.
func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval time.Duration) (*ReconciliationActions, bool) {
func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval time.Duration) ([]*ReconciliationActions, bool) {
if p.Backoff == nil || p.Backoff.NumFailed == 0 {
return nil, false
}
Expand All @@ -205,9 +250,11 @@ func (p PresetSnapshot) needsBackoffPeriod(clock quartz.Clock, backoffInterval t
return nil, false
}

return &ReconciliationActions{
ActionType: ActionTypeBackoff,
BackoffUntil: backoffUntil,
return []*ReconciliationActions{
{
ActionType: ActionTypeBackoff,
BackoffUntil: backoffUntil,
},
}, true
}

Expand Down
Loading
Loading