|
| 1 | +package prebuilds |
| 2 | + |
| 3 | +import ( |
| 4 | + "math" |
| 5 | + "slices" |
| 6 | + "time" |
| 7 | + |
| 8 | + "github.com/coder/quartz" |
| 9 | + |
| 10 | + "github.com/coder/coder/v2/coderd/database" |
| 11 | +) |
| 12 | + |
| 13 | +func (p PresetState) CalculateActions(clock quartz.Clock, backoffInterval time.Duration) (*ReconciliationActions, error) { |
| 14 | + // TODO: align workspace states with how we represent them on the FE and the CLI |
| 15 | + // right now there's some slight differences which can lead to additional prebuilds being created |
| 16 | + |
| 17 | + // TODO: add mechanism to prevent prebuilds being reconciled from being claimable by users; i.e. if a prebuild is |
| 18 | + // about to be deleted, it should not be deleted if it has been claimed - beware of TOCTOU races! |
| 19 | + |
| 20 | + var ( |
| 21 | + actual int32 // Running prebuilds for active version. |
| 22 | + desired int32 // Active template version's desired instances as defined in preset. |
| 23 | + eligible int32 // Prebuilds which can be claimed. |
| 24 | + outdated int32 // Prebuilds which no longer match the active template version. |
| 25 | + extraneous int32 // Extra running prebuilds for active version (somehow). |
| 26 | + starting, stopping, deleting int32 // Prebuilds currently being provisioned up or down. |
| 27 | + ) |
| 28 | + |
| 29 | + if p.Preset.UsingActiveVersion { |
| 30 | + actual = int32(len(p.Running)) |
| 31 | + desired = p.Preset.DesiredInstances |
| 32 | + } |
| 33 | + |
| 34 | + for _, prebuild := range p.Running { |
| 35 | + if p.Preset.UsingActiveVersion { |
| 36 | + if prebuild.Ready { |
| 37 | + eligible++ |
| 38 | + } |
| 39 | + |
| 40 | + extraneous = int32(math.Max(float64(actual-p.Preset.DesiredInstances), 0)) |
| 41 | + } |
| 42 | + |
| 43 | + if prebuild.TemplateVersionID == p.Preset.TemplateVersionID && !p.Preset.UsingActiveVersion { |
| 44 | + outdated++ |
| 45 | + } |
| 46 | + } |
| 47 | + |
| 48 | + // In-progress builds are common across all presets belonging to a given template. |
| 49 | + // In other words: these values will be identical across all presets belonging to this template. |
| 50 | + for _, progress := range p.InProgress { |
| 51 | + switch progress.Transition { |
| 52 | + case database.WorkspaceTransitionStart: |
| 53 | + starting++ |
| 54 | + case database.WorkspaceTransitionStop: |
| 55 | + stopping++ |
| 56 | + case database.WorkspaceTransitionDelete: |
| 57 | + deleting++ |
| 58 | + } |
| 59 | + } |
| 60 | + |
| 61 | + var ( |
| 62 | + toCreate = int(math.Max(0, float64( |
| 63 | + desired-(actual+starting)), // The number of prebuilds currently being stopped (should be 0) |
| 64 | + )) |
| 65 | + toDelete = int(math.Max(0, float64( |
| 66 | + outdated- // The number of prebuilds running above the desired count for active version |
| 67 | + deleting), // The number of prebuilds currently being deleted |
| 68 | + )) |
| 69 | + |
| 70 | + actions = &ReconciliationActions{ |
| 71 | + Actual: actual, |
| 72 | + Desired: desired, |
| 73 | + Eligible: eligible, |
| 74 | + Outdated: outdated, |
| 75 | + Extraneous: extraneous, |
| 76 | + Starting: starting, |
| 77 | + Stopping: stopping, |
| 78 | + Deleting: deleting, |
| 79 | + } |
| 80 | + ) |
| 81 | + |
| 82 | + // If the template has become deleted or deprecated since the last reconciliation, we need to ensure we |
| 83 | + // scale those prebuilds down to zero. |
| 84 | + if p.Preset.Deleted || p.Preset.Deprecated { |
| 85 | + toCreate = 0 |
| 86 | + toDelete = int(actual + outdated) |
| 87 | + actions.Desired = 0 |
| 88 | + } |
| 89 | + |
| 90 | + // We backoff when the last build failed, to give the operator some time to investigate the issue and to not provision |
| 91 | + // a tonne of prebuilds (_n_ on each reconciliation iteration). |
| 92 | + if p.Backoff != nil && p.Backoff.NumFailed > 0 { |
| 93 | + backoffUntil := p.Backoff.LastBuildAt.Add(time.Duration(p.Backoff.NumFailed) * backoffInterval) |
| 94 | + |
| 95 | + if clock.Now().Before(backoffUntil) { |
| 96 | + actions.Create = 0 |
| 97 | + actions.DeleteIDs = nil |
| 98 | + actions.BackoffUntil = backoffUntil |
| 99 | + |
| 100 | + // Return early here; we should not perform any reconciliation actions if we're in a backoff period. |
| 101 | + return actions, nil |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + // It's possible that an operator could stop/start prebuilds which interfere with the reconciliation loop, so |
| 106 | + // we check if there are somehow more prebuilds than we expect, and then pick random victims to be deleted. |
| 107 | + if extraneous > 0 { |
| 108 | + // Sort running IDs by creation time so we always delete the oldest prebuilds. |
| 109 | + // In general, we want fresher prebuilds (imagine a mono-repo is cloned; newer is better). |
| 110 | + slices.SortFunc(p.Running, func(a, b database.GetRunningPrebuildsRow) int { |
| 111 | + if a.CreatedAt.Before(b.CreatedAt) { |
| 112 | + return -1 |
| 113 | + } |
| 114 | + if a.CreatedAt.After(b.CreatedAt) { |
| 115 | + return 1 |
| 116 | + } |
| 117 | + |
| 118 | + return 0 |
| 119 | + }) |
| 120 | + |
| 121 | + for i := 0; i < int(extraneous); i++ { |
| 122 | + if i >= len(p.Running) { |
| 123 | + // This should never happen. |
| 124 | + // TODO: move up |
| 125 | + // c.logger.Warn(ctx, "unexpected reconciliation state; extraneous count exceeds running prebuilds count!", |
| 126 | + // slog.F("running_count", len(p.Running)), |
| 127 | + // slog.F("extraneous", extraneous)) |
| 128 | + continue |
| 129 | + } |
| 130 | + |
| 131 | + actions.DeleteIDs = append(actions.DeleteIDs, p.Running[i].WorkspaceID) |
| 132 | + } |
| 133 | + |
| 134 | + // TODO: move up |
| 135 | + // c.logger.Warn(ctx, "found extra prebuilds running, picking random victim(s)", |
| 136 | + // slog.F("template_id", p.Preset.TemplateID.String()), slog.F("desired", desired), slog.F("actual", actual), slog.F("extra", extraneous), |
| 137 | + // slog.F("victims", victims)) |
| 138 | + |
| 139 | + // Prevent the rest of the reconciliation from completing |
| 140 | + return actions, nil |
| 141 | + } |
| 142 | + |
| 143 | + actions.Create = int32(toCreate) |
| 144 | + |
| 145 | + if toDelete > 0 && len(p.Running) != toDelete { |
| 146 | + // TODO: move up |
| 147 | + // c.logger.Warn(ctx, "mismatch between running prebuilds and expected deletion count!", |
| 148 | + // slog.F("template_id", s.preset.TemplateID.String()), slog.F("running", len(p.Running)), slog.F("to_delete", toDelete)) |
| 149 | + } |
| 150 | + |
| 151 | + // TODO: implement lookup to not perform same action on workspace multiple times in $period |
| 152 | + // i.e. a workspace cannot be deleted for some reason, which continually makes it eligible for deletion |
| 153 | + for i := 0; i < toDelete; i++ { |
| 154 | + if i >= len(p.Running) { |
| 155 | + // TODO: move up |
| 156 | + // Above warning will have already addressed this. |
| 157 | + continue |
| 158 | + } |
| 159 | + |
| 160 | + actions.DeleteIDs = append(actions.DeleteIDs, p.Running[i].WorkspaceID) |
| 161 | + } |
| 162 | + |
| 163 | + return actions, nil |
| 164 | +} |
0 commit comments