Skip to content

feat: automatically stop workspaces based on failure_ttl #7989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
move enterprise tests
  • Loading branch information
sreya committed Jun 21, 2023
commit 4fdaf19cd579fbd6ec5708b58209bfc858482379
1 change: 0 additions & 1 deletion coderd/autobuild/lifecycle_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ func (e *Executor) runOnce(t time.Time) Stats {
log.Warn(e.ctx, "get latest workspace build", slog.Error(err))
return nil
}

templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, tx, ws.TemplateID)
if err != nil {
log.Warn(e.ctx, "get template schedule options", slog.Error(err))
Expand Down
157 changes: 4 additions & 153 deletions coderd/autobuild/lifecycle_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -653,10 +653,13 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) {

// TesetExecutorFailedWorkspace tests that failed workspaces that breach
// their template failed_ttl threshold trigger a stop job.
// For enterprise functionality see enterprise/coderd/workspaces_test.go
func TestExecutorFailedWorkspace(t *testing.T) {
t.Parallel()

t.Run("AGPLOK", func(t *testing.T) {
// Test that an AGPL TemplateScheduleStore properly disables
// functionality.
t.Run("OK", func(t *testing.T) {
t.Parallel()

var (
Expand Down Expand Up @@ -700,93 +703,6 @@ func TestExecutorFailedWorkspace(t *testing.T) {
// Expect no transitions since we're using AGPL.
require.Len(t, stats.Transitions, 0)
})

t.Run("EnterpriseOK", func(t *testing.T) {
t.Parallel()

var (
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
logger = slogtest.Make(t, &slogtest.Options{
// We ignore errors here since we expect to fail
// builds.
IgnoreErrors: true,
})
failureTTL = time.Millisecond

client = coderdtest.New(t, &coderdtest.Options{
Logger: &logger,
AutobuildTicker: ticker,
IncludeProvisionerDaemon: true,
AutobuildStats: statCh,
TemplateScheduleStore: mockEnterpriseTemplateScheduler(t),
})
)
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: echo.ProvisionFailed,
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) {
ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds())
})
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)
require.Eventually(t,
func() bool {
return database.Now().Sub(*build.Job.CompletedAt) > failureTTL
},
testutil.IntervalMedium, testutil.IntervalFast)
ticker <- time.Now()
stats := <-statCh
// Expect workspace to transition to stopped state for breaching
// failure TTL.
require.Len(t, stats.Transitions, 1)
require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop)
})

t.Run("TooEarly", func(t *testing.T) {
t.Parallel()

var (
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
logger = slogtest.Make(t, &slogtest.Options{
// We ignore errors here since we expect to fail
// builds.
IgnoreErrors: true,
})
failureTTL = time.Minute

client = coderdtest.New(t, &coderdtest.Options{
Logger: &logger,
AutobuildTicker: ticker,
IncludeProvisionerDaemon: true,
AutobuildStats: statCh,
TemplateScheduleStore: mockEnterpriseTemplateScheduler(t),
})
)
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: echo.ProvisionFailed,
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) {
ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds())
})
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)
ticker <- time.Now()
stats := <-statCh
// Expect no transitions since not enough time has elapsed.
require.Len(t, stats.Transitions, 0)
})
}

func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace {
Expand Down Expand Up @@ -846,68 +762,3 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}

func mockEnterpriseTemplateScheduler(t *testing.T) schedule.TemplateScheduleStore {
return schedule.MockTemplateScheduleStore{
GetFn: func(ctx context.Context, store database.Store, id uuid.UUID) (schedule.TemplateScheduleOptions, error) {
t.Helper()

template, err := store.GetTemplateByID(ctx, id)
require.NoError(t, err)

return schedule.TemplateScheduleOptions{
UserAutostartEnabled: template.AllowUserAutostart,
UserAutostopEnabled: template.AllowUserAutostop,
DefaultTTL: time.Duration(template.DefaultTTL),
MaxTTL: time.Duration(template.MaxTTL),
FailureTTL: time.Duration(template.FailureTTL),
}, nil
},
SetFn: func(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) {
t.Helper()

if int64(opts.DefaultTTL) == tpl.DefaultTTL &&
int64(opts.MaxTTL) == tpl.MaxTTL &&
int64(opts.FailureTTL) == tpl.FailureTTL &&
int64(opts.InactivityTTL) == tpl.InactivityTTL &&
int64(opts.LockedTTL) == tpl.LockedTTL &&
opts.UserAutostartEnabled == tpl.AllowUserAutostart &&
opts.UserAutostopEnabled == tpl.AllowUserAutostop {
// Avoid updating the UpdatedAt timestamp if nothing will be changed.
return tpl, nil
}

template, err := db.UpdateTemplateScheduleByID(ctx, database.UpdateTemplateScheduleByIDParams{
ID: tpl.ID,
UpdatedAt: database.Now(),
AllowUserAutostart: opts.UserAutostartEnabled,
AllowUserAutostop: opts.UserAutostopEnabled,
DefaultTTL: int64(opts.DefaultTTL),
MaxTTL: int64(opts.MaxTTL),
FailureTTL: int64(opts.FailureTTL),
InactivityTTL: int64(opts.InactivityTTL),
LockedTTL: int64(opts.LockedTTL),
})
require.NoError(t, err)
// Update all workspaces using the template to set the user defined schedule
// to be within the new bounds. This essentially does the following for each
// workspace using the template.
// if (template.ttl != NULL) {
// workspace.ttl = min(workspace.ttl, template.ttl)
// }
//
// NOTE: this does not apply to currently running workspaces as their
// schedule information is committed to the workspace_build during start.
// This limitation is displayed to the user while editing the template.
if opts.MaxTTL > 0 {
err = db.UpdateWorkspaceTTLToBeWithinTemplateMax(ctx, database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams{
TemplateID: template.ID,
TemplateMaxTTL: int64(opts.MaxTTL),
})
require.NoError(t, err)
}

return template, nil
},
}
}
2 changes: 1 addition & 1 deletion enterprise/coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ func (api *API) updateEntitlements(ctx context.Context) error {

if changed, enabled := featureChanged(codersdk.FeatureAdvancedTemplateScheduling); changed {
if enabled {
store := &enterpriseTemplateScheduleStore{}
store := &EnterpriseTemplateScheduleStore{}
ptr := schedule.TemplateScheduleStore(store)
api.AGPL.TemplateScheduleStore.Store(&ptr)
} else {
Expand Down
8 changes: 4 additions & 4 deletions enterprise/coderd/provisionerdaemons.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,11 @@ func websocketNetConn(ctx context.Context, conn *websocket.Conn, msgType websock
}
}

type enterpriseTemplateScheduleStore struct{}
type EnterpriseTemplateScheduleStore struct{}

var _ schedule.TemplateScheduleStore = &enterpriseTemplateScheduleStore{}
var _ schedule.TemplateScheduleStore = &EnterpriseTemplateScheduleStore{}

func (*enterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.Context, db database.Store, templateID uuid.UUID) (schedule.TemplateScheduleOptions, error) {
func (*EnterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.Context, db database.Store, templateID uuid.UUID) (schedule.TemplateScheduleOptions, error) {
tpl, err := db.GetTemplateByID(ctx, templateID)
if err != nil {
return schedule.TemplateScheduleOptions{}, err
Expand All @@ -331,7 +331,7 @@ func (*enterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.C
}, nil
}

func (*enterpriseTemplateScheduleStore) SetTemplateScheduleOptions(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) {
func (*EnterpriseTemplateScheduleStore) SetTemplateScheduleOptions(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) {
if int64(opts.DefaultTTL) == tpl.DefaultTTL &&
int64(opts.MaxTTL) == tpl.MaxTTL &&
int64(opts.FailureTTL) == tpl.FailureTTL &&
Expand Down
156 changes: 156 additions & 0 deletions enterprise/coderd/workspaces_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,17 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"cdr.dev/slog/sloggers/slogtest"

"github.com/coder/coder/coderd/autobuild"
"github.com/coder/coder/coderd/coderdtest"
"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/util/ptr"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/enterprise/coderd"
"github.com/coder/coder/enterprise/coderd/coderdenttest"
"github.com/coder/coder/enterprise/coderd/license"
"github.com/coder/coder/provisioner/echo"
"github.com/coder/coder/testutil"
)

Expand Down Expand Up @@ -74,6 +79,157 @@ func TestCreateWorkspace(t *testing.T) {
})
}

func TestWorkspaceAutobuild(t *testing.T) {
t.Parallel()

t.Run("FailureTTLOK", func(t *testing.T) {
t.Parallel()

var (
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
logger = slogtest.Make(t, &slogtest.Options{
// We ignore errors here since we expect to fail
// builds.
IgnoreErrors: true,
})
failureTTL = time.Millisecond

client = coderdenttest.New(t, &coderdenttest.Options{
Options: &coderdtest.Options{
Logger: &logger,
AutobuildTicker: ticker,
IncludeProvisionerDaemon: true,
AutobuildStats: statCh,
TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{},
},
})
)
user := coderdtest.CreateFirstUser(t, client)
_ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAdvancedTemplateScheduling: 1,
},
})

version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: echo.ProvisionFailed,
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) {
ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds())
})
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)
require.Eventually(t,
func() bool {
return database.Now().Sub(*build.Job.CompletedAt) > failureTTL
},
testutil.IntervalMedium, testutil.IntervalFast)
ticker <- time.Now()
stats := <-statCh
// Expect workspace to transition to stopped state for breaching
// failure TTL.
require.Len(t, stats.Transitions, 1)
require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop)
})

t.Run("FailureTTLTooEarly", func(t *testing.T) {
t.Parallel()

var (
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
logger = slogtest.Make(t, &slogtest.Options{
// We ignore errors here since we expect to fail
// builds.
IgnoreErrors: true,
})
failureTTL = time.Minute

client = coderdenttest.New(t, &coderdenttest.Options{
Options: &coderdtest.Options{
Logger: &logger,
AutobuildTicker: ticker,
IncludeProvisionerDaemon: true,
AutobuildStats: statCh,
TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{},
},
})
)
user := coderdtest.CreateFirstUser(t, client)
_ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAdvancedTemplateScheduling: 1,
},
})
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: echo.ProvisionFailed,
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) {
ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds())
})
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)
ticker <- time.Now()
stats := <-statCh
// Expect no transitions since not enough time has elapsed.
require.Len(t, stats.Transitions, 0)
})

t.Run("FailureTTLUnset", func(t *testing.T) {
t.Parallel()

var (
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
logger = slogtest.Make(t, &slogtest.Options{
// We ignore errors here since we expect to fail
// builds.
IgnoreErrors: true,
})

client = coderdenttest.New(t, &coderdenttest.Options{
Options: &coderdtest.Options{
Logger: &logger,
AutobuildTicker: ticker,
IncludeProvisionerDaemon: true,
AutobuildStats: statCh,
TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{},
},
})
)
user := coderdtest.CreateFirstUser(t, client)
_ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAdvancedTemplateScheduling: 1,
},
})
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: echo.ProvisionFailed,
})
// Create a template without setting a failure_ttl.
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status)
ticker <- time.Now()
stats := <-statCh
// Expect no transitions since the field is unset on the template.
require.Len(t, stats.Transitions, 0)
})
}

func TestWorkspacesFiltering(t *testing.T) {
t.Parallel()

Expand Down