diff --git a/cli/server.go b/cli/server.go index f9e744761b22e..5018007e2b4e8 100644 --- a/cli/server.go +++ b/cli/server.go @@ -62,12 +62,6 @@ import ( "github.com/coder/serpent" "github.com/coder/wgtunnel/tunnelsdk" - "github.com/coder/coder/v2/coderd/entitlements" - "github.com/coder/coder/v2/coderd/notifications/reports" - "github.com/coder/coder/v2/coderd/runtimeconfig" - "github.com/coder/coder/v2/coderd/webpush" - "github.com/coder/coder/v2/codersdk/drpcsdk" - "github.com/coder/coder/v2/buildinfo" "github.com/coder/coder/v2/cli/clilog" "github.com/coder/coder/v2/cli/cliui" @@ -83,15 +77,19 @@ import ( "github.com/coder/coder/v2/coderd/database/migrations" "github.com/coder/coder/v2/coderd/database/pubsub" "github.com/coder/coder/v2/coderd/devtunnel" + "github.com/coder/coder/v2/coderd/entitlements" "github.com/coder/coder/v2/coderd/externalauth" "github.com/coder/coder/v2/coderd/gitsshkey" "github.com/coder/coder/v2/coderd/httpmw" "github.com/coder/coder/v2/coderd/jobreaper" "github.com/coder/coder/v2/coderd/notifications" + "github.com/coder/coder/v2/coderd/notifications/reports" "github.com/coder/coder/v2/coderd/oauthpki" "github.com/coder/coder/v2/coderd/prometheusmetrics" "github.com/coder/coder/v2/coderd/prometheusmetrics/insights" "github.com/coder/coder/v2/coderd/promoauth" + "github.com/coder/coder/v2/coderd/provisionerdserver" + "github.com/coder/coder/v2/coderd/runtimeconfig" "github.com/coder/coder/v2/coderd/schedule" "github.com/coder/coder/v2/coderd/telemetry" "github.com/coder/coder/v2/coderd/tracing" @@ -99,9 +97,11 @@ import ( "github.com/coder/coder/v2/coderd/util/ptr" "github.com/coder/coder/v2/coderd/util/slice" stringutil "github.com/coder/coder/v2/coderd/util/strings" + "github.com/coder/coder/v2/coderd/webpush" "github.com/coder/coder/v2/coderd/workspaceapps/appurl" "github.com/coder/coder/v2/coderd/workspacestats" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/codersdk/drpcsdk" "github.com/coder/coder/v2/cryptorand" "github.com/coder/coder/v2/provisioner/echo" "github.com/coder/coder/v2/provisioner/terraform" @@ -280,6 +280,12 @@ func enablePrometheus( } } + provisionerdserverMetrics := provisionerdserver.NewMetrics(logger) + if err := provisionerdserverMetrics.Register(options.PrometheusRegistry); err != nil { + return nil, xerrors.Errorf("failed to register provisionerd_server metrics: %w", err) + } + options.ProvisionerdServerMetrics = provisionerdserverMetrics + //nolint:revive return ServeHandler( ctx, logger, promhttp.InstrumentMetricHandler( diff --git a/coderd/coderd.go b/coderd/coderd.go index 724952bde7bb9..053880ce31b89 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -241,6 +241,8 @@ type Options struct { UpdateAgentMetrics func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []*agentproto.Stats_Metric) StatsBatcher workspacestats.Batcher + ProvisionerdServerMetrics *provisionerdserver.Metrics + // WorkspaceAppAuditSessionTimeout allows changing the timeout for audit // sessions. Raising or lowering this value will directly affect the write // load of the audit log table. This is used for testing. Default 1 hour. @@ -1930,6 +1932,7 @@ func (api *API) CreateInMemoryTaggedProvisionerDaemon(dialCtx context.Context, n }, api.NotificationsEnqueuer, &api.PrebuildsReconciler, + api.ProvisionerdServerMetrics, ) if err != nil { return nil, err diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 34ba84a85e33a..f773053c3a56c 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -184,6 +184,8 @@ type Options struct { OIDCConvertKeyCache cryptokeys.SigningKeycache Clock quartz.Clock TelemetryReporter telemetry.Reporter + + ProvisionerdServerMetrics *provisionerdserver.Metrics } // New constructs a codersdk client connected to an in-memory API instance. @@ -604,6 +606,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can Clock: options.Clock, AppEncryptionKeyCache: options.APIKeyEncryptionCache, OIDCConvertKeyCache: options.OIDCConvertKeyCache, + ProvisionerdServerMetrics: options.ProvisionerdServerMetrics, } } diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index d1363c974214f..53c58a5de15a7 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -2699,6 +2699,13 @@ func (q *querier) GetQuotaConsumedForUser(ctx context.Context, params database.G return q.db.GetQuotaConsumedForUser(ctx, params) } +func (q *querier) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]database.GetRegularWorkspaceCreateMetricsRow, error) { + if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceWorkspace.All()); err != nil { + return nil, err + } + return q.db.GetRegularWorkspaceCreateMetrics(ctx) +} + func (q *querier) GetReplicaByID(ctx context.Context, id uuid.UUID) (database.Replica, error) { if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil { return database.Replica{}, err diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index cda914cc47617..68bed8f2ef5e9 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -73,7 +73,9 @@ func TestAsNoActor(t *testing.T) { func TestPing(t *testing.T) { t.Parallel() - db, _ := dbtestutil.NewDB(t) + db := dbmock.NewMockStore(gomock.NewController(t)) + db.EXPECT().Wrappers().Times(1).Return([]string{}) + db.EXPECT().Ping(gomock.Any()).Times(1).Return(time.Second, nil) q := dbauthz.New(db, &coderdtest.RecordingAuthorizer{}, slog.Make(), coderdtest.AccessControlStorePointer()) _, err := q.Ping(context.Background()) require.NoError(t, err, "must not error") @@ -83,34 +85,39 @@ func TestPing(t *testing.T) { func TestInTX(t *testing.T) { t.Parallel() - db, _ := dbtestutil.NewDB(t) + var ( + ctrl = gomock.NewController(t) + db = dbmock.NewMockStore(ctrl) + mTx = dbmock.NewMockStore(ctrl) // to record the 'in tx' calls + faker = gofakeit.New(0) + w = testutil.Fake(t, faker, database.Workspace{}) + actor = rbac.Subject{ + ID: uuid.NewString(), + Roles: rbac.RoleIdentifiers{rbac.RoleOwner()}, + Groups: []string{}, + Scope: rbac.ScopeAll, + } + ctx = dbauthz.As(context.Background(), actor) + ) + + db.EXPECT().Wrappers().Times(1).Return([]string{}) // called by dbauthz.New q := dbauthz.New(db, &coderdtest.RecordingAuthorizer{ Wrapped: (&coderdtest.FakeAuthorizer{}).AlwaysReturn(xerrors.New("custom error")), }, slog.Make(), coderdtest.AccessControlStorePointer()) - actor := rbac.Subject{ - ID: uuid.NewString(), - Roles: rbac.RoleIdentifiers{rbac.RoleOwner()}, - Groups: []string{}, - Scope: rbac.ScopeAll, - } - u := dbgen.User(t, db, database.User{}) - o := dbgen.Organization(t, db, database.Organization{}) - tpl := dbgen.Template(t, db, database.Template{ - CreatedBy: u.ID, - OrganizationID: o.ID, - }) - w := dbgen.Workspace(t, db, database.WorkspaceTable{ - OwnerID: u.ID, - TemplateID: tpl.ID, - OrganizationID: o.ID, - }) - ctx := dbauthz.As(context.Background(), actor) + + db.EXPECT().InTx(gomock.Any(), gomock.Any()).Times(1).DoAndReturn( + func(f func(database.Store) error, _ *database.TxOptions) error { + return f(mTx) + }, + ) + mTx.EXPECT().Wrappers().Times(1).Return([]string{}) + mTx.EXPECT().GetWorkspaceByID(gomock.Any(), gomock.Any()).Times(1).Return(w, nil) err := q.InTx(func(tx database.Store) error { // The inner tx should use the parent's authz _, err := tx.GetWorkspaceByID(ctx, w.ID) return err }, nil) - require.Error(t, err, "must error") + require.ErrorContains(t, err, "custom error", "must be our custom error") require.ErrorAs(t, err, &dbauthz.NotAuthorizedError{}, "must be an authorized error") require.True(t, dbauthz.IsNotAuthorizedError(err), "must be an authorized error") } @@ -120,24 +127,18 @@ func TestNew(t *testing.T) { t.Parallel() var ( - db, _ = dbtestutil.NewDB(t) + ctrl = gomock.NewController(t) + db = dbmock.NewMockStore(ctrl) + faker = gofakeit.New(0) rec = &coderdtest.RecordingAuthorizer{ Wrapped: &coderdtest.FakeAuthorizer{}, } subj = rbac.Subject{} ctx = dbauthz.As(context.Background(), rbac.Subject{}) ) - u := dbgen.User(t, db, database.User{}) - org := dbgen.Organization(t, db, database.Organization{}) - tpl := dbgen.Template(t, db, database.Template{ - OrganizationID: org.ID, - CreatedBy: u.ID, - }) - exp := dbgen.Workspace(t, db, database.WorkspaceTable{ - OwnerID: u.ID, - OrganizationID: org.ID, - TemplateID: tpl.ID, - }) + db.EXPECT().Wrappers().Times(1).Return([]string{}).Times(2) // two calls to New() + exp := testutil.Fake(t, faker, database.Workspace{}) + db.EXPECT().GetWorkspaceByID(gomock.Any(), exp.ID).Times(1).Return(exp, nil) // Double wrap should not cause an actual double wrap. So only 1 rbac call // should be made. az := dbauthz.New(db, rec, slog.Make(), coderdtest.AccessControlStorePointer()) @@ -145,7 +146,7 @@ func TestNew(t *testing.T) { w, err := az.GetWorkspaceByID(ctx, exp.ID) require.NoError(t, err, "must not error") - require.Equal(t, exp, w.WorkspaceTable(), "must be equal") + require.Equal(t, exp, w, "must be equal") rec.AssertActor(t, subj, rec.Pair(policy.ActionRead, exp)) require.NoError(t, rec.AllAsserted(), "should only be 1 rbac call") @@ -154,6 +155,8 @@ func TestNew(t *testing.T) { // TestDBAuthzRecursive is a simple test to search for infinite recursion // bugs. It isn't perfect, and only catches a subset of the possible bugs // as only the first db call will be made. But it is better than nothing. +// This can be removed when all tests in this package are migrated to +// dbmock as it will immediately detect recursive calls. func TestDBAuthzRecursive(t *testing.T) { t.Parallel() db, _ := dbtestutil.NewDB(t) @@ -2174,6 +2177,10 @@ func (s *MethodTestSuite) TestWorkspace() { dbm.EXPECT().GetWorkspaceAgentDevcontainersByAgentID(gomock.Any(), agt.ID).Return([]database.WorkspaceAgentDevcontainer{d}, nil).AnyTimes() check.Args(agt.ID).Asserts(w, policy.ActionRead).Returns([]database.WorkspaceAgentDevcontainer{d}) })) + s.Run("GetRegularWorkspaceCreateMetrics", s.Subtest(func(_ database.Store, check *expects) { + check.Args(). + Asserts(rbac.ResourceWorkspace.All(), policy.ActionRead) + })) } func (s *MethodTestSuite) TestWorkspacePortSharing() { diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 4b5e953d771dd..3f729acdccf23 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -1356,6 +1356,13 @@ func (m queryMetricsStore) GetQuotaConsumedForUser(ctx context.Context, ownerID return consumed, err } +func (m queryMetricsStore) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]database.GetRegularWorkspaceCreateMetricsRow, error) { + start := time.Now() + r0, r1 := m.s.GetRegularWorkspaceCreateMetrics(ctx) + m.queryLatencies.WithLabelValues("GetRegularWorkspaceCreateMetrics").Observe(time.Since(start).Seconds()) + return r0, r1 +} + func (m queryMetricsStore) GetReplicaByID(ctx context.Context, id uuid.UUID) (database.Replica, error) { start := time.Now() replica, err := m.s.GetReplicaByID(ctx, id) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 02415d6cb8ea4..4f01933baf42b 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -2851,6 +2851,21 @@ func (mr *MockStoreMockRecorder) GetQuotaConsumedForUser(ctx, arg any) *gomock.C return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetQuotaConsumedForUser", reflect.TypeOf((*MockStore)(nil).GetQuotaConsumedForUser), ctx, arg) } +// GetRegularWorkspaceCreateMetrics mocks base method. +func (m *MockStore) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]database.GetRegularWorkspaceCreateMetricsRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetRegularWorkspaceCreateMetrics", ctx) + ret0, _ := ret[0].([]database.GetRegularWorkspaceCreateMetricsRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetRegularWorkspaceCreateMetrics indicates an expected call of GetRegularWorkspaceCreateMetrics. +func (mr *MockStoreMockRecorder) GetRegularWorkspaceCreateMetrics(ctx any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRegularWorkspaceCreateMetrics", reflect.TypeOf((*MockStore)(nil).GetRegularWorkspaceCreateMetrics), ctx) +} + // GetReplicaByID mocks base method. func (m *MockStore) GetReplicaByID(ctx context.Context, id uuid.UUID) (database.Replica, error) { m.ctrl.T.Helper() diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 28ed7609c53d6..6e955b82b0bce 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -306,6 +306,9 @@ type sqlcQuerier interface { GetProvisionerLogsAfterID(ctx context.Context, arg GetProvisionerLogsAfterIDParams) ([]ProvisionerJobLog, error) GetQuotaAllowanceForUser(ctx context.Context, arg GetQuotaAllowanceForUserParams) (int64, error) GetQuotaConsumedForUser(ctx context.Context, arg GetQuotaConsumedForUserParams) (int64, error) + // Count regular workspaces: only those whose first successful 'start' build + // was not initiated by the prebuild system user. + GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]GetRegularWorkspaceCreateMetricsRow, error) GetReplicaByID(ctx context.Context, id uuid.UUID) (Replica, error) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error) GetRunningPrebuiltWorkspaces(ctx context.Context) ([]GetRunningPrebuiltWorkspacesRow, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index d527d90887093..d5495c4df5a8c 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -7309,7 +7309,7 @@ const getPrebuildMetrics = `-- name: GetPrebuildMetrics :many SELECT t.name as template_name, tvp.name as preset_name, - o.name as organization_name, + o.name as organization_name, COUNT(*) as created_count, COUNT(*) FILTER (WHERE pj.job_status = 'failed'::provisioner_job_status) as failed_count, COUNT(*) FILTER ( @@ -20131,6 +20131,75 @@ func (q *sqlQuerier) GetDeploymentWorkspaceStats(ctx context.Context) (GetDeploy return i, err } +const getRegularWorkspaceCreateMetrics = `-- name: GetRegularWorkspaceCreateMetrics :many +WITH first_success_build AS ( + -- Earliest successful 'start' build per workspace + SELECT DISTINCT ON (wb.workspace_id) + wb.workspace_id, + wb.template_version_preset_id, + wb.initiator_id + FROM workspace_builds wb + JOIN provisioner_jobs pj ON pj.id = wb.job_id + WHERE + wb.transition = 'start'::workspace_transition + AND pj.job_status = 'succeeded'::provisioner_job_status + ORDER BY wb.workspace_id, wb.build_number, wb.id +) +SELECT + t.name AS template_name, + COALESCE(tvp.name, '') AS preset_name, + o.name AS organization_name, + COUNT(*) AS created_count +FROM first_success_build fsb + JOIN workspaces w ON w.id = fsb.workspace_id + JOIN templates t ON t.id = w.template_id + LEFT JOIN template_version_presets tvp ON tvp.id = fsb.template_version_preset_id + JOIN organizations o ON o.id = w.organization_id +WHERE + NOT t.deleted + -- Exclude workspaces whose first successful start was the prebuilds system user + AND fsb.initiator_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid +GROUP BY t.name, COALESCE(tvp.name, ''), o.name +ORDER BY t.name, preset_name, o.name +` + +type GetRegularWorkspaceCreateMetricsRow struct { + TemplateName string `db:"template_name" json:"template_name"` + PresetName string `db:"preset_name" json:"preset_name"` + OrganizationName string `db:"organization_name" json:"organization_name"` + CreatedCount int64 `db:"created_count" json:"created_count"` +} + +// Count regular workspaces: only those whose first successful 'start' build +// was not initiated by the prebuild system user. +func (q *sqlQuerier) GetRegularWorkspaceCreateMetrics(ctx context.Context) ([]GetRegularWorkspaceCreateMetricsRow, error) { + rows, err := q.db.QueryContext(ctx, getRegularWorkspaceCreateMetrics) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetRegularWorkspaceCreateMetricsRow + for rows.Next() { + var i GetRegularWorkspaceCreateMetricsRow + if err := rows.Scan( + &i.TemplateName, + &i.PresetName, + &i.OrganizationName, + &i.CreatedCount, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getWorkspaceACLByID = `-- name: GetWorkspaceACLByID :one SELECT group_acl as groups, diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 8654453554e8c..2ad7f41d41fea 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -230,7 +230,7 @@ HAVING COUNT(*) = @hard_limit::bigint; SELECT t.name as template_name, tvp.name as preset_name, - o.name as organization_name, + o.name as organization_name, COUNT(*) as created_count, COUNT(*) FILTER (WHERE pj.job_status = 'failed'::provisioner_job_status) as failed_count, COUNT(*) FILTER ( diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 802bded5b836b..80d8c7b920d74 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -923,3 +923,36 @@ SET user_acl = @user_acl WHERE id = @id; + +-- name: GetRegularWorkspaceCreateMetrics :many +-- Count regular workspaces: only those whose first successful 'start' build +-- was not initiated by the prebuild system user. +WITH first_success_build AS ( + -- Earliest successful 'start' build per workspace + SELECT DISTINCT ON (wb.workspace_id) + wb.workspace_id, + wb.template_version_preset_id, + wb.initiator_id + FROM workspace_builds wb + JOIN provisioner_jobs pj ON pj.id = wb.job_id + WHERE + wb.transition = 'start'::workspace_transition + AND pj.job_status = 'succeeded'::provisioner_job_status + ORDER BY wb.workspace_id, wb.build_number, wb.id +) +SELECT + t.name AS template_name, + COALESCE(tvp.name, '') AS preset_name, + o.name AS organization_name, + COUNT(*) AS created_count +FROM first_success_build fsb + JOIN workspaces w ON w.id = fsb.workspace_id + JOIN templates t ON t.id = w.template_id + LEFT JOIN template_version_presets tvp ON tvp.id = fsb.template_version_preset_id + JOIN organizations o ON o.id = w.organization_id +WHERE + NOT t.deleted + -- Exclude workspaces whose first successful start was the prebuilds system user + AND fsb.initiator_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid +GROUP BY t.name, COALESCE(tvp.name, ''), o.name +ORDER BY t.name, preset_name, o.name; diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go index 6ea8615f3779a..ed55e4598dc21 100644 --- a/coderd/prometheusmetrics/prometheusmetrics.go +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -165,6 +165,18 @@ func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.R return nil, err } + workspaceCreationTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "coderd", + Name: "workspace_creation_total", + Help: "Total regular (non-prebuilt) workspace creations by organization, template, and preset.", + }, + []string{"organization_name", "template_name", "preset_name"}, + ) + if err := registerer.Register(workspaceCreationTotal); err != nil { + return nil, err + } + ctx, cancelFunc := context.WithCancel(ctx) done := make(chan struct{}) @@ -200,6 +212,27 @@ func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.R string(w.LatestBuildTransition), ).Add(1) } + + // Update regular workspaces (without a prebuild transition) creation counter + regularWorkspaces, err := db.GetRegularWorkspaceCreateMetrics(ctx) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + workspaceCreationTotal.Reset() + } else { + logger.Warn(ctx, "failed to load regular workspaces for metrics", slog.Error(err)) + } + return + } + + workspaceCreationTotal.Reset() + + for _, regularWorkspace := range regularWorkspaces { + workspaceCreationTotal.WithLabelValues( + regularWorkspace.OrganizationName, + regularWorkspace.TemplateName, + regularWorkspace.PresetName, + ).Add(float64(regularWorkspace.CreatedCount)) + } } // Use time.Nanosecond to force an initial tick. It will be reset to the diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 28046c1dff3fb..3d8704f92460d 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -424,6 +424,107 @@ func TestWorkspaceLatestBuildStatuses(t *testing.T) { } } +func TestWorkspaceCreationTotal(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + Name string + Database func() database.Store + ExpectedWorkspaces int + }{ + { + Name: "None", + Database: func() database.Store { + db, _ := dbtestutil.NewDB(t) + return db + }, + ExpectedWorkspaces: 0, + }, + { + // Should count only the successfully created workspaces + Name: "Multiple", + Database: func() database.Store { + db, _ := dbtestutil.NewDB(t) + u := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + insertTemplates(t, db, u, org) + insertCanceled(t, db, u, org) + insertFailed(t, db, u, org) + insertFailed(t, db, u, org) + insertSuccess(t, db, u, org) + insertSuccess(t, db, u, org) + insertSuccess(t, db, u, org) + insertRunning(t, db, u, org) + return db + }, + ExpectedWorkspaces: 3, + }, + { + // Should not include prebuilt workspaces + Name: "MultipleWithPrebuild", + Database: func() database.Store { + ctx := context.Background() + db, _ := dbtestutil.NewDB(t) + u := dbgen.User(t, db, database.User{}) + prebuildUser, err := db.GetUserByID(ctx, database.PrebuildsSystemUserID) + require.NoError(t, err) + org := dbgen.Organization(t, db, database.Organization{}) + insertTemplates(t, db, u, org) + insertCanceled(t, db, u, org) + insertFailed(t, db, u, org) + insertSuccess(t, db, u, org) + insertSuccess(t, db, prebuildUser, org) + insertRunning(t, db, u, org) + return db + }, + ExpectedWorkspaces: 1, + }, + { + // Should include deleted workspaces + Name: "MultipleWithDeleted", + Database: func() database.Store { + db, _ := dbtestutil.NewDB(t) + u := dbgen.User(t, db, database.User{}) + org := dbgen.Organization(t, db, database.Organization{}) + insertTemplates(t, db, u, org) + insertCanceled(t, db, u, org) + insertFailed(t, db, u, org) + insertSuccess(t, db, u, org) + insertRunning(t, db, u, org) + insertDeleted(t, db, u, org) + return db + }, + ExpectedWorkspaces: 2, + }, + } { + t.Run(tc.Name, func(t *testing.T) { + t.Parallel() + registry := prometheus.NewRegistry() + closeFunc, err := prometheusmetrics.Workspaces(context.Background(), testutil.Logger(t), registry, tc.Database(), testutil.IntervalFast) + require.NoError(t, err) + t.Cleanup(closeFunc) + + require.Eventually(t, func() bool { + metrics, err := registry.Gather() + assert.NoError(t, err) + + sum := 0 + for _, m := range metrics { + if m.GetName() != "coderd_workspace_creation_total" { + continue + } + for _, metric := range m.Metric { + sum += int(metric.GetCounter().GetValue()) + } + } + + t.Logf("count = %d, expected == %d", sum, tc.ExpectedWorkspaces) + return sum == tc.ExpectedWorkspaces + }, testutil.WaitShort, testutil.IntervalFast) + }) + } +} + func TestAgents(t *testing.T) { t.Parallel() @@ -897,6 +998,7 @@ func insertRunning(t *testing.T, db database.Store, u database.User, org databas Transition: database.WorkspaceTransitionStart, Reason: database.BuildReasonInitiator, TemplateVersionID: templateVersionID, + InitiatorID: u.ID, }) require.NoError(t, err) // This marks the job as started. diff --git a/coderd/provisionerdserver/metrics.go b/coderd/provisionerdserver/metrics.go new file mode 100644 index 0000000000000..67bd997055e1a --- /dev/null +++ b/coderd/provisionerdserver/metrics.go @@ -0,0 +1,177 @@ +package provisionerdserver + +import ( + "context" + "time" + + "github.com/prometheus/client_golang/prometheus" + + "cdr.dev/slog" +) + +type Metrics struct { + logger slog.Logger + workspaceCreationTimings *prometheus.HistogramVec + workspaceClaimTimings *prometheus.HistogramVec +} + +type WorkspaceTimingType int + +const ( + Unsupported WorkspaceTimingType = iota + WorkspaceCreation + PrebuildCreation + PrebuildClaim +) + +const ( + workspaceTypeRegular = "regular" + workspaceTypePrebuild = "prebuild" +) + +type WorkspaceTimingFlags struct { + IsPrebuild bool + IsClaim bool + IsFirstBuild bool +} + +func NewMetrics(logger slog.Logger) *Metrics { + log := logger.Named("provisionerd_server_metrics") + + return &Metrics{ + logger: log, + workspaceCreationTimings: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "coderd", + Name: "workspace_creation_duration_seconds", + Help: "Time to create a workspace by organization, template, preset, and type (regular or prebuild).", + Buckets: []float64{ + 1, // 1s + 10, + 30, + 60, // 1min + 60 * 5, + 60 * 10, + 60 * 30, // 30min + 60 * 60, // 1hr + }, + NativeHistogramBucketFactor: 1.1, + // Max number of native buckets kept at once to bound memory. + NativeHistogramMaxBucketNumber: 100, + // Merge/flush small buckets periodically to control churn. + NativeHistogramMinResetDuration: time.Hour, + // Treat tiny values as zero (helps with noisy near-zero latencies). + NativeHistogramZeroThreshold: 0, + NativeHistogramMaxZeroThreshold: 0, + }, []string{"organization_name", "template_name", "preset_name", "type"}), + workspaceClaimTimings: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "coderd", + Name: "prebuilt_workspace_claim_duration_seconds", + Help: "Time to claim a prebuilt workspace by organization, template, and preset.", + // Higher resolution between 1–5m to show typical prebuild claim times. + // Cap at 5m since longer claims diminish prebuild value. + Buckets: []float64{ + 1, // 1s + 5, + 10, + 20, + 30, + 60, // 1m + 120, // 2m + 180, // 3m + 240, // 4m + 300, // 5m + }, + NativeHistogramBucketFactor: 1.1, + // Max number of native buckets kept at once to bound memory. + NativeHistogramMaxBucketNumber: 100, + // Merge/flush small buckets periodically to control churn. + NativeHistogramMinResetDuration: time.Hour, + // Treat tiny values as zero (helps with noisy near-zero latencies). + NativeHistogramZeroThreshold: 0, + NativeHistogramMaxZeroThreshold: 0, + }, []string{"organization_name", "template_name", "preset_name"}), + } +} + +func (m *Metrics) Register(reg prometheus.Registerer) error { + if err := reg.Register(m.workspaceCreationTimings); err != nil { + return err + } + return reg.Register(m.workspaceClaimTimings) +} + +func (f WorkspaceTimingFlags) count() int { + count := 0 + if f.IsPrebuild { + count++ + } + if f.IsClaim { + count++ + } + if f.IsFirstBuild { + count++ + } + return count +} + +// getWorkspaceTimingType returns the type of the workspace build: +// - isPrebuild: if the workspace build corresponds to the creation of a prebuilt workspace +// - isClaim: if the workspace build corresponds to the claim of a prebuilt workspace +// - isWorkspaceFirstBuild: if the workspace build corresponds to the creation of a regular workspace +// (not created from the prebuild pool) +func getWorkspaceTimingType(flags WorkspaceTimingFlags) WorkspaceTimingType { + switch { + case flags.IsPrebuild: + return PrebuildCreation + case flags.IsClaim: + return PrebuildClaim + case flags.IsFirstBuild: + return WorkspaceCreation + default: + return Unsupported + } +} + +// UpdateWorkspaceTimingsMetrics updates the workspace timing metrics based on the workspace build type +func (m *Metrics) UpdateWorkspaceTimingsMetrics( + ctx context.Context, + flags WorkspaceTimingFlags, + organizationName string, + templateName string, + presetName string, + buildTime float64, +) { + m.logger.Debug(ctx, "update workspace timings metrics", + "organizationName", organizationName, + "templateName", templateName, + "presetName", presetName, + "isPrebuild", flags.IsPrebuild, + "isClaim", flags.IsClaim, + "isWorkspaceFirstBuild", flags.IsFirstBuild) + + if flags.count() > 1 { + m.logger.Warn(ctx, "invalid workspace timing flags", + "isPrebuild", flags.IsPrebuild, + "isClaim", flags.IsClaim, + "isWorkspaceFirstBuild", flags.IsFirstBuild) + return + } + + workspaceTimingType := getWorkspaceTimingType(flags) + switch workspaceTimingType { + case WorkspaceCreation: + // Regular workspace creation (without prebuild pool) + m.workspaceCreationTimings. + WithLabelValues(organizationName, templateName, presetName, workspaceTypeRegular).Observe(buildTime) + case PrebuildCreation: + // Prebuilt workspace creation duration + m.workspaceCreationTimings. + WithLabelValues(organizationName, templateName, presetName, workspaceTypePrebuild).Observe(buildTime) + case PrebuildClaim: + // Prebuilt workspace claim duration + m.workspaceClaimTimings. + WithLabelValues(organizationName, templateName, presetName).Observe(buildTime) + default: + m.logger.Warn(ctx, "unsupported workspace timing flags") + } +} diff --git a/coderd/provisionerdserver/provisionerdserver.go b/coderd/provisionerdserver/provisionerdserver.go index 938fdf1774008..4685dad881674 100644 --- a/coderd/provisionerdserver/provisionerdserver.go +++ b/coderd/provisionerdserver/provisionerdserver.go @@ -129,6 +129,8 @@ type server struct { heartbeatInterval time.Duration heartbeatFn func(ctx context.Context) error + + metrics *Metrics } // We use the null byte (0x00) in generating a canonical map key for tags, so @@ -178,6 +180,7 @@ func NewServer( options Options, enqueuer notifications.Enqueuer, prebuildsOrchestrator *atomic.Pointer[prebuilds.ReconciliationOrchestrator], + metrics *Metrics, ) (proto.DRPCProvisionerDaemonServer, error) { // Fail-fast if pointers are nil if lifecycleCtx == nil { @@ -248,6 +251,7 @@ func NewServer( heartbeatFn: options.HeartbeatFn, PrebuildsOrchestrator: prebuildsOrchestrator, UsageInserter: usageInserter, + metrics: metrics, } if s.heartbeatFn == nil { @@ -2281,6 +2285,50 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro } } + // Update workspace (regular and prebuild) timing metrics + if s.metrics != nil { + // Only consider 'start' workspace builds + if workspaceBuild.Transition == database.WorkspaceTransitionStart { + // Get the updated job to report the metrics with correct data + updatedJob, err := s.Database.GetProvisionerJobByID(ctx, jobID) + if err != nil { + s.Logger.Error(ctx, "get updated job from database", slog.Error(err)) + } else + // Only consider 'succeeded' provisioner jobs + if updatedJob.JobStatus == database.ProvisionerJobStatusSucceeded { + presetName := "" + if workspaceBuild.TemplateVersionPresetID.Valid { + preset, err := s.Database.GetPresetByID(ctx, workspaceBuild.TemplateVersionPresetID.UUID) + if err != nil { + if !errors.Is(err, sql.ErrNoRows) { + s.Logger.Error(ctx, "get preset by ID for workspace timing metrics", slog.Error(err)) + } + } else { + presetName = preset.Name + } + } + + buildTime := updatedJob.CompletedAt.Time.Sub(updatedJob.StartedAt.Time).Seconds() + s.metrics.UpdateWorkspaceTimingsMetrics( + ctx, + WorkspaceTimingFlags{ + // Is a prebuilt workspace creation build + IsPrebuild: input.PrebuiltWorkspaceBuildStage.IsPrebuild(), + // Is a prebuilt workspace claim build + IsClaim: input.PrebuiltWorkspaceBuildStage.IsPrebuiltWorkspaceClaim(), + // Is a regular workspace creation build + // Only consider the first build number for regular workspaces + IsFirstBuild: workspaceBuild.BuildNumber == 1, + }, + workspace.OrganizationName, + workspace.TemplateName, + presetName, + buildTime, + ) + } + } + } + msg, err := json.Marshal(wspubsub.WorkspaceEvent{ Kind: wspubsub.WorkspaceEventKindStateChange, WorkspaceID: workspace.ID, diff --git a/coderd/provisionerdserver/provisionerdserver_test.go b/coderd/provisionerdserver/provisionerdserver_test.go index 98af0bb86a73f..914f6dd024193 100644 --- a/coderd/provisionerdserver/provisionerdserver_test.go +++ b/coderd/provisionerdserver/provisionerdserver_test.go @@ -4144,6 +4144,7 @@ func setup(t *testing.T, ignoreLogErrors bool, ov *overrides) (proto.DRPCProvisi }, notifEnq, &op, + provisionerdserver.NewMetrics(logger), ) require.NoError(t, err) return srv, db, ps, daemon diff --git a/coderd/taskname/taskname.go b/coderd/taskname/taskname.go index dff57dfd0c7f5..734c23eb3dd76 100644 --- a/coderd/taskname/taskname.go +++ b/coderd/taskname/taskname.go @@ -24,7 +24,7 @@ const ( Requirements: - Only lowercase letters, numbers, and hyphens - Start with "task-" -- Maximum 28 characters total +- Maximum 27 characters total - Descriptive of the main task Examples: @@ -145,17 +145,23 @@ func Generate(ctx context.Context, prompt string, opts ...Option) (string, error return "", ErrNoNameGenerated } - generatedName := acc.Messages()[0].Content - - if err := codersdk.NameValid(generatedName); err != nil { - return "", xerrors.Errorf("generated name %v not valid: %w", generatedName, err) + taskName := acc.Messages()[0].Content + if taskName == "task-unnamed" { + return "", ErrNoNameGenerated } - if generatedName == "task-unnamed" { - return "", ErrNoNameGenerated + // We append a suffix to the end of the task name to reduce + // the chance of collisions. We truncate the task name to + // to a maximum of 27 bytes, so that when we append the + // 5 byte suffix (`-` and 4 byte hex slug), it should + // remain within the 32 byte workspace name limit. + taskName = taskName[:min(len(taskName), 27)] + taskName = fmt.Sprintf("%s-%s", taskName, generateSuffix()) + if err := codersdk.NameValid(taskName); err != nil { + return "", xerrors.Errorf("generated name %v not valid: %w", taskName, err) } - return fmt.Sprintf("%s-%s", generatedName, generateSuffix()), nil + return taskName, nil } func anthropicDataStream(ctx context.Context, client anthropic.Client, model anthropic.Model, input []aisdk.Message) (aisdk.DataStream, error) { diff --git a/docs/admin/infrastructure/scale-utility.md b/docs/admin/infrastructure/scale-utility.md index b66e7fca41394..6945b54bf559e 100644 --- a/docs/admin/infrastructure/scale-utility.md +++ b/docs/admin/infrastructure/scale-utility.md @@ -44,7 +44,7 @@ environments. > for your users. > To avoid potential outages and orphaned resources, we recommend that you run > scale tests on a secondary "staging" environment or a dedicated -> [Kubernetes playground cluster](https://github.com/coder/coder/tree/main/scaletest/terraform). +> Kubernetes playground cluster. > > Run it against a production environment at your own risk. diff --git a/docs/admin/integrations/prometheus.md b/docs/admin/integrations/prometheus.md index ac88c8c5beda7..47fbc575c7c2e 100644 --- a/docs/admin/integrations/prometheus.md +++ b/docs/admin/integrations/prometheus.md @@ -143,9 +143,12 @@ deployment. They will always be available from the agent. | `coderd_oauth2_external_requests_rate_limit_total` | gauge | DEPRECATED: use coderd_oauth2_external_requests_rate_limit instead | `name` `resource` | | `coderd_oauth2_external_requests_rate_limit_used` | gauge | The number of requests made in this interval. | `name` `resource` | | `coderd_oauth2_external_requests_total` | counter | The total number of api calls made to external oauth2 providers. 'status_code' will be 0 if the request failed with no response. | `name` `source` `status_code` | +| `coderd_prebuilt_workspace_claim_duration_seconds` | histogram | Time to claim a prebuilt workspace by organization, template, and preset. | `organization_name` `preset_name` `template_name` | | `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` | | `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` | | `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` | +| `coderd_workspace_creation_duration_seconds` | histogram | Time to create a workspace by organization, template, preset, and type (regular or prebuild). | `organization_name` `preset_name` `template_name` `type` | +| `coderd_workspace_creation_total` | counter | Total regular (non-prebuilt) workspace creations by organization, template, and preset. | `organization_name` `preset_name` `template_name` | | `coderd_workspace_latest_build_status` | gauge | The current workspace statuses by template, transition, and owner. | `status` `template_name` `template_version` `workspace_owner` `workspace_transition` | | `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | | | `go_goroutines` | gauge | Number of goroutines that currently exist. | | @@ -185,3 +188,19 @@ deployment. They will always be available from the agent. | `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` | + +### Note on Prometheus native histogram support + +The following metrics support native histograms: + +* `coderd_workspace_creation_duration_seconds` +* `coderd_prebuilt_workspace_claim_duration_seconds` + +Native histograms are an **experimental** Prometheus feature that removes the need to predefine bucket boundaries and allows higher-resolution buckets that adapt to deployment characteristics. +Whether a metric is exposed as classic or native depends entirely on the Prometheus server configuration (see [Prometheus docs](https://prometheus.io/docs/specs/native_histograms/) for details): + +* If native histograms are enabled, Prometheus ingests the high-resolution histogram. +* If not, it falls back to the predefined buckets. + +⚠️ Important: classic and native histograms cannot be aggregated together. If Prometheus is switched from classic to native at a certain point in time, dashboards may need to account for that transition. +For this reason, it’s recommended to follow [Prometheus’ migration guidelines](https://prometheus.io/docs/specs/native_histograms/#migration-considerations) when moving from classic to native histograms. diff --git a/docs/admin/templates/extending-templates/prebuilt-workspaces.md b/docs/admin/templates/extending-templates/prebuilt-workspaces.md index bf80ca479254a..61734679d4c7d 100644 --- a/docs/admin/templates/extending-templates/prebuilt-workspaces.md +++ b/docs/admin/templates/extending-templates/prebuilt-workspaces.md @@ -300,6 +300,7 @@ Coder provides several metrics to monitor your prebuilt workspaces: - `coderd_prebuilt_workspaces_desired` (gauge): Target number of prebuilt workspaces that should be available. - `coderd_prebuilt_workspaces_running` (gauge): Current number of prebuilt workspaces in a `running` state. - `coderd_prebuilt_workspaces_eligible` (gauge): Current number of prebuilt workspaces eligible to be claimed. +- `coderd_prebuilt_workspace_claim_duration_seconds` ([_native histogram_](https://prometheus.io/docs/specs/native_histograms) support): Time to claim a prebuilt workspace from the prebuild pool. #### Logs diff --git a/docs/ai-coder/tasks.md b/docs/ai-coder/tasks.md index 43c4becdf8be1..ef47a6b3fb874 100644 --- a/docs/ai-coder/tasks.md +++ b/docs/ai-coder/tasks.md @@ -82,6 +82,10 @@ If a workspace app has the special `"preview"` slug, a navbar will appear above We plan to introduce more customization options in future releases. +## Automatically name your tasks + +Coder can automatically generate a name your tasks if you set the `ANTHROPIC_API_KEY` environment variable on the Coder server. Otherwise, tasks will be given randomly generated names. + ## Opting out of Tasks If you tried Tasks and decided you don't want to use it, you can hide the Tasks tab by starting `coder server` with the `CODER_HIDE_AI_TASKS=true` environment variable or the `--hide-ai-tasks` flag. diff --git a/docs/manifest.json b/docs/manifest.json index 4d2a62c994c88..d2cd11ace699b 100644 --- a/docs/manifest.json +++ b/docs/manifest.json @@ -542,7 +542,7 @@ "title": "External Workspaces", "description": "Learn how to manage external workspaces", "path": "./admin/templates/managing-templates/external-workspaces.md", - "state": ["early access"] + "state": ["premium", "early access"] } ] }, diff --git a/enterprise/coderd/provisionerdaemons.go b/enterprise/coderd/provisionerdaemons.go index 65b03a7d6b864..be03af29293f9 100644 --- a/enterprise/coderd/provisionerdaemons.go +++ b/enterprise/coderd/provisionerdaemons.go @@ -361,6 +361,7 @@ func (api *API) provisionerDaemonServe(rw http.ResponseWriter, r *http.Request) }, api.NotificationsEnqueuer, &api.AGPL.PrebuildsReconciler, + api.ProvisionerdServerMetrics, ) if err != nil { if !xerrors.Is(err, context.Canceled) { diff --git a/enterprise/coderd/workspaces_test.go b/enterprise/coderd/workspaces_test.go index 12a45cba952e2..31821bb798de9 100644 --- a/enterprise/coderd/workspaces_test.go +++ b/enterprise/coderd/workspaces_test.go @@ -26,6 +26,7 @@ import ( "github.com/coder/coder/v2/coderd/audit" "github.com/coder/coder/v2/coderd/autobuild" "github.com/coder/coder/v2/coderd/coderdtest" + "github.com/coder/coder/v2/coderd/coderdtest/promhelp" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbfake" @@ -2873,6 +2874,133 @@ func TestPrebuildActivityBump(t *testing.T) { require.Zero(t, workspace.LatestBuild.MaxDeadline) } +func TestWorkspaceProvisionerdServerMetrics(t *testing.T) { + t.Parallel() + + // Setup + log := testutil.Logger(t) + reg := prometheus.NewRegistry() + provisionerdserverMetrics := provisionerdserver.NewMetrics(log) + err := provisionerdserverMetrics.Register(reg) + require.NoError(t, err) + client, db, owner := coderdenttest.NewWithDatabase(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + IncludeProvisionerDaemon: true, + ProvisionerdServerMetrics: provisionerdserverMetrics, + }, + LicenseOptions: &coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureWorkspacePrebuilds: 1, + }, + }, + }) + + // Given: a template and a template version with a preset without prebuild instances + presetNoPrebuildID := uuid.New() + versionNoPrebuild := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil) + _ = coderdtest.AwaitTemplateVersionJobCompleted(t, client, versionNoPrebuild.ID) + templateNoPrebuild := coderdtest.CreateTemplate(t, client, owner.OrganizationID, versionNoPrebuild.ID) + presetNoPrebuild := dbgen.Preset(t, db, database.InsertPresetParams{ + ID: presetNoPrebuildID, + TemplateVersionID: versionNoPrebuild.ID, + }) + + // Given: a template and a template version with a preset with a prebuild instance + presetPrebuildID := uuid.New() + versionPrebuild := coderdtest.CreateTemplateVersion(t, client, owner.OrganizationID, nil) + _ = coderdtest.AwaitTemplateVersionJobCompleted(t, client, versionPrebuild.ID) + templatePrebuild := coderdtest.CreateTemplate(t, client, owner.OrganizationID, versionPrebuild.ID) + presetPrebuild := dbgen.Preset(t, db, database.InsertPresetParams{ + ID: presetPrebuildID, + TemplateVersionID: versionPrebuild.ID, + DesiredInstances: sql.NullInt32{Int32: 1, Valid: true}, + }) + // Given: a prebuild workspace + wb := dbfake.WorkspaceBuild(t, db, database.WorkspaceTable{ + OwnerID: database.PrebuildsSystemUserID, + TemplateID: templatePrebuild.ID, + }).Seed(database.WorkspaceBuild{ + TemplateVersionID: versionPrebuild.ID, + TemplateVersionPresetID: uuid.NullUUID{ + UUID: presetPrebuildID, + Valid: true, + }, + }).WithAgent(func(agent []*proto.Agent) []*proto.Agent { + return agent + }).Do() + + // Mark the prebuilt workspace's agent as ready so the prebuild can be claimed + // nolint:gocritic + ctx := dbauthz.AsSystemRestricted(testutil.Context(t, testutil.WaitLong)) + agent, err := db.GetWorkspaceAgentAndLatestBuildByAuthToken(ctx, uuid.MustParse(wb.AgentToken)) + require.NoError(t, err) + err = db.UpdateWorkspaceAgentLifecycleStateByID(ctx, database.UpdateWorkspaceAgentLifecycleStateByIDParams{ + ID: agent.WorkspaceAgent.ID, + LifecycleState: database.WorkspaceAgentLifecycleStateReady, + }) + require.NoError(t, err) + + organizationName, err := client.Organization(ctx, owner.OrganizationID) + require.NoError(t, err) + user, err := client.User(ctx, "testUser") + require.NoError(t, err) + + // Given: no histogram value for prebuilt workspaces claim + prebuiltWorkspaceHistogramMetric := promhelp.MetricValue(t, reg, "coderd_prebuilt_workspace_claim_duration_seconds", prometheus.Labels{ + "organization_name": organizationName.Name, + "template_name": templatePrebuild.Name, + "preset_name": presetPrebuild.Name, + }) + require.Nil(t, prebuiltWorkspaceHistogramMetric) + + // Given: the prebuilt workspace is claimed by a user + claimedWorkspace, err := client.CreateUserWorkspace(ctx, user.ID.String(), codersdk.CreateWorkspaceRequest{ + TemplateVersionID: versionPrebuild.ID, + TemplateVersionPresetID: presetPrebuildID, + Name: coderdtest.RandomUsername(t), + }) + require.NoError(t, err) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, claimedWorkspace.LatestBuild.ID) + require.Equal(t, wb.Workspace.ID, claimedWorkspace.ID) + + // Then: the histogram value for prebuilt workspace claim should be updated + prebuiltWorkspaceHistogram := promhelp.HistogramValue(t, reg, "coderd_prebuilt_workspace_claim_duration_seconds", prometheus.Labels{ + "organization_name": organizationName.Name, + "template_name": templatePrebuild.Name, + "preset_name": presetPrebuild.Name, + }) + require.NotNil(t, prebuiltWorkspaceHistogram) + require.Equal(t, uint64(1), prebuiltWorkspaceHistogram.GetSampleCount()) + + // Given: no histogram value for regular workspaces creation + regularWorkspaceHistogramMetric := promhelp.MetricValue(t, reg, "coderd_workspace_creation_duration_seconds", prometheus.Labels{ + "organization_name": organizationName.Name, + "template_name": templateNoPrebuild.Name, + "preset_name": presetNoPrebuild.Name, + "type": "regular", + }) + require.Nil(t, regularWorkspaceHistogramMetric) + + // Given: a user creates a regular workspace (without prebuild pool) + regularWorkspace, err := client.CreateUserWorkspace(ctx, user.ID.String(), codersdk.CreateWorkspaceRequest{ + TemplateVersionID: versionNoPrebuild.ID, + TemplateVersionPresetID: presetNoPrebuildID, + Name: coderdtest.RandomUsername(t), + }) + require.NoError(t, err) + coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, regularWorkspace.LatestBuild.ID) + + // Then: the histogram value for regular workspace creation should be updated + regularWorkspaceHistogram := promhelp.HistogramValue(t, reg, "coderd_workspace_creation_duration_seconds", prometheus.Labels{ + "organization_name": organizationName.Name, + "template_name": templateNoPrebuild.Name, + "preset_name": presetNoPrebuild.Name, + "type": "regular", + }) + require.NotNil(t, regularWorkspaceHistogram) + require.Equal(t, uint64(1), regularWorkspaceHistogram.GetSampleCount()) +} + // TestWorkspaceTemplateParamsChange tests a workspace with a parameter that // validation changes on apply. The params used in create workspace are invalid // according to the static params on import. diff --git a/scaletest/README.md b/scaletest/README.md deleted file mode 100644 index 9fa475ae29ab5..0000000000000 --- a/scaletest/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# Scale Testing - -This folder contains CLI commands, Terraform code, and scripts to aid in performing load tests of Coder. -At a high level, it performs the following steps: - -- Using the Terraform code in `./terraform`, stands up a preconfigured Google Cloud environment - consisting of a VPC, GKE Cluster, and CloudSQL instance. - > **Note: You must have an existing Google Cloud project available.** -- Creates a dedicated namespace for Coder and installs Coder using the Helm chart in this namespace. -- Configures the Coder deployment with random credentials and a predefined Kubernetes template. - > **Note:** These credentials are stored in `${PROJECT_ROOT}/scaletest/.coderv2/coder.env`. -- Creates a number of workspaces and waits for them to all start successfully. These workspaces - are ephemeral and do not contain any persistent resources. -- Waits for 10 minutes to allow things to settle and establish a baseline. -- Generates web terminal traffic to all workspaces for 30 minutes. -- Directly after traffic generation, captures goroutine and heap snapshots of the Coder deployment. -- Tears down all resources (unless `--skip-cleanup` is specified). - -## Usage - -The main entrypoint is the `scaletest.sh` script. - -```console -$ scaletest.sh --help -Usage: scaletest.sh --name --project --num-workspaces --scenario [--dry-run] [--skip-cleanup] -``` - -### Required arguments - -- `--name`: Name for the loadtest. This is added as a prefix to resources created by Terraform (e.g. `joe-big-loadtest`). -- `--project`: Google Cloud project in which to create the resources (example: `my-loadtest-project`). -- `--num-workspaces`: Number of workspaces to create (example: `10`). -- `--scenario`: Deployment scenario to use (example: `small`). See `terraform/scenario-*.tfvars`. - -> **Note:** In order to capture Prometheus metrics, you must define the environment variables -> `SCALETEST_PROMETHEUS_REMOTE_WRITE_USER` and `SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD`. - -### Optional arguments - -- `--dry-run`: Do not perform any action and instead print what would be executed. -- `--skip-cleanup`: Do not perform any cleanup. You will be responsible for deleting any resources this creates. - -### Environment Variables - -All of the above arguments may be specified as environment variables. Consult the script for details. - -### Prometheus Metrics - -To capture Prometheus metrics from the loadtest, two environment variables are required: - -- `SCALETEST_PROMETHEUS_REMOTE_WRITE_USER` -- `SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD` - -### Enterprise License - -To add an Enterprise license, set the `SCALETEST_CODER_LICENSE` environment variable to the JWT string - -## Scenarios - -A scenario defines a number of variables that override the default Terraform variables. -A number of existing scenarios are provided in `scaletest/terraform/scenario-*.tfvars`. - -For example, `scenario-small.tfvars` includes the following variable definitions: - -```hcl -nodepool_machine_type_coder = "t2d-standard-2" -nodepool_machine_type_workspaces = "t2d-standard-2" -coder_cpu = "1000m" # Leaving 1 CPU for system workloads -coder_mem = "4Gi" # Leaving 4GB for system workloads -``` - -To create your own scenario, simply add a new file `terraform/scenario-$SCENARIO_NAME.tfvars`. -In this file, override variables as required, consulting `vars.tf` as needed. -You can then use this scenario by specifying `--scenario $SCENARIO_NAME`. -For example, if your scenario file were named `scenario-big-whopper2x.tfvars`, you would specify -`--scenario=big-whopper2x`. - -## Utility scripts - -A number of utility scripts are provided in `lib`, and are used by `scaletest.sh`: - -- `coder_shim.sh`: a convenience script to run the `coder` binary with a predefined config root. - This is intended to allow running Coder CLI commands against the loadtest cluster without - modifying a user's existing Coder CLI configuration. -- `coder_init.sh`: Performs first-time user setup of an existing Coder instance, generating - a random password for the admin user. The admin user is named `admin@coder.com` by default. - Credentials are written to `scaletest/.coderv2/coder.env`. -- `coder_workspacetraffic.sh`: Runs traffic generation against the loadtest cluster and creates - a monitoring manifest for the traffic generation pod. This pod will restart automatically - after the traffic generation has completed. - -## Grafana Dashboard - -A sample Grafana dashboard is provided in `scaletest_dashboard.json`. This dashboard is intended -to be imported into an existing Grafana instance. It provides a number of useful metrics: - -- **Control Plane Resources**: CPU, memory, and network usage for the Coder deployment, as well as the number of pod restarts. -- **Database**: Rows inserted/updated/deleted/returned, active connections, and transactions per second. Fine-grained `sqlQuerier` metrics are provided for Coder's database as well, broken down my query method. -- **HTTP requests**: Number of HTTP requests per second, broken down by status code and path. -- **Workspace Resources**: CPU, memory, and network usage for all workspaces. -- **Workspace Agents**: Workspace agent network usage, connection latency, and number of active connections. -- **Workspace Traffic**: Statistics related to workspace traffic generation. -- **Internals**: Provisioner job timings, concurrency, workspace builds, and AuthZ duration. - -A subset of these metrics may be useful for a production deployment, but some are only useful -for load testing. - -> **Note:** in particular, `sqlQuerier` metrics produce a large number of time series and may cause -> increased charges in your metrics provider. diff --git a/scaletest/scaletest.sh b/scaletest/scaletest.sh deleted file mode 100755 index dd0a6cb4f450c..0000000000000 --- a/scaletest/scaletest.sh +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env bash - -[[ -n ${VERBOSE:-} ]] && set -x -set -euo pipefail - -PROJECT_ROOT="$(git rev-parse --show-toplevel)" -# shellcheck source=scripts/lib.sh -source "${PROJECT_ROOT}/scripts/lib.sh" - -DRY_RUN="${DRY_RUN:-0}" -SCALETEST_NAME="${SCALETEST_NAME:-}" -SCALETEST_NUM_WORKSPACES="${SCALETEST_NUM_WORKSPACES:-}" -SCALETEST_SCENARIO="${SCALETEST_SCENARIO:-}" -SCALETEST_PROJECT="${SCALETEST_PROJECT:-}" -SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}" -SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}" -SCALETEST_CODER_LICENSE="${SCALETEST_CODER_LICENSE:-}" -SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-0}" -SCALETEST_CREATE_CONCURRENCY="${SCALETEST_CREATE_CONCURRENCY:-10}" -SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}" -SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-10s}" -SCALETEST_DESTROY="${SCALETEST_DESTROY:-0}" - -script_name=$(basename "$0") -args="$(getopt -o "" -l create-concurrency:,destroy,dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")" -eval set -- "$args" -while true; do - case "$1" in - --create-concurrency) - SCALETEST_CREATE_CONCURRENCY="$2" - shift 2 - ;; - --destroy) - SCALETEST_DESTROY=1 - shift - ;; - --dry-run) - DRY_RUN=1 - shift - ;; - --help) - echo "Usage: $script_name --name --project --num-workspaces --scenario [--create-concurrency ] [--destroy] [--dry-run] [--skip-cleanup] [--traffic-bytes-per-tick ] [--traffic-tick-interval ]" - exit 1 - ;; - --name) - SCALETEST_NAME="$2" - shift 2 - ;; - --num-workspaces) - SCALETEST_NUM_WORKSPACES="$2" - shift 2 - ;; - --project) - SCALETEST_PROJECT="$2" - shift 2 - ;; - --scenario) - SCALETEST_SCENARIO="$2" - shift 2 - ;; - --skip-cleanup) - SCALETEST_SKIP_CLEANUP=1 - shift - ;; - --traffic-bytes-per-tick) - SCALETEST_TRAFFIC_BYTES_PER_TICK="$2" - shift 2 - ;; - --traffic-tick-interval) - SCALETEST_TRAFFIC_TICK_INTERVAL="$2" - shift 2 - ;; - --) - shift - break - ;; - *) - error "Unrecognized option: $1" - ;; - esac -done - -dependencies gcloud kubectl terraform - -if [[ -z "${SCALETEST_NAME}" ]]; then - echo "Must specify --name" - exit 1 -fi - -if [[ -z "${SCALETEST_PROJECT}" ]]; then - echo "Must specify --project" - exit 1 -fi - -if [[ -z "${SCALETEST_NUM_WORKSPACES}" ]]; then - echo "Must specify --num-workspaces" - exit 1 -fi - -if [[ -z "${SCALETEST_SCENARIO}" ]]; then - echo "Must specify --scenario" - exit 1 -fi - -if [[ -z "${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER}" ]] || [[ -z "${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD}" ]]; then - echo "SCALETEST_PROMETHEUS_REMOTE_WRITE_USER or SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD not specified." - echo "No prometheus metrics will be collected!" - read -p "Continue (y/N)? " -n1 -r - if [[ "${REPLY}" != [yY] ]]; then - exit 1 - fi -fi - -SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" -if [[ ! -f "${SCALETEST_SCENARIO_VARS}" ]]; then - echo "Scenario ${SCALETEST_SCENARIO_VARS} not found." - echo "Please create it or choose another scenario:" - find "${PROJECT_ROOT}/scaletest/terraform" -type f -name 'scenario-*.tfvars' - exit 1 -fi - -if [[ "${SCALETEST_SKIP_CLEANUP}" == 1 ]]; then - log "WARNING: you told me to not clean up after myself, so this is now your job!" -fi - -CONFIG_DIR="${PROJECT_ROOT}/scaletest/.coderv2" -if [[ -d "${CONFIG_DIR}" ]] && files=$(ls -qAH -- "${CONFIG_DIR}") && [[ -z "$files" ]]; then - echo "Cleaning previous configuration" - maybedryrun "$DRY_RUN" rm -fv "${CONFIG_DIR}/*" -fi -maybedryrun "$DRY_RUN" mkdir -p "${CONFIG_DIR}" - -SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" -SCALETEST_SECRETS="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars" -SCALETEST_SECRETS_TEMPLATE="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars.tpl" - -log "Writing scaletest secrets to file." -SCALETEST_NAME="${SCALETEST_NAME}" \ - SCALETEST_PROJECT="${SCALETEST_PROJECT}" \ - SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER}" \ - SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD}" \ - envsubst <"${SCALETEST_SECRETS_TEMPLATE}" >"${SCALETEST_SECRETS}" - -pushd "${PROJECT_ROOT}/scaletest/terraform" - -echo "Initializing terraform." -maybedryrun "$DRY_RUN" terraform init - -echo "Setting up infrastructure." -maybedryrun "$DRY_RUN" terraform apply --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --var state=started --auto-approve - -if [[ "${DRY_RUN}" != 1 ]]; then - SCALETEST_CODER_URL=$(<"${CONFIG_DIR}/url") -else - SCALETEST_CODER_URL="http://coder.dryrun.local:3000" -fi -KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig" -echo "Waiting for Coder deployment at ${SCALETEST_CODER_URL} to become ready" -max_attempts=10 -for attempt in $(seq 1 $max_attempts); do - maybedryrun "$DRY_RUN" curl --silent --fail --output /dev/null "${SCALETEST_CODER_URL}/api/v2/buildinfo" - curl_status=$? - if [[ $curl_status -eq 0 ]]; then - break - fi - if attempt -eq $max_attempts; then - echo - echo "Coder deployment failed to become ready in time!" - exit 1 - fi - echo "Coder deployment not ready yet (${attempt}/${max_attempts}), sleeping 3 seconds" - maybedryrun "$DRY_RUN" sleep 3 -done - -echo "Initializing Coder deployment." -DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_init.sh" "${SCALETEST_CODER_URL}" - -if [[ -n "${SCALETEST_CODER_LICENSE}" ]]; then - echo "Applying Coder Enterprise License" - DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" license add -l "${SCALETEST_CODER_LICENSE}" -fi - -echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces." -DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" exp scaletest create-workspaces \ - --count "${SCALETEST_NUM_WORKSPACES}" \ - --template=kubernetes \ - --concurrency "${SCALETEST_CREATE_CONCURRENCY}" \ - --no-cleanup - -echo "Sleeping 10 minutes to establish a baseline measurement." -maybedryrun "$DRY_RUN" sleep 600 - -echo "Sending traffic to workspaces" -maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" \ - --name "${SCALETEST_NAME}" \ - --traffic-bytes-per-tick "${SCALETEST_TRAFFIC_BYTES_PER_TICK}" \ - --traffic-tick-interval "${SCALETEST_TRAFFIC_TICK_INTERVAL}" -maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready - -echo "Sleeping 15 minutes for traffic generation" -maybedryrun "$DRY_RUN" sleep 900 - -echo "Starting pprof" -maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 & -pfpid=$! -maybedryrun "$DRY_RUN" trap "kill $pfpid" EXIT - -echo "Waiting for pprof endpoint to become available" -pprof_attempt_counter=0 -while ! maybedryrun "$DRY_RUN" timeout 1 bash -c "echo > /dev/tcp/localhost/6061"; do - if [[ $pprof_attempt_counter -eq 10 ]]; then - echo - echo "pprof failed to become ready in time!" - exit 1 - fi - ((pprof_attempt_counter += 1)) - maybedryrun "$DRY_RUN" sleep 3 -done - -echo "Taking pprof snapshots" -maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-heap.pprof.gz" http://localhost:6061/debug/pprof/heap -maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine -# No longer need to port-forward -maybedryrun "$DRY_RUN" kill "$pfpid" -maybedryrun "$DRY_RUN" trap - EXIT - -if [[ "${SCALETEST_SKIP_CLEANUP}" == 1 ]]; then - echo "Leaving resources up for you to inspect." - echo "Please don't forget to clean up afterwards:" - echo "cd terraform && terraform destroy --var-file=${SCALETEST_SCENARIO_VARS} --var-file=${SCALETEST_SECRETS} --auto-approve" - exit 0 -fi - -if [[ "${SCALETEST_DESTROY}" == 1 ]]; then - echo "Destroying infrastructure" - maybedryrun "$DRY_RUN" terraform destroy --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --auto-approve -else - echo "Scaling down infrastructure" - maybedryrun "$DRY_RUN" terraform apply --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --var state=stopped --auto-approve -fi diff --git a/scaletest/terraform/action/.gitignore b/scaletest/terraform/action/.gitignore deleted file mode 100644 index c45cf41694258..0000000000000 --- a/scaletest/terraform/action/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.tfvars diff --git a/scaletest/terraform/action/cf_dns.tf b/scaletest/terraform/action/cf_dns.tf deleted file mode 100644 index 126c35c12cc76..0000000000000 --- a/scaletest/terraform/action/cf_dns.tf +++ /dev/null @@ -1,21 +0,0 @@ -data "cloudflare_zone" "domain" { - name = var.cloudflare_domain -} - -resource "cloudflare_record" "coder" { - for_each = local.deployments - zone_id = data.cloudflare_zone.domain.zone_id - name = "${each.value.subdomain}.${var.cloudflare_domain}" - content = google_compute_address.coder[each.key].address - type = "A" - ttl = 3600 -} - -resource "cloudflare_record" "coder_wildcard" { - for_each = local.deployments - zone_id = data.cloudflare_zone.domain.id - name = each.value.wildcard_subdomain - content = cloudflare_record.coder[each.key].name - type = "CNAME" - ttl = 3600 -} diff --git a/scaletest/terraform/action/coder_helm_values.tftpl b/scaletest/terraform/action/coder_helm_values.tftpl deleted file mode 100644 index 3fc8d5dfd4226..0000000000000 --- a/scaletest/terraform/action/coder_helm_values.tftpl +++ /dev/null @@ -1,120 +0,0 @@ -coder: - workspaceProxy: ${workspace_proxy} - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: "cloud.google.com/gke-nodepool" - operator: "In" - values: ["${node_pool}"] - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 1 - podAffinityTerm: - topologyKey: "kubernetes.io/hostname" - labelSelector: - matchExpressions: - - key: "app.kubernetes.io/instance" - operator: "In" - values: ["${release_name}"] - env: - %{~ if workspace_proxy ~} - - name: "CODER_ACCESS_URL" - value: "${access_url}" - - name: "CODER_WILDCARD_ACCESS_URL" - value: "${wildcard_access_url}" - - name: CODER_PRIMARY_ACCESS_URL - value: "${primary_url}" - - name: CODER_PROXY_SESSION_TOKEN - valueFrom: - secretKeyRef: - key: token - name: "${proxy_token}" - %{~ endif ~} - %{~ if provisionerd ~} - - name: "CODER_URL" - value: "${access_url}" - - name: "CODER_PROVISIONERD_TAGS" - value: "scope=organization,deployment=${deployment}" - - name: "CODER_PROVISIONER_DAEMON_NAME" - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: "CODER_CONFIG_DIR" - value: "/tmp/config" - %{~ endif ~} - %{~ if !workspace_proxy && !provisionerd ~} - - name: "CODER_ACCESS_URL" - value: "${access_url}" - - name: "CODER_WILDCARD_ACCESS_URL" - value: "${wildcard_access_url}" - - name: "CODER_PG_CONNECTION_URL" - valueFrom: - secretKeyRef: - name: "${db_secret}" - key: url - - name: "CODER_PROVISIONER_DAEMONS" - value: "0" - - name: CODER_PROVISIONER_DAEMON_PSK - valueFrom: - secretKeyRef: - key: psk - name: "${provisionerd_psk}" - - name: "CODER_PROMETHEUS_COLLECT_AGENT_STATS" - value: "true" - - name: "CODER_PROMETHEUS_COLLECT_DB_METRICS" - value: "true" - - name: "CODER_PPROF_ENABLE" - value: "true" - %{~ endif ~} - - name: "CODER_CACHE_DIRECTORY" - value: "/tmp/coder" - - name: "CODER_TELEMETRY_ENABLE" - value: "false" - - name: "CODER_LOGGING_HUMAN" - value: "/dev/null" - - name: "CODER_LOGGING_STACKDRIVER" - value: "/dev/stderr" - - name: "CODER_PROMETHEUS_ENABLE" - value: "true" - - name: "CODER_VERBOSE" - value: "true" - - name: "CODER_EXPERIMENTS" - value: "${experiments}" - - name: "CODER_DANGEROUS_DISABLE_RATE_LIMITS" - value: "true" - - name: "CODER_DANGEROUS_ALLOW_PATH_APP_SITE_OWNER_ACCESS" - value: "true" - image: - repo: ${image_repo} - tag: ${image_tag} - replicaCount: "${replicas}" - resources: - requests: - cpu: "${cpu_request}" - memory: "${mem_request}" - limits: - cpu: "${cpu_limit}" - memory: "${mem_limit}" - securityContext: - readOnlyRootFilesystem: true - %{~ if !provisionerd ~} - service: - enable: true - sessionAffinity: None - loadBalancerIP: "${ip_address}" - %{~ endif ~} - volumeMounts: - - mountPath: "/tmp" - name: cache - readOnly: false - volumes: - - emptyDir: - sizeLimit: 1024Mi - name: cache - %{~ if !provisionerd ~} - tls: - secretNames: - - "${tls_secret_name}" - %{~ endif ~} diff --git a/scaletest/terraform/action/coder_proxies.tf b/scaletest/terraform/action/coder_proxies.tf deleted file mode 100644 index 6af3ef82bb392..0000000000000 --- a/scaletest/terraform/action/coder_proxies.tf +++ /dev/null @@ -1,102 +0,0 @@ -data "http" "coder_healthy" { - url = local.deployments.primary.url - // Wait up to 5 minutes for DNS to propagate - retry { - attempts = 30 - min_delay_ms = 10000 - } - - lifecycle { - postcondition { - condition = self.status_code == 200 - error_message = "${self.url} returned an unhealthy status code" - } - } - - depends_on = [helm_release.coder_primary, cloudflare_record.coder["primary"]] -} - -resource "null_resource" "api_key" { - provisioner "local-exec" { - interpreter = ["/bin/bash", "-c"] - command = < ${path.module}/.coderv2/session_token - -api_key=$(curl '${local.deployments.primary.url}/api/v2/users/me/keys/tokens' \ - -H "Coder-Session-Token: $${session_token}" \ - --data-raw '{"token_name":"terraform","scope":"all"}' \ - --insecure --silent | jq -r .key) - -echo -n $${api_key} > ${path.module}/.coderv2/api_key -EOF - } - - depends_on = [data.http.coder_healthy] -} - -data "local_file" "api_key" { - filename = "${path.module}/.coderv2/api_key" - depends_on = [null_resource.api_key] -} - -resource "null_resource" "license" { - provisioner "local-exec" { - interpreter = ["/bin/bash", "-c"] - command = < ${path.module}/.coderv2/europe_proxy_token -EOF - } - - depends_on = [null_resource.license] -} - -data "local_file" "europe_proxy_token" { - filename = "${path.module}/.coderv2/europe_proxy_token" - depends_on = [null_resource.europe_proxy_token] -} - -resource "null_resource" "asia_proxy_token" { - provisioner "local-exec" { - interpreter = ["/bin/bash", "-c"] - command = < ${path.module}/.coderv2/asia_proxy_token -EOF - } - - depends_on = [null_resource.license] -} - -data "local_file" "asia_proxy_token" { - filename = "${path.module}/.coderv2/asia_proxy_token" - depends_on = [null_resource.asia_proxy_token] -} diff --git a/scaletest/terraform/action/coder_templates.tf b/scaletest/terraform/action/coder_templates.tf deleted file mode 100644 index d27c25844b91e..0000000000000 --- a/scaletest/terraform/action/coder_templates.tf +++ /dev/null @@ -1,340 +0,0 @@ -resource "local_file" "kubernetes_template" { - filename = "${path.module}/.coderv2/templates/kubernetes/main.tf" - content = <