diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index f71d267028270..1846b1ea18284 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -5,6 +5,8 @@ import ( "database/sql" "fmt" "net/http" + "slices" + "strings" "sync" "sync/atomic" "time" @@ -155,6 +157,22 @@ func (e *Executor) runOnce(t time.Time) Stats { return stats } + // Sort the workspaces by build template version ID so that we can group + // identical template versions together. This is a slight (and imperfect) + // optimization. + // + // `wsbuilder` needs to load the terraform files for a given template version + // into memory. If 2 workspaces are using the same template version, they will + // share the same files in the FileCache. This only happens if the builds happen + // in parallel. + // TODO: Actually make sure the cache has the files in the cache for the full + // set of identical template versions. Then unload the files when the builds + // are done. Right now, this relies on luck for the 10 goroutine workers to + // overlap and keep the file reference in the cache alive. + slices.SortFunc(workspaces, func(a, b database.GetWorkspacesEligibleForTransitionRow) int { + return strings.Compare(a.BuildTemplateVersionID.UUID.String(), b.BuildTemplateVersionID.UUID.String()) + }) + // We only use errgroup here for convenience of API, not for early // cancellation. This means we only return nil errors in th eg.Go. eg := errgroup.Group{} diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index fe32851f0e002..4315096587c4f 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -19335,7 +19335,8 @@ func (q *sqlQuerier) GetWorkspacesByTemplateID(ctx context.Context, templateID u const getWorkspacesEligibleForTransition = `-- name: GetWorkspacesEligibleForTransition :many SELECT workspaces.id, - workspaces.name + workspaces.name, + workspace_builds.template_version_id as build_template_version_id FROM workspaces LEFT JOIN @@ -19454,8 +19455,9 @@ WHERE ` type GetWorkspacesEligibleForTransitionRow struct { - ID uuid.UUID `db:"id" json:"id"` - Name string `db:"name" json:"name"` + ID uuid.UUID `db:"id" json:"id"` + Name string `db:"name" json:"name"` + BuildTemplateVersionID uuid.NullUUID `db:"build_template_version_id" json:"build_template_version_id"` } func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]GetWorkspacesEligibleForTransitionRow, error) { @@ -19467,7 +19469,7 @@ func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now var items []GetWorkspacesEligibleForTransitionRow for rows.Next() { var i GetWorkspacesEligibleForTransitionRow - if err := rows.Scan(&i.ID, &i.Name); err != nil { + if err := rows.Scan(&i.ID, &i.Name, &i.BuildTemplateVersionID); err != nil { return nil, err } items = append(items, i) diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index f6ee14ae0ac7d..25e4d4f97a46b 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -642,7 +642,8 @@ FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspa -- name: GetWorkspacesEligibleForTransition :many SELECT workspaces.id, - workspaces.name + workspaces.name, + workspace_builds.template_version_id as build_template_version_id FROM workspaces LEFT JOIN diff --git a/coderd/files/cache.go b/coderd/files/cache.go index 32e03e0b8f209..3698aac9286c8 100644 --- a/coderd/files/cache.go +++ b/coderd/files/cache.go @@ -71,12 +71,12 @@ func (c *Cache) registerMetrics(registerer prometheus.Registerer) *Cache { Help: "The count of file references currently open in the file cache. Multiple references can be held for the same file.", }) - c.totalOpenFileReferences = f.NewCounter(prometheus.CounterOpts{ + c.totalOpenFileReferences = f.NewCounterVec(prometheus.CounterOpts{ Namespace: "coderd", Subsystem: subsystem, Name: "open_file_refs_total", - Help: "The total number of file references ever opened in the file cache.", - }) + Help: "The total number of file references ever opened in the file cache. The 'hit' label indicates if the file was loaded from the cache.", + }, []string{"hit"}) return c } @@ -97,7 +97,7 @@ type Cache struct { type cacheMetrics struct { currentOpenFileReferences prometheus.Gauge - totalOpenFileReferences prometheus.Counter + totalOpenFileReferences *prometheus.CounterVec currentOpenFiles prometheus.Gauge totalOpenedFiles prometheus.Counter @@ -173,6 +173,7 @@ func (c *Cache) prepare(ctx context.Context, db database.Store, fileID uuid.UUID c.lock.Lock() defer c.lock.Unlock() + hitLabel := "true" entry, ok := c.data[fileID] if !ok { value := lazy.NewWithError(func() (CacheEntryValue, error) { @@ -194,10 +195,11 @@ func (c *Cache) prepare(ctx context.Context, db database.Store, fileID uuid.UUID c.data[fileID] = entry c.currentOpenFiles.Inc() c.totalOpenedFiles.Inc() + hitLabel = "false" } c.currentOpenFileReferences.Inc() - c.totalOpenFileReferences.Inc() + c.totalOpenFileReferences.WithLabelValues(hitLabel).Inc() entry.refCount++ return entry.value } diff --git a/coderd/files/cache_test.go b/coderd/files/cache_test.go index a5a5dfae268ca..8a8acfbc0772f 100644 --- a/coderd/files/cache_test.go +++ b/coderd/files/cache_test.go @@ -161,7 +161,9 @@ func TestConcurrency(t *testing.T) { require.Equal(t, batches, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) require.Equal(t, batches*batchSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) - require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil)) + hit, miss := promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), prometheus.Labels{"hit": "false"}), + promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), prometheus.Labels{"hit": "true"}) + require.Equal(t, batches*batchSize, hit+miss) } func TestRelease(t *testing.T) { @@ -245,7 +247,6 @@ func TestRelease(t *testing.T) { // Total counts remain require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_bytes_total"), nil)) require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) - require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil)) } func cacheAuthzSetup(t *testing.T) (database.Store, *files.Cache, *coderdtest.RecordingAuthorizer) {