From 352cb4f9d75c71a26aacb266f565df4332e82ff3 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Wed, 28 May 2025 15:15:14 -0500 Subject: [PATCH 1/5] feat: add filecache prometheus metrics --- coderd/coderd.go | 2 +- coderd/files/cache.go | 119 +++++++++++++++++++++++++--- coderd/files/cache_internal_test.go | 18 ++--- 3 files changed, 114 insertions(+), 25 deletions(-) diff --git a/coderd/coderd.go b/coderd/coderd.go index 37e7d22a6d080..4a791bf95e9e3 100644 --- a/coderd/coderd.go +++ b/coderd/coderd.go @@ -572,7 +572,7 @@ func New(options *Options) *API { TemplateScheduleStore: options.TemplateScheduleStore, UserQuietHoursScheduleStore: options.UserQuietHoursScheduleStore, AccessControlStore: options.AccessControlStore, - FileCache: files.NewFromStore(options.Database), + FileCache: files.NewFromStore(options.Database, options.PrometheusRegistry), Experiments: experiments, WebpushDispatcher: options.WebPushDispatcher, healthCheckGroup: &singleflight.Group[string, *healthsdk.HealthcheckReport]{}, diff --git a/coderd/files/cache.go b/coderd/files/cache.go index 56e9a715de189..c4d31e85c9c9d 100644 --- a/coderd/files/cache.go +++ b/coderd/files/cache.go @@ -7,6 +7,8 @@ import ( "sync" "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "golang.org/x/xerrors" archivefs "github.com/coder/coder/v2/archive/fs" @@ -16,22 +18,75 @@ import ( // NewFromStore returns a file cache that will fetch files from the provided // database. -func NewFromStore(store database.Store) *Cache { - fetcher := func(ctx context.Context, fileID uuid.UUID) (fs.FS, error) { +func NewFromStore(store database.Store, registerer prometheus.Registerer) *Cache { + fetch := func(ctx context.Context, fileID uuid.UUID) (fs.FS, int64, error) { file, err := store.GetFileByID(ctx, fileID) if err != nil { - return nil, xerrors.Errorf("failed to read file from database: %w", err) + return nil, 0, xerrors.Errorf("failed to read file from database: %w", err) } content := bytes.NewBuffer(file.Data) - return archivefs.FromTarReader(content), nil + return archivefs.FromTarReader(content), int64(content.Len()), nil } - return &Cache{ + return New(fetch, registerer) +} + +func New(fetch fetcher, registerer prometheus.Registerer) *Cache { + return (&Cache{ lock: sync.Mutex{}, data: make(map[uuid.UUID]*cacheEntry), - fetcher: fetcher, - } + fetcher: fetch, + }).registerMetrics(registerer) +} + +func (c *Cache) registerMetrics(registerer prometheus.Registerer) *Cache { + subsystem := "file_cache" + f := promauto.With(registerer) + + c.currentCacheSize = f.NewGauge(prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: subsystem, + Name: "open_files_size_current", + Help: "The current size of all files currently open in the file cache.", + }) + + c.totalCacheSize = f.NewCounter(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: subsystem, + Name: "open_files_size_total", + Help: "The total size of all files opened in the file cache.", + }) + + c.currentOpenFiles = f.NewGauge(prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: subsystem, + Name: "open_files_current", + Help: "The number of unique files currently open in the file cache.", + }) + + c.totalOpenedFiles = f.NewCounter(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: subsystem, + Name: "open_files_total", + Help: "The number of unique files opened in the file cache.", + }) + + c.currentOpenFileReferences = f.NewGauge(prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: subsystem, + Name: "open_file_refs_current", + Help: "The number of file references currently open in the file cache.", + }) + + c.totalOpenFileReferences = f.NewCounter(prometheus.CounterOpts{ + Namespace: "coderd", + Subsystem: subsystem, + Name: "open_file_refs_total", + Help: "The number of file references currently open in the file cache.", + }) + + return c } // Cache persists the files for template versions, and is used by dynamic @@ -43,15 +98,30 @@ type Cache struct { lock sync.Mutex data map[uuid.UUID]*cacheEntry fetcher + + // metrics + currentOpenFileReferences prometheus.Gauge + totalOpenFileReferences prometheus.Counter + + currentOpenFiles prometheus.Gauge + totalOpenedFiles prometheus.Counter + + currentCacheSize prometheus.Gauge + totalCacheSize prometheus.Counter +} + +type cacheEntryValue struct { + dir fs.FS + size int64 } type cacheEntry struct { // refCount must only be accessed while the Cache lock is held. refCount int - value *lazy.ValueWithError[fs.FS] + value *lazy.ValueWithError[cacheEntryValue] } -type fetcher func(context.Context, uuid.UUID) (fs.FS, error) +type fetcher func(context.Context, uuid.UUID) (dir fs.FS, size int64, err error) // Acquire will load the fs.FS for the given file. It guarantees that parallel // calls for the same fileID will only result in one fetch, and that parallel @@ -67,17 +137,28 @@ func (c *Cache) Acquire(ctx context.Context, fileID uuid.UUID) (fs.FS, error) { if err != nil { c.Release(fileID) } - return it, err + return it.dir, err } -func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithError[fs.FS] { +func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithError[cacheEntryValue] { c.lock.Lock() defer c.lock.Unlock() entry, ok := c.data[fileID] if !ok { - value := lazy.NewWithError(func() (fs.FS, error) { - return c.fetcher(ctx, fileID) + value := lazy.NewWithError(func() (cacheEntryValue, error) { + dir, size, err := c.fetcher(ctx, fileID) + + // Always add to the cache size the bytes of the file loaded. + if err == nil { + c.currentCacheSize.Add(float64(size)) + c.totalCacheSize.Add(float64(size)) + } + + return cacheEntryValue{ + dir: dir, + size: size, + }, err }) entry = &cacheEntry{ @@ -85,8 +166,12 @@ func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithEr refCount: 0, } c.data[fileID] = entry + c.currentOpenFiles.Inc() + c.totalOpenedFiles.Inc() } + c.currentOpenFileReferences.Inc() + c.totalOpenFileReferences.Inc() entry.refCount++ return entry.value } @@ -105,11 +190,19 @@ func (c *Cache) Release(fileID uuid.UUID) { return } + c.currentOpenFileReferences.Dec() entry.refCount-- if entry.refCount > 0 { return } + c.currentOpenFiles.Dec() + + ev, err := entry.value.Load() + if err == nil { + c.currentCacheSize.Add(-1 * float64(ev.size)) + } + delete(c.data, fileID) } diff --git a/coderd/files/cache_internal_test.go b/coderd/files/cache_internal_test.go index 03603906b6ccd..9dc54132a5a0c 100644 --- a/coderd/files/cache_internal_test.go +++ b/coderd/files/cache_internal_test.go @@ -3,12 +3,12 @@ package files import ( "context" "io/fs" - "sync" "sync/atomic" "testing" "time" "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" "github.com/spf13/afero" "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" @@ -21,12 +21,12 @@ func TestConcurrency(t *testing.T) { emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs())) var fetches atomic.Int64 - c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, error) { + c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { fetches.Add(1) // Wait long enough before returning to make sure that all of the goroutines // will be waiting in line, ensuring that no one duplicated a fetch. time.Sleep(testutil.IntervalMedium) - return emptyFS, nil + return emptyFS, 0, nil }) batches := 1000 @@ -61,8 +61,8 @@ func TestRelease(t *testing.T) { t.Parallel() emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs())) - c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, error) { - return emptyFS, nil + c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { + return emptyFS, 0, nil }) batches := 100 @@ -95,10 +95,6 @@ func TestRelease(t *testing.T) { require.Equal(t, len(c.data), 0) } -func newTestCache(fetcher func(context.Context, uuid.UUID) (fs.FS, error)) Cache { - return Cache{ - lock: sync.Mutex{}, - data: make(map[uuid.UUID]*cacheEntry), - fetcher: fetcher, - } +func newTestCache(fetcher func(context.Context, uuid.UUID) (fs.FS, int64, error)) *Cache { + return New(fetcher, prometheus.NewRegistry()) } From 1bf620cc70bca80c91d0fd78b446178dbf1525f4 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Wed, 28 May 2025 15:37:01 -0500 Subject: [PATCH 2/5] chore: add unit test to ensure metrics are correct --- coderd/files/cache_internal_test.go | 76 ++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/coderd/files/cache_internal_test.go b/coderd/files/cache_internal_test.go index 9dc54132a5a0c..cf39639bdc114 100644 --- a/coderd/files/cache_internal_test.go +++ b/coderd/files/cache_internal_test.go @@ -13,21 +13,28 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + "github.com/coder/coder/v2/coderd/coderdtest/promhelp" "github.com/coder/coder/v2/testutil" ) +func cachePromMetricName(metric string) string { + return "coderd_file_cache_" + metric +} + func TestConcurrency(t *testing.T) { t.Parallel() + const fileSize = 10 emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs())) var fetches atomic.Int64 - c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { + reg := prometheus.NewRegistry() + c := New(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { fetches.Add(1) // Wait long enough before returning to make sure that all of the goroutines // will be waiting in line, ensuring that no one duplicated a fetch. time.Sleep(testutil.IntervalMedium) - return emptyFS, 0, nil - }) + return emptyFS, fileSize, nil + }, reg) batches := 1000 groups := make([]*errgroup.Group, 0, batches) @@ -55,15 +62,26 @@ func TestConcurrency(t *testing.T) { require.NoError(t, g.Wait()) } require.Equal(t, int64(batches), fetches.Load()) + + // Verify all the counts & metrics are correct. + require.Equal(t, batches, c.Count()) + require.Equal(t, batches*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) + require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_total"), nil)) + require.Equal(t, batches, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) + require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) + require.Equal(t, batches*batchSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) + require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil)) } func TestRelease(t *testing.T) { t.Parallel() + const fileSize = 10 emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs())) - c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { - return emptyFS, 0, nil - }) + reg := prometheus.NewRegistry() + c := New(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { + return emptyFS, fileSize, nil + }, reg) batches := 100 ids := make([]uuid.UUID, 0, batches) @@ -73,11 +91,21 @@ func TestRelease(t *testing.T) { // Acquire a bunch of references batchSize := 10 - for _, id := range ids { - for range batchSize { + for openedIdx, id := range ids { + for batchIdx := range batchSize { it, err := c.Acquire(t.Context(), id) require.NoError(t, err) require.Equal(t, emptyFS, it) + + // Each time a new file is opened, the metrics should be updated as so: + opened := openedIdx + 1 + // Number of unique files opened is equal to the idx of the ids. + require.Equal(t, opened, c.Count()) + require.Equal(t, opened, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) + // Current file size is unique files * file size. + require.Equal(t, opened*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) + // The number of refs is the current iteration of both loops. + require.Equal(t, ((opened-1)*batchSize)+(batchIdx+1), promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) } } @@ -85,16 +113,38 @@ func TestRelease(t *testing.T) { require.Equal(t, len(c.data), batches) // Now release all of the references - for _, id := range ids { - for range batchSize { + for closedIdx, id := range ids { + stillOpen := len(ids) - closedIdx + for closingIdx := range batchSize { c.Release(id) + + // Each time a file is released, the metrics should decrement the file refs + require.Equal(t, (stillOpen*batchSize)-(closingIdx+1), promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) + + closed := closingIdx+1 == batchSize + if closed { + continue + } + + // File ref still exists, so the counts should not change yet. + require.Equal(t, stillOpen, c.Count()) + require.Equal(t, stillOpen, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) + require.Equal(t, stillOpen*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) } } // ...and make sure that the cache has emptied itself. require.Equal(t, len(c.data), 0) -} -func newTestCache(fetcher func(context.Context, uuid.UUID) (fs.FS, int64, error)) *Cache { - return New(fetcher, prometheus.NewRegistry()) + // Verify all the counts & metrics are correct. + // All existing files are closed + require.Equal(t, 0, c.Count()) + require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) + require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) + require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) + + // Total counts remain + require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_total"), nil)) + require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) + require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil)) } From 9f8b24447f6f9ea17c9c26b37af86e9d1ddc4ef0 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Wed, 28 May 2025 15:45:05 -0500 Subject: [PATCH 3/5] rename metric --- coderd/files/cache.go | 16 ++++++++-------- coderd/files/cache_internal_test.go | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/coderd/files/cache.go b/coderd/files/cache.go index c4d31e85c9c9d..b1afe5c5050d4 100644 --- a/coderd/files/cache.go +++ b/coderd/files/cache.go @@ -47,43 +47,43 @@ func (c *Cache) registerMetrics(registerer prometheus.Registerer) *Cache { c.currentCacheSize = f.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: subsystem, - Name: "open_files_size_current", - Help: "The current size of all files currently open in the file cache.", + Name: "open_files_size_bytes_current", + Help: "The current amount of memory of all files currently open in the file cache.", }) c.totalCacheSize = f.NewCounter(prometheus.CounterOpts{ Namespace: "coderd", Subsystem: subsystem, - Name: "open_files_size_total", - Help: "The total size of all files opened in the file cache.", + Name: "open_files_size_bytes_total", + Help: "The total amount of memory ever opened in the file cache. This number never decrements.", }) c.currentOpenFiles = f.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: subsystem, Name: "open_files_current", - Help: "The number of unique files currently open in the file cache.", + Help: "The count of unique files currently open in the file cache.", }) c.totalOpenedFiles = f.NewCounter(prometheus.CounterOpts{ Namespace: "coderd", Subsystem: subsystem, Name: "open_files_total", - Help: "The number of unique files opened in the file cache.", + Help: "The total count of unique files ever opened in the file cache.", }) c.currentOpenFileReferences = f.NewGauge(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: subsystem, Name: "open_file_refs_current", - Help: "The number of file references currently open in the file cache.", + Help: "The count of file references currently open in the file cache. Multiple references can be held for the same file.", }) c.totalOpenFileReferences = f.NewCounter(prometheus.CounterOpts{ Namespace: "coderd", Subsystem: subsystem, Name: "open_file_refs_total", - Help: "The number of file references currently open in the file cache.", + Help: "The total number of file references ever opened in the file cache.", }) return c diff --git a/coderd/files/cache_internal_test.go b/coderd/files/cache_internal_test.go index cf39639bdc114..6b73b0e435b48 100644 --- a/coderd/files/cache_internal_test.go +++ b/coderd/files/cache_internal_test.go @@ -65,8 +65,8 @@ func TestConcurrency(t *testing.T) { // Verify all the counts & metrics are correct. require.Equal(t, batches, c.Count()) - require.Equal(t, batches*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) - require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_total"), nil)) + require.Equal(t, batches*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_bytes_current"), nil)) + require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_bytes_stotal"), nil)) require.Equal(t, batches, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) require.Equal(t, batches*batchSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) From 253ddee152ad60df8cb645c2818cb45c814bf8b6 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Wed, 28 May 2025 15:46:32 -0500 Subject: [PATCH 4/5] fixup! rename metric --- coderd/files/cache_internal_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/coderd/files/cache_internal_test.go b/coderd/files/cache_internal_test.go index 6b73b0e435b48..9f0fa351b723d 100644 --- a/coderd/files/cache_internal_test.go +++ b/coderd/files/cache_internal_test.go @@ -66,7 +66,7 @@ func TestConcurrency(t *testing.T) { // Verify all the counts & metrics are correct. require.Equal(t, batches, c.Count()) require.Equal(t, batches*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_bytes_current"), nil)) - require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_bytes_stotal"), nil)) + require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_bytes_total"), nil)) require.Equal(t, batches, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) require.Equal(t, batches*batchSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) @@ -103,7 +103,7 @@ func TestRelease(t *testing.T) { require.Equal(t, opened, c.Count()) require.Equal(t, opened, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) // Current file size is unique files * file size. - require.Equal(t, opened*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) + require.Equal(t, opened*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_bytes_current"), nil)) // The number of refs is the current iteration of both loops. require.Equal(t, ((opened-1)*batchSize)+(batchIdx+1), promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) } @@ -129,7 +129,7 @@ func TestRelease(t *testing.T) { // File ref still exists, so the counts should not change yet. require.Equal(t, stillOpen, c.Count()) require.Equal(t, stillOpen, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) - require.Equal(t, stillOpen*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) + require.Equal(t, stillOpen*fileSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_bytes_current"), nil)) } } @@ -139,12 +139,12 @@ func TestRelease(t *testing.T) { // Verify all the counts & metrics are correct. // All existing files are closed require.Equal(t, 0, c.Count()) - require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_current"), nil)) + require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_size_bytes_current"), nil)) require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil)) require.Equal(t, 0, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil)) // Total counts remain - require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_total"), nil)) + require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_bytes_total"), nil)) require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil)) require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil)) } From e54e48a5ca3f94f2ee07bb52c40df31ac4ef1349 Mon Sep 17 00:00:00 2001 From: Steven Masley Date: Fri, 30 May 2025 09:48:14 -0500 Subject: [PATCH 5/5] address PR comments --- coderd/files/cache.go | 31 +++++++++++++++++------------ coderd/files/cache_internal_test.go | 12 ++++++----- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/coderd/files/cache.go b/coderd/files/cache.go index b1afe5c5050d4..48587eb402351 100644 --- a/coderd/files/cache.go +++ b/coderd/files/cache.go @@ -19,14 +19,17 @@ import ( // NewFromStore returns a file cache that will fetch files from the provided // database. func NewFromStore(store database.Store, registerer prometheus.Registerer) *Cache { - fetch := func(ctx context.Context, fileID uuid.UUID) (fs.FS, int64, error) { + fetch := func(ctx context.Context, fileID uuid.UUID) (cacheEntryValue, error) { file, err := store.GetFileByID(ctx, fileID) if err != nil { - return nil, 0, xerrors.Errorf("failed to read file from database: %w", err) + return cacheEntryValue{}, xerrors.Errorf("failed to read file from database: %w", err) } content := bytes.NewBuffer(file.Data) - return archivefs.FromTarReader(content), int64(content.Len()), nil + return cacheEntryValue{ + FS: archivefs.FromTarReader(content), + size: int64(content.Len()), + }, nil } return New(fetch, registerer) @@ -100,6 +103,10 @@ type Cache struct { fetcher // metrics + cacheMetrics +} + +type cacheMetrics struct { currentOpenFileReferences prometheus.Gauge totalOpenFileReferences prometheus.Counter @@ -111,7 +118,7 @@ type Cache struct { } type cacheEntryValue struct { - dir fs.FS + fs.FS size int64 } @@ -121,7 +128,7 @@ type cacheEntry struct { value *lazy.ValueWithError[cacheEntryValue] } -type fetcher func(context.Context, uuid.UUID) (dir fs.FS, size int64, err error) +type fetcher func(context.Context, uuid.UUID) (cacheEntryValue, error) // Acquire will load the fs.FS for the given file. It guarantees that parallel // calls for the same fileID will only result in one fetch, and that parallel @@ -136,8 +143,9 @@ func (c *Cache) Acquire(ctx context.Context, fileID uuid.UUID) (fs.FS, error) { it, err := c.prepare(ctx, fileID).Load() if err != nil { c.Release(fileID) + return nil, err } - return it.dir, err + return it.FS, err } func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithError[cacheEntryValue] { @@ -147,18 +155,15 @@ func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithEr entry, ok := c.data[fileID] if !ok { value := lazy.NewWithError(func() (cacheEntryValue, error) { - dir, size, err := c.fetcher(ctx, fileID) + val, err := c.fetcher(ctx, fileID) // Always add to the cache size the bytes of the file loaded. if err == nil { - c.currentCacheSize.Add(float64(size)) - c.totalCacheSize.Add(float64(size)) + c.currentCacheSize.Add(float64(val.size)) + c.totalCacheSize.Add(float64(val.size)) } - return cacheEntryValue{ - dir: dir, - size: size, - }, err + return val, err }) entry = &cacheEntry{ diff --git a/coderd/files/cache_internal_test.go b/coderd/files/cache_internal_test.go index 9f0fa351b723d..6ad84185b44b6 100644 --- a/coderd/files/cache_internal_test.go +++ b/coderd/files/cache_internal_test.go @@ -2,7 +2,6 @@ package files import ( "context" - "io/fs" "sync/atomic" "testing" "time" @@ -28,12 +27,12 @@ func TestConcurrency(t *testing.T) { emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs())) var fetches atomic.Int64 reg := prometheus.NewRegistry() - c := New(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { + c := New(func(_ context.Context, _ uuid.UUID) (cacheEntryValue, error) { fetches.Add(1) // Wait long enough before returning to make sure that all of the goroutines // will be waiting in line, ensuring that no one duplicated a fetch. time.Sleep(testutil.IntervalMedium) - return emptyFS, fileSize, nil + return cacheEntryValue{FS: emptyFS, size: fileSize}, nil }, reg) batches := 1000 @@ -79,8 +78,11 @@ func TestRelease(t *testing.T) { const fileSize = 10 emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs())) reg := prometheus.NewRegistry() - c := New(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) { - return emptyFS, fileSize, nil + c := New(func(_ context.Context, _ uuid.UUID) (cacheEntryValue, error) { + return cacheEntryValue{ + FS: emptyFS, + size: fileSize, + }, nil }, reg) batches := 100