Skip to content

Commit 352cb4f

Browse files
committed
feat: add filecache prometheus metrics
1 parent 3a2e362 commit 352cb4f

File tree

3 files changed

+114
-25
lines changed

3 files changed

+114
-25
lines changed

coderd/coderd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ func New(options *Options) *API {
572572
TemplateScheduleStore: options.TemplateScheduleStore,
573573
UserQuietHoursScheduleStore: options.UserQuietHoursScheduleStore,
574574
AccessControlStore: options.AccessControlStore,
575-
FileCache: files.NewFromStore(options.Database),
575+
FileCache: files.NewFromStore(options.Database, options.PrometheusRegistry),
576576
Experiments: experiments,
577577
WebpushDispatcher: options.WebPushDispatcher,
578578
healthCheckGroup: &singleflight.Group[string, *healthsdk.HealthcheckReport]{},

coderd/files/cache.go

Lines changed: 106 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"sync"
88

99
"github.com/google/uuid"
10+
"github.com/prometheus/client_golang/prometheus"
11+
"github.com/prometheus/client_golang/prometheus/promauto"
1012
"golang.org/x/xerrors"
1113

1214
archivefs "github.com/coder/coder/v2/archive/fs"
@@ -16,22 +18,75 @@ import (
1618

1719
// NewFromStore returns a file cache that will fetch files from the provided
1820
// database.
19-
func NewFromStore(store database.Store) *Cache {
20-
fetcher := func(ctx context.Context, fileID uuid.UUID) (fs.FS, error) {
21+
func NewFromStore(store database.Store, registerer prometheus.Registerer) *Cache {
22+
fetch := func(ctx context.Context, fileID uuid.UUID) (fs.FS, int64, error) {
2123
file, err := store.GetFileByID(ctx, fileID)
2224
if err != nil {
23-
return nil, xerrors.Errorf("failed to read file from database: %w", err)
25+
return nil, 0, xerrors.Errorf("failed to read file from database: %w", err)
2426
}
2527

2628
content := bytes.NewBuffer(file.Data)
27-
return archivefs.FromTarReader(content), nil
29+
return archivefs.FromTarReader(content), int64(content.Len()), nil
2830
}
2931

30-
return &Cache{
32+
return New(fetch, registerer)
33+
}
34+
35+
func New(fetch fetcher, registerer prometheus.Registerer) *Cache {
36+
return (&Cache{
3137
lock: sync.Mutex{},
3238
data: make(map[uuid.UUID]*cacheEntry),
33-
fetcher: fetcher,
34-
}
39+
fetcher: fetch,
40+
}).registerMetrics(registerer)
41+
}
42+
43+
func (c *Cache) registerMetrics(registerer prometheus.Registerer) *Cache {
44+
subsystem := "file_cache"
45+
f := promauto.With(registerer)
46+
47+
c.currentCacheSize = f.NewGauge(prometheus.GaugeOpts{
48+
Namespace: "coderd",
49+
Subsystem: subsystem,
50+
Name: "open_files_size_current",
51+
Help: "The total byte size of all files currently open in the file cache.",
52+
})
53+
54+
c.totalCacheSize = f.NewCounter(prometheus.CounterOpts{
55+
Namespace: "coderd",
56+
Subsystem: subsystem,
57+
Name: "open_files_size_total",
58+
Help: "The total size of all files opened in the file cache.",
59+
})
60+
61+
c.currentOpenFiles = f.NewGauge(prometheus.GaugeOpts{
62+
Namespace: "coderd",
63+
Subsystem: subsystem,
64+
Name: "open_files_current",
65+
Help: "The number of unique files currently open in the file cache.",
66+
})
67+
68+
c.totalOpenedFiles = f.NewCounter(prometheus.CounterOpts{
69+
Namespace: "coderd",
70+
Subsystem: subsystem,
71+
Name: "open_files_total",
72+
Help: "The number of unique files opened in the file cache.",
73+
})
74+
75+
c.currentOpenFileReferences = f.NewGauge(prometheus.GaugeOpts{
76+
Namespace: "coderd",
77+
Subsystem: subsystem,
78+
Name: "open_file_refs_current",
79+
Help: "The number of file references currently open in the file cache.",
80+
})
81+
82+
c.totalOpenFileReferences = f.NewCounter(prometheus.CounterOpts{
83+
Namespace: "coderd",
84+
Subsystem: subsystem,
85+
Name: "open_file_refs_total",
86+
Help: "The total number of file references ever opened in the file cache.",
87+
})
88+
89+
return c
3590
}
3691

3792
// Cache persists the files for template versions, and is used by dynamic
@@ -43,15 +98,30 @@ type Cache struct {
4398
lock sync.Mutex
4499
data map[uuid.UUID]*cacheEntry
45100
fetcher
101+
102+
// metrics
103+
currentOpenFileReferences prometheus.Gauge
104+
totalOpenFileReferences prometheus.Counter
105+
106+
currentOpenFiles prometheus.Gauge
107+
totalOpenedFiles prometheus.Counter
108+
109+
currentCacheSize prometheus.Gauge
110+
totalCacheSize prometheus.Counter
111+
}
112+
113+
type cacheEntryValue struct {
114+
dir fs.FS
115+
size int64
46116
}
47117

48118
type cacheEntry struct {
49119
// refCount must only be accessed while the Cache lock is held.
50120
refCount int
51-
value *lazy.ValueWithError[fs.FS]
121+
value *lazy.ValueWithError[cacheEntryValue]
52122
}
53123

54-
type fetcher func(context.Context, uuid.UUID) (fs.FS, error)
124+
type fetcher func(context.Context, uuid.UUID) (dir fs.FS, size int64, err error)
55125

56126
// Acquire will load the fs.FS for the given file. It guarantees that parallel
57127
// calls for the same fileID will only result in one fetch, and that parallel
@@ -67,26 +137,41 @@ func (c *Cache) Acquire(ctx context.Context, fileID uuid.UUID) (fs.FS, error) {
67137
if err != nil {
68138
c.Release(fileID)
69139
}
70-
return it, err
140+
return it.dir, err
71141
}
72142

73-
func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithError[fs.FS] {
143+
func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithError[cacheEntryValue] {
74144
c.lock.Lock()
75145
defer c.lock.Unlock()
76146

77147
entry, ok := c.data[fileID]
78148
if !ok {
79-
value := lazy.NewWithError(func() (fs.FS, error) {
80-
return c.fetcher(ctx, fileID)
149+
value := lazy.NewWithError(func() (cacheEntryValue, error) {
150+
dir, size, err := c.fetcher(ctx, fileID)
151+
152+
// Always add to the cache size the bytes of the file loaded.
153+
if err == nil {
154+
c.currentCacheSize.Add(float64(size))
155+
c.totalCacheSize.Add(float64(size))
156+
}
157+
158+
return cacheEntryValue{
159+
dir: dir,
160+
size: size,
161+
}, err
81162
})
82163

83164
entry = &cacheEntry{
84165
value: value,
85166
refCount: 0,
86167
}
87168
c.data[fileID] = entry
169+
c.currentOpenFiles.Inc()
170+
c.totalOpenedFiles.Inc()
88171
}
89172

173+
c.currentOpenFileReferences.Inc()
174+
c.totalOpenFileReferences.Inc()
90175
entry.refCount++
91176
return entry.value
92177
}
@@ -105,11 +190,19 @@ func (c *Cache) Release(fileID uuid.UUID) {
105190
return
106191
}
107192

193+
c.currentOpenFileReferences.Dec()
108194
entry.refCount--
109195
if entry.refCount > 0 {
110196
return
111197
}
112198

199+
c.currentOpenFiles.Dec()
200+
201+
ev, err := entry.value.Load()
202+
if err == nil {
203+
c.currentCacheSize.Add(-1 * float64(ev.size))
204+
}
205+
113206
delete(c.data, fileID)
114207
}
115208

coderd/files/cache_internal_test.go

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ package files
33
import (
44
"context"
55
"io/fs"
6-
"sync"
76
"sync/atomic"
87
"testing"
98
"time"
109

1110
"github.com/google/uuid"
11+
"github.com/prometheus/client_golang/prometheus"
1212
"github.com/spf13/afero"
1313
"github.com/stretchr/testify/require"
1414
"golang.org/x/sync/errgroup"
@@ -21,12 +21,12 @@ func TestConcurrency(t *testing.T) {
2121

2222
emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs()))
2323
var fetches atomic.Int64
24-
c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, error) {
24+
c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) {
2525
fetches.Add(1)
2626
// Wait long enough before returning to make sure that all of the goroutines
2727
// will be waiting in line, ensuring that no one duplicated a fetch.
2828
time.Sleep(testutil.IntervalMedium)
29-
return emptyFS, nil
29+
return emptyFS, 0, nil
3030
})
3131

3232
batches := 1000
@@ -61,8 +61,8 @@ func TestRelease(t *testing.T) {
6161
t.Parallel()
6262

6363
emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs()))
64-
c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, error) {
65-
return emptyFS, nil
64+
c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, int64, error) {
65+
return emptyFS, 0, nil
6666
})
6767

6868
batches := 100
@@ -95,10 +95,6 @@ func TestRelease(t *testing.T) {
9595
require.Equal(t, len(c.data), 0)
9696
}
9797

98-
func newTestCache(fetcher func(context.Context, uuid.UUID) (fs.FS, error)) Cache {
99-
return Cache{
100-
lock: sync.Mutex{},
101-
data: make(map[uuid.UUID]*cacheEntry),
102-
fetcher: fetcher,
103-
}
98+
func newTestCache(fetcher func(context.Context, uuid.UUID) (fs.FS, int64, error)) *Cache {
99+
return New(fetcher, prometheus.NewRegistry())
104100
}

0 commit comments

Comments
 (0)