-
Notifications
You must be signed in to change notification settings - Fork 887
feat: expose agent metrics via Prometheus endpoint #7011
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8d4e67d
da729e6
9ad09b2
440657c
8764f89
663b5d5
63aff5e
3905481
f8d6f46
d487a77
7acbaf0
7418779
3a8e4e6
b5d0581
e4d708b
e0669f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,95 @@ | ||||||
package prometheusmetrics | ||||||
|
||||||
import ( | ||||||
"sync" | ||||||
|
||||||
"github.com/prometheus/client_golang/prometheus" | ||||||
) | ||||||
|
||||||
// CachedGaugeVec is a wrapper for the prometheus.GaugeVec which allows | ||||||
// for staging changes in the metrics vector. Calling "WithLabelValues(...)" | ||||||
// will update the internal gauge value, but it will not be returned by | ||||||
// "Collect(...)" until the "Commit()" method is called. The "Commit()" method | ||||||
// resets the internal gauge and applies all staged changes to it. | ||||||
// | ||||||
// The Use of CachedGaugeVec is recommended for use cases when there is a risk | ||||||
// that the Prometheus collector receives incomplete metrics, collected | ||||||
// in the middle of metrics recalculation, between "Reset()" and the last | ||||||
// "WithLabelValues()" call. | ||||||
type CachedGaugeVec struct { | ||||||
m sync.Mutex | ||||||
|
||||||
gaugeVec *prometheus.GaugeVec | ||||||
records []vectorRecord | ||||||
} | ||||||
|
||||||
var _ prometheus.Collector = new(CachedGaugeVec) | ||||||
|
||||||
// VectorOperation selects how a staged value is applied to its gauge when
// the staged changes are committed.
type VectorOperation int

// Supported vector operations.
const (
	// VectorOperationAdd adds the staged value to the gauge.
	VectorOperationAdd VectorOperation = iota
	// VectorOperationSet overwrites the gauge with the staged value.
	VectorOperationSet
)
|
||||||
type vectorRecord struct { | ||||||
operation VectorOperation | ||||||
value float64 | ||||||
labelValues []string | ||||||
} | ||||||
|
||||||
func NewCachedGaugeVec(gaugeVec *prometheus.GaugeVec) *CachedGaugeVec { | ||||||
return &CachedGaugeVec{ | ||||||
gaugeVec: gaugeVec, | ||||||
} | ||||||
} | ||||||
|
||||||
func (v *CachedGaugeVec) Describe(desc chan<- *prometheus.Desc) { | ||||||
v.gaugeVec.Describe(desc) | ||||||
} | ||||||
|
||||||
func (v *CachedGaugeVec) Collect(ch chan<- prometheus.Metric) { | ||||||
v.m.Lock() | ||||||
defer v.m.Unlock() | ||||||
|
||||||
v.gaugeVec.Collect(ch) | ||||||
} | ||||||
|
||||||
func (v *CachedGaugeVec) WithLabelValues(operation VectorOperation, value float64, labelValues ...string) { | ||||||
switch operation { | ||||||
case VectorOperationAdd: | ||||||
case VectorOperationSet: | ||||||
default: | ||||||
Comment on lines
+59
to
+62
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I usually prefer this. But it does not matter. switch operation {
case VectorOperationAdd, VectorOperationSet:
default:
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
panic("unsupported vector operation") | ||||||
} | ||||||
|
||||||
v.m.Lock() | ||||||
defer v.m.Unlock() | ||||||
|
||||||
v.records = append(v.records, vectorRecord{ | ||||||
operation: operation, | ||||||
value: value, | ||||||
labelValues: labelValues, | ||||||
}) | ||||||
} | ||||||
|
||||||
// Commit will set the internal value as the cached value to return from "Collect()". | ||||||
// The internal metric value is completely reset, so the caller should expect | ||||||
// the gauge to be empty for the next 'WithLabelValues' values. | ||||||
func (v *CachedGaugeVec) Commit() { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. // Commit will set the internal value as the cached value to return from 'Collect'.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment added. |
||||||
v.m.Lock() | ||||||
defer v.m.Unlock() | ||||||
|
||||||
v.gaugeVec.Reset() | ||||||
for _, record := range v.records { | ||||||
g := v.gaugeVec.WithLabelValues(record.labelValues...) | ||||||
switch record.operation { | ||||||
case VectorOperationAdd: | ||||||
g.Add(record.value) | ||||||
case VectorOperationSet: | ||||||
g.Set(record.value) | ||||||
} | ||||||
} | ||||||
|
||||||
v.records = nil | ||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
package prometheusmetrics_test | ||
|
||
import ( | ||
"sort" | ||
"testing" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
dto "github.com/prometheus/client_model/go" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/coder/coder/coderd/prometheusmetrics" | ||
) | ||
|
||
func TestCollector_Add(t *testing.T) { | ||
t.Parallel() | ||
|
||
// given | ||
agentsGauge := prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
Namespace: "coderd", | ||
Subsystem: "agents", | ||
Name: "up", | ||
Help: "The number of active agents per workspace.", | ||
}, []string{"username", "workspace_name"})) | ||
|
||
// when | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 7, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 23, "second user", "your workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 1, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 25, "second user", "your workspace") | ||
agentsGauge.Commit() | ||
|
||
// then | ||
ch := make(chan prometheus.Metric, 2) | ||
agentsGauge.Collect(ch) | ||
|
||
metrics := collectAndSortMetrics(t, agentsGauge, 2) | ||
|
||
assert.Equal(t, "first user", metrics[0].Label[0].GetValue()) // Username | ||
assert.Equal(t, "my workspace", metrics[0].Label[1].GetValue()) // Workspace name | ||
assert.Equal(t, 8, int(metrics[0].Gauge.GetValue())) // Metric value | ||
|
||
assert.Equal(t, "second user", metrics[1].Label[0].GetValue()) // Username | ||
assert.Equal(t, "your workspace", metrics[1].Label[1].GetValue()) // Workspace name | ||
assert.Equal(t, 48, int(metrics[1].Gauge.GetValue())) // Metric value | ||
} | ||
|
||
func TestCollector_Set(t *testing.T) { | ||
t.Parallel() | ||
|
||
// given | ||
agentsGauge := prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
Namespace: "coderd", | ||
Subsystem: "agents", | ||
Name: "up", | ||
Help: "The number of active agents per workspace.", | ||
}, []string{"username", "workspace_name"})) | ||
|
||
// when | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet, 3, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet, 4, "second user", "your workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet, 5, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet, 6, "second user", "your workspace") | ||
agentsGauge.Commit() | ||
|
||
// then | ||
ch := make(chan prometheus.Metric, 2) | ||
agentsGauge.Collect(ch) | ||
|
||
metrics := collectAndSortMetrics(t, agentsGauge, 2) | ||
|
||
assert.Equal(t, "first user", metrics[0].Label[0].GetValue()) // Username | ||
assert.Equal(t, "my workspace", metrics[0].Label[1].GetValue()) // Workspace name | ||
assert.Equal(t, 5, int(metrics[0].Gauge.GetValue())) // Metric value | ||
|
||
assert.Equal(t, "second user", metrics[1].Label[0].GetValue()) // Username | ||
assert.Equal(t, "your workspace", metrics[1].Label[1].GetValue()) // Workspace name | ||
assert.Equal(t, 6, int(metrics[1].Gauge.GetValue())) // Metric value | ||
} | ||
|
||
func TestCollector_Set_Add(t *testing.T) { | ||
t.Parallel() | ||
|
||
// given | ||
agentsGauge := prometheusmetrics.NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{ | ||
Namespace: "coderd", | ||
Subsystem: "agents", | ||
Name: "up", | ||
Help: "The number of active agents per workspace.", | ||
}, []string{"username", "workspace_name"})) | ||
|
||
// when | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 9, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 8, "second user", "your workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 7, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 6, "second user", "your workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet, 5, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationSet, 4, "second user", "your workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 3, "first user", "my workspace") | ||
agentsGauge.WithLabelValues(prometheusmetrics.VectorOperationAdd, 2, "second user", "your workspace") | ||
agentsGauge.Commit() | ||
|
||
// then | ||
ch := make(chan prometheus.Metric, 2) | ||
agentsGauge.Collect(ch) | ||
|
||
metrics := collectAndSortMetrics(t, agentsGauge, 2) | ||
|
||
assert.Equal(t, "first user", metrics[0].Label[0].GetValue()) // Username | ||
assert.Equal(t, "my workspace", metrics[0].Label[1].GetValue()) // Workspace name | ||
assert.Equal(t, 8, int(metrics[0].Gauge.GetValue())) // Metric value | ||
|
||
assert.Equal(t, "second user", metrics[1].Label[0].GetValue()) // Username | ||
assert.Equal(t, "your workspace", metrics[1].Label[1].GetValue()) // Workspace name | ||
assert.Equal(t, 6, int(metrics[1].Gauge.GetValue())) // Metric value | ||
} | ||
|
||
func collectAndSortMetrics(t *testing.T, collector prometheus.Collector, count int) []dto.Metric { | ||
ch := make(chan prometheus.Metric, count) | ||
defer close(ch) | ||
|
||
var metrics []dto.Metric | ||
|
||
collector.Collect(ch) | ||
for i := 0; i < count; i++ { | ||
m := <-ch | ||
|
||
var metric dto.Metric | ||
err := m.Write(&metric) | ||
require.NoError(t, err) | ||
|
||
metrics = append(metrics, metric) | ||
} | ||
|
||
// Ensure always the same order of metrics | ||
sort.Slice(metrics, func(i, j int) bool { | ||
return sort.StringsAreSorted([]string{metrics[i].Label[0].GetValue(), metrics[j].Label[1].GetValue()}) | ||
}) | ||
return metrics | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you doc the usage? And the why?
Eg:
Or something...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added 👍