From b936232aa3ea41e2791b9aebd8f2542ae92fdb97 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Thu, 25 Jan 2024 11:28:42 +0100 Subject: [PATCH 1/7] feat: use map instead slice in metrics aggregator --- coderd/prometheusmetrics/aggregator.go | 71 +++++++++++---------- coderd/prometheusmetrics/aggregator_test.go | 38 ++++++++++- 2 files changed, 73 insertions(+), 36 deletions(-) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index aac06d63ef744..06d0060a3252e 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -2,6 +2,8 @@ package prometheusmetrics import ( "context" + "fmt" + "strings" "time" "github.com/prometheus/client_golang/prometheus" @@ -30,7 +32,7 @@ const ( ) type MetricsAggregator struct { - queue []annotatedMetric + store map[string]annotatedMetric log slog.Logger metricsCleanupInterval time.Duration @@ -64,6 +66,20 @@ type annotatedMetric struct { expiryDate time.Time } +func hashKey(req *updateRequest, m *agentproto.Stats_Metric) string { + var sbLabels strings.Builder + for i, label := range m.GetLabels() { + _, _ = sbLabels.WriteString(label.Name) + _ = sbLabels.WriteByte('=') + _, _ = sbLabels.WriteString(label.Value) + + if i-1 != len(m.GetLabels()) { + _ = sbLabels.WriteByte(',') + } + } + return fmt.Sprintf("%s|%s|%s|%s|%s|%s", req.username, req.workspaceName, req.agentName, req.templateName, m.GetName(), sbLabels.String()) +} + var _ prometheus.Collector = new(MetricsAggregator) func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool { @@ -129,6 +145,8 @@ func NewMetricsAggregator(logger slog.Logger, registerer prometheus.Registerer, log: logger.Named(loggerName), metricsCleanupInterval: metricsCleanupInterval, + store: map[string]annotatedMetric{}, + collectCh: make(chan (chan []prometheus.Metric), sizeCollectCh), updateCh: make(chan updateRequest, sizeUpdateCh), @@ -152,32 +170,30 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { ma.log.Debug(ctx, "update metrics") timer := prometheus.NewTimer(ma.updateHistogram) - UpdateLoop: for _, m := range req.metrics { - for i, q := range ma.queue { - if q.is(req, m) { - ma.queue[i].Stats_Metric.Value = m.Value - ma.queue[i].expiryDate = req.timestamp.Add(ma.metricsCleanupInterval) - continue UpdateLoop + key := hashKey(&req, m) + if val, ok := ma.store[key]; ok { + val.Stats_Metric.Value = m.Value + val.expiryDate = req.timestamp.Add(ma.metricsCleanupInterval) + ma.store[key] = val + } else { + ma.store[key] = annotatedMetric{ + Stats_Metric: m, + username: req.username, + workspaceName: req.workspaceName, + agentName: req.agentName, + templateName: req.templateName, + expiryDate: req.timestamp.Add(ma.metricsCleanupInterval), } } - - ma.queue = append(ma.queue, annotatedMetric{ - Stats_Metric: m, - username: req.username, - workspaceName: req.workspaceName, - agentName: req.agentName, - templateName: req.templateName, - expiryDate: req.timestamp.Add(ma.metricsCleanupInterval), - }) } timer.ObserveDuration() case outputCh := <-ma.collectCh: ma.log.Debug(ctx, "collect metrics") - output := make([]prometheus.Metric, 0, len(ma.queue)) - for _, m := range ma.queue { + output := make([]prometheus.Metric, 0, len(ma.store)) + for _, m := range ma.store { promMetric, err := m.asPrometheus() if err != nil { ma.log.Error(ctx, "can't convert Prometheus value type", slog.F("name", m.Name), slog.F("type", m.Type), slog.F("value", m.Value), slog.Error(err)) @@ -191,25 +207,12 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { ma.log.Debug(ctx, "clean expired metrics") timer := prometheus.NewTimer(ma.cleanupHistogram) - now := time.Now() - var hasExpiredMetrics bool - for _, m := range ma.queue { - if now.After(m.expiryDate) { - hasExpiredMetrics = true - break - } - } - - if hasExpiredMetrics { - fresh := make([]annotatedMetric, 0, len(ma.queue)) - for _, m := range ma.queue { - if m.expiryDate.After(now) { - fresh = append(fresh, m) - } + for key, val := range ma.store { + if now.After(val.expiryDate) { + delete(ma.store, key) } - ma.queue = fresh } timer.ObserveDuration() diff --git a/coderd/prometheusmetrics/aggregator_test.go b/coderd/prometheusmetrics/aggregator_test.go index 00d088f8b13b4..fff8023564d82 100644 --- a/coderd/prometheusmetrics/aggregator_test.go +++ b/coderd/prometheusmetrics/aggregator_test.go @@ -3,6 +3,7 @@ package prometheusmetrics_test import ( "context" "sort" + "strings" "sync/atomic" "testing" "time" @@ -80,7 +81,6 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { } expected := []*agentproto.Stats_Metric{ {Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels}, - {Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels}, {Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{ {Name: "agent_name", Value: testAgentName}, {Name: "lizz", Value: "rizz"}, @@ -88,7 +88,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { {Name: "workspace_name", Value: testWorkspaceName}, {Name: "template_name", Value: testTemplateName}, }}, - {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels}, + {Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels}, {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{ {Name: "agent_name", Value: testAgentName}, {Name: "foobar", Value: "Foobaz"}, @@ -97,6 +97,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { {Name: "workspace_name", Value: testWorkspaceName}, {Name: "template_name", Value: testTemplateName}, }}, + {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels}, {Name: "d_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6, Labels: commonLabels}, } @@ -130,6 +131,39 @@ func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, a return false } + prometheusMetricString := func(m prometheus.Metric) string { + var sb strings.Builder + + desc := m.Desc() + _, _ = sb.WriteString(desc.String()) + _ = sb.WriteByte('|') + + var d dto.Metric + err := m.Write(&d) + require.NoError(t, err) + dtoLabels := asMetricAgentLabels(d.GetLabel()) + sort.Slice(dtoLabels, func(i, j int) bool { + return dtoLabels[i].Name < dtoLabels[j].Name + }) + + for i, dtoLabel := range dtoLabels { + _, _ = sb.WriteString(dtoLabel.Name) + _ = sb.WriteByte('=') + _, _ = sb.WriteString(dtoLabel.Value) + + if i-1 != len(dtoLabels) { + _ = sb.WriteByte(',') + } + } + return sb.String() + } + + sort.Slice(actual, func(i, j int) bool { + m1 := prometheusMetricString(actual[i]) + m2 := prometheusMetricString(actual[j]) + return m1 < m2 + }) + for i, e := range expected { desc := actual[i].Desc() assert.Contains(t, desc.String(), e.Name) From c90a41f8f485c88be4a39c64e0e54f7d881dbc80 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Thu, 25 Jan 2024 11:48:09 +0100 Subject: [PATCH 2/7] fix --- coderd/prometheusmetrics/aggregator.go | 2 +- coderd/prometheusmetrics/aggregator_test.go | 58 ++++++++++----------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index 06d0060a3252e..bcb43dd69ec7d 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -26,7 +26,7 @@ const ( loggerName = "prometheusmetrics" sizeCollectCh = 10 - sizeUpdateCh = 1024 + sizeUpdateCh = 4096 defaultMetricsCleanupInterval = 2 * time.Minute ) diff --git a/coderd/prometheusmetrics/aggregator_test.go b/coderd/prometheusmetrics/aggregator_test.go index fff8023564d82..74618be69715a 100644 --- a/coderd/prometheusmetrics/aggregator_test.go +++ b/coderd/prometheusmetrics/aggregator_test.go @@ -131,36 +131,9 @@ func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, a return false } - prometheusMetricString := func(m prometheus.Metric) string { - var sb strings.Builder - - desc := m.Desc() - _, _ = sb.WriteString(desc.String()) - _ = sb.WriteByte('|') - - var d dto.Metric - err := m.Write(&d) - require.NoError(t, err) - dtoLabels := asMetricAgentLabels(d.GetLabel()) - sort.Slice(dtoLabels, func(i, j int) bool { - return dtoLabels[i].Name < dtoLabels[j].Name - }) - - for i, dtoLabel := range dtoLabels { - _, _ = sb.WriteString(dtoLabel.Name) - _ = sb.WriteByte('=') - _, _ = sb.WriteString(dtoLabel.Value) - - if i-1 != len(dtoLabels) { - _ = sb.WriteByte(',') - } - } - return sb.String() - } - sort.Slice(actual, func(i, j int) bool { - m1 := prometheusMetricString(actual[i]) - m2 := prometheusMetricString(actual[j]) + m1 := prometheusMetricToString(t, actual[i]) + m2 := prometheusMetricToString(t, actual[j]) return m1 < m2 }) @@ -190,6 +163,33 @@ func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, a return true } +func prometheusMetricToString(t *testing.T, m prometheus.Metric) string { + var sb strings.Builder + + desc := m.Desc() + _, _ = sb.WriteString(desc.String()) + _ = sb.WriteByte('|') + + var d dto.Metric + err := m.Write(&d) + require.NoError(t, err) + dtoLabels := asMetricAgentLabels(d.GetLabel()) + sort.Slice(dtoLabels, func(i, j int) bool { + return dtoLabels[i].Name < dtoLabels[j].Name + }) + + for i, dtoLabel := range dtoLabels { + _, _ = sb.WriteString(dtoLabel.Name) + _ = sb.WriteByte('=') + _, _ = sb.WriteString(dtoLabel.Value) + + if i-1 != len(dtoLabels) { + _ = sb.WriteByte(',') + } + } + return sb.String() +} + func asMetricAgentLabels(dtoLabels []*dto.LabelPair) []*agentproto.Stats_Metric_Label { metricLabels := make([]*agentproto.Stats_Metric_Label, 0, len(dtoLabels)) for _, dtoLabel := range dtoLabels { From f407b4739ad98c19a430381c72ce329e3a5f9dc5 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Thu, 25 Jan 2024 12:04:27 +0100 Subject: [PATCH 3/7] add store size gauge --- coderd/prometheusmetrics/aggregator.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index bcb43dd69ec7d..5d83ea8fe5c00 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -40,6 +40,7 @@ type MetricsAggregator struct { collectCh chan (chan []prometheus.Metric) updateCh chan updateRequest + storeSizeGauge prometheus.Gauge updateHistogram prometheus.Histogram cleanupHistogram prometheus.Histogram } @@ -117,6 +118,17 @@ func NewMetricsAggregator(logger slog.Logger, registerer prometheus.Registerer, metricsCleanupInterval = duration } + storeSizeGauge := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "coderd", + Subsystem: "prometheusmetrics", + Name: "metrics_aggregator_store_size", + Help: "The number of metrics stored in the aggregator", + }) + err := registerer.Register(storeSizeGauge) + if err != nil { + return nil, err + } + updateHistogram := prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: "coderd", Subsystem: "prometheusmetrics", @@ -124,7 +136,7 @@ func NewMetricsAggregator(logger slog.Logger, registerer prometheus.Registerer, Help: "Histogram for duration of metrics aggregator update in seconds.", Buckets: []float64{0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.500, 1, 5, 10, 30}, }) - err := registerer.Register(updateHistogram) + err = registerer.Register(updateHistogram) if err != nil { return nil, err } @@ -150,6 +162,7 @@ func NewMetricsAggregator(logger slog.Logger, registerer prometheus.Registerer, collectCh: make(chan (chan []prometheus.Metric), sizeCollectCh), updateCh: make(chan updateRequest, sizeUpdateCh), + storeSizeGauge: storeSizeGauge, updateHistogram: updateHistogram, cleanupHistogram: cleanupHistogram, }, nil @@ -187,8 +200,9 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { } } } - timer.ObserveDuration() + + ma.storeSizeGauge.Set(float64(len(ma.store))) case outputCh := <-ma.collectCh: ma.log.Debug(ctx, "collect metrics") @@ -217,6 +231,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { timer.ObserveDuration() cleanupTicker.Reset(ma.metricsCleanupInterval) + ma.storeSizeGauge.Set(float64(len(ma.store))) case <-ctx.Done(): ma.log.Debug(ctx, "metrics aggregator is stopped") From 12fcdab17f3d441c5b7618d259bb1a8abef79a70 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Thu, 25 Jan 2024 13:11:22 +0100 Subject: [PATCH 4/7] Use struct key --- coderd/prometheusmetrics/aggregator.go | 58 ++++++++++++++++++-------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index 5d83ea8fe5c00..1b9e6eea495be 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -2,7 +2,6 @@ package prometheusmetrics import ( "context" - "fmt" "strings" "time" @@ -32,7 +31,7 @@ const ( ) type MetricsAggregator struct { - store map[string]annotatedMetric + store map[metricKey]annotatedMetric log slog.Logger metricsCleanupInterval time.Duration @@ -67,29 +66,54 @@ type annotatedMetric struct { expiryDate time.Time } -func hashKey(req *updateRequest, m *agentproto.Stats_Metric) string { - var sbLabels strings.Builder - for i, label := range m.GetLabels() { - _, _ = sbLabels.WriteString(label.Name) - _ = sbLabels.WriteByte('=') - _, _ = sbLabels.WriteString(label.Value) +type metricKey struct { + username string + workspaceName string + agentName string + templateName string - if i-1 != len(m.GetLabels()) { - _ = sbLabels.WriteByte(',') - } + metricName string + labelsStr string +} + +func (m1 metricKey) Equal(m2 metricKey) bool { + return m1.metricName == m2.metricName && + m1.labelsStr == m2.labelsStr && + m1.username == m2.username && + m1.workspaceName == m2.workspaceName && + m1.agentName == m2.agentName && + m1.templateName == m2.templateName +} + +func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey { + var sb strings.Builder + for _, label := range m.GetLabels() { + _, _ = sb.WriteString(label.Name) + _ = sb.WriteByte('=') + _, _ = sb.WriteString(label.Value) + _ = sb.WriteByte(',') + } + labels := strings.TrimRight(sb.String(), ",") + + return metricKey{ + username: req.username, + workspaceName: req.workspaceName, + agentName: req.agentName, + templateName: req.templateName, + metricName: m.Name, + labelsStr: labels, } - return fmt.Sprintf("%s|%s|%s|%s|%s|%s", req.username, req.workspaceName, req.agentName, req.templateName, m.GetName(), sbLabels.String()) } var _ prometheus.Collector = new(MetricsAggregator) func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool { - return am.username == req.username && + return am.Name == m.Name && + agentproto.LabelsEqual(am.Labels, m.Labels) && + am.username == req.username && am.workspaceName == req.workspaceName && am.agentName == req.agentName && - am.templateName == req.templateName && - am.Name == m.Name && - agentproto.LabelsEqual(am.Labels, m.Labels) + am.templateName == req.templateName } func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) { @@ -157,7 +181,7 @@ func NewMetricsAggregator(logger slog.Logger, registerer prometheus.Registerer, log: logger.Named(loggerName), metricsCleanupInterval: metricsCleanupInterval, - store: map[string]annotatedMetric{}, + store: map[metricKey]annotatedMetric{}, collectCh: make(chan (chan []prometheus.Metric), sizeCollectCh), updateCh: make(chan updateRequest, sizeUpdateCh), From 628e4ec04d35dccca8a5a405ef9b4bf02fc9683a Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Thu, 25 Jan 2024 15:23:25 +0100 Subject: [PATCH 5/7] Address PR comments --- coderd/prometheusmetrics/aggregator.go | 9 +++- coderd/prometheusmetrics/aggregator_test.go | 59 ++++++++++++++++++--- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index 1b9e6eea495be..d304cd794fb07 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -30,6 +30,8 @@ const ( defaultMetricsCleanupInterval = 2 * time.Minute ) +var MetricLabelValueEncoder = strings.NewReplacer("|", "\\|", ",", "\\,", "=", "\\=") + type MetricsAggregator struct { store map[metricKey]annotatedMetric @@ -88,9 +90,13 @@ func (m1 metricKey) Equal(m2 metricKey) bool { func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey { var sb strings.Builder for _, label := range m.GetLabels() { + if label.Value == "" { + continue + } + _, _ = sb.WriteString(label.Name) _ = sb.WriteByte('=') - _, _ = sb.WriteString(label.Value) + _, _ = sb.WriteString(MetricLabelValueEncoder.Replace(label.Value)) _ = sb.WriteByte(',') } labels := strings.TrimRight(sb.String(), ",") @@ -209,6 +215,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() { timer := prometheus.NewTimer(ma.updateHistogram) for _, m := range req.metrics { key := hashKey(&req, m) + if val, ok := ma.store[key]; ok { val.Stats_Metric.Value = m.Value val.expiryDate = req.timestamp.Add(ma.metricsCleanupInterval) diff --git a/coderd/prometheusmetrics/aggregator_test.go b/coderd/prometheusmetrics/aggregator_test.go index 74618be69715a..e1dea9c6a2dfa 100644 --- a/coderd/prometheusmetrics/aggregator_test.go +++ b/coderd/prometheusmetrics/aggregator_test.go @@ -71,6 +71,24 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { {Name: "hello", Value: "world"}, }}, {Name: "d_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6}, + {Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 15, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "foobar", Value: "Foo,ba=z"}, + {Name: "hello", Value: "wo,,r=d"}, + }}, + {Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "foobar", Value: "foobaz"}, + {Name: "empty", Value: ""}, + }}, + } + + given3 := []*agentproto.Stats_Metric{ + {Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 17, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "cat", Value: "do,=g"}, + {Name: "hello", Value: "wo,,rld"}, + }}, + {Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 8, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "foobar", Value: "foobaz"}, + }}, } commonLabels := []*agentproto.Stats_Metric_Label{ @@ -99,11 +117,35 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { }}, {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels}, {Name: "d_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6, Labels: commonLabels}, + {Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 17, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "agent_name", Value: testAgentName}, + {Name: "cat", Value: "do,=g"}, + {Name: "hello", Value: "wo,,rld"}, + {Name: "username", Value: testUsername}, + {Name: "workspace_name", Value: testWorkspaceName}, + {Name: "template_name", Value: testTemplateName}, + }}, + {Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 15, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "agent_name", Value: testAgentName}, + {Name: "foobar", Value: "Foo,ba=z"}, + {Name: "hello", Value: "wo,,r=d"}, + {Name: "username", Value: testUsername}, + {Name: "workspace_name", Value: testWorkspaceName}, + {Name: "template_name", Value: testTemplateName}, + }}, + {Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 8, Labels: []*agentproto.Stats_Metric_Label{ + {Name: "agent_name", Value: testAgentName}, + {Name: "foobar", Value: "foobaz"}, + {Name: "username", Value: testUsername}, + {Name: "workspace_name", Value: testWorkspaceName}, + {Name: "template_name", Value: testTemplateName}, + }}, } // when metricsAggregator.Update(ctx, testLabels, given1) metricsAggregator.Update(ctx, testLabels, given2) + metricsAggregator.Update(ctx, testLabels, given3) // then require.Eventually(t, func() bool { @@ -178,21 +220,24 @@ func prometheusMetricToString(t *testing.T, m prometheus.Metric) string { return dtoLabels[i].Name < dtoLabels[j].Name }) - for i, dtoLabel := range dtoLabels { + for _, dtoLabel := range dtoLabels { + if dtoLabel.Value == "" { + continue + } _, _ = sb.WriteString(dtoLabel.Name) _ = sb.WriteByte('=') - _, _ = sb.WriteString(dtoLabel.Value) - - if i-1 != len(dtoLabels) { - _ = sb.WriteByte(',') - } + _, _ = sb.WriteString(prometheusmetrics.MetricLabelValueEncoder.Replace(dtoLabel.Value)) } - return sb.String() + return strings.TrimRight(sb.String(), ",") } func asMetricAgentLabels(dtoLabels []*dto.LabelPair) []*agentproto.Stats_Metric_Label { metricLabels := make([]*agentproto.Stats_Metric_Label, 0, len(dtoLabels)) for _, dtoLabel := range dtoLabels { + if dtoLabel.GetValue() == "" { + continue + } + metricLabels = append(metricLabels, &agentproto.Stats_Metric_Label{ Name: dtoLabel.GetName(), Value: dtoLabel.GetValue(), From bf48edc43971c9ba7ef11674db001ef082524b31 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Fri, 26 Jan 2024 09:51:12 +0100 Subject: [PATCH 6/7] Address Spike's feedback --- coderd/prometheusmetrics/aggregator.go | 20 +- .../aggregator_internal_test.go | 210 ------------------ coderd/prometheusmetrics/aggregator_test.go | 4 +- 3 files changed, 4 insertions(+), 230 deletions(-) delete mode 100644 coderd/prometheusmetrics/aggregator_internal_test.go diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index d304cd794fb07..82e5d12e03c30 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -30,7 +30,7 @@ const ( defaultMetricsCleanupInterval = 2 * time.Minute ) -var MetricLabelValueEncoder = strings.NewReplacer("|", "\\|", ",", "\\,", "=", "\\=") +var MetricLabelValueEncoder = strings.NewReplacer("\\", "\\\\", "|", "\\|", ",", "\\,", "=", "\\=") type MetricsAggregator struct { store map[metricKey]annotatedMetric @@ -78,15 +78,6 @@ type metricKey struct { labelsStr string } -func (m1 metricKey) Equal(m2 metricKey) bool { - return m1.metricName == m2.metricName && - m1.labelsStr == m2.labelsStr && - m1.username == m2.username && - m1.workspaceName == m2.workspaceName && - m1.agentName == m2.agentName && - m1.templateName == m2.templateName -} - func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey { var sb strings.Builder for _, label := range m.GetLabels() { @@ -113,15 +104,6 @@ func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey { var _ prometheus.Collector = new(MetricsAggregator) -func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool { - return am.Name == m.Name && - agentproto.LabelsEqual(am.Labels, m.Labels) && - am.username == req.username && - am.workspaceName == req.workspaceName && - am.agentName == req.agentName && - am.templateName == req.templateName -} - func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) { labels := make([]string, 0, len(agentMetricsLabels)+len(am.Labels)) labelValues := make([]string, 0, len(agentMetricsLabels)+len(am.Labels)) diff --git a/coderd/prometheusmetrics/aggregator_internal_test.go b/coderd/prometheusmetrics/aggregator_internal_test.go deleted file mode 100644 index 8830e1b1afc30..0000000000000 --- a/coderd/prometheusmetrics/aggregator_internal_test.go +++ /dev/null @@ -1,210 +0,0 @@ -package prometheusmetrics - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/coder/coder/v2/agent/proto" -) - -func TestAnnotatedMetric_Is(t *testing.T) { - t.Parallel() - am1 := &annotatedMetric{ - Stats_Metric: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 1, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - username: "spike", - workspaceName: "work", - agentName: "janus", - templateName: "tempe", - expiryDate: time.Now(), - } - for _, tc := range []struct { - name string - req updateRequest - m *proto.Stats_Metric - is bool - }{ - { - name: "OK", - req: updateRequest{ - username: "spike", - workspaceName: "work", - agentName: "janus", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - is: true, - }, - { - name: "missingLabel", - req: updateRequest{ - username: "spike", - workspaceName: "work", - agentName: "janus", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "certainty", Value: "yes"}, - }, - }, - is: false, - }, - { - name: "wrongLabelValue", - req: updateRequest{ - username: "spike", - workspaceName: "work", - agentName: "janus", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "inshallah"}, - }, - }, - is: false, - }, - { - name: "wrongMetricName", - req: updateRequest{ - username: "spike", - workspaceName: "work", - agentName: "janus", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "cub", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - is: false, - }, - { - name: "wrongUsername", - req: updateRequest{ - username: "steve", - workspaceName: "work", - agentName: "janus", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - is: false, - }, - { - name: "wrongWorkspaceName", - req: updateRequest{ - username: "spike", - workspaceName: "play", - agentName: "janus", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - is: false, - }, - { - name: "wrongAgentName", - req: updateRequest{ - username: "spike", - workspaceName: "work", - agentName: "bond", - templateName: "tempe", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - is: false, - }, - { - name: "wrongTemplateName", - req: updateRequest{ - username: "spike", - workspaceName: "work", - agentName: "janus", - templateName: "phoenix", - metrics: nil, - timestamp: time.Now().Add(-5 * time.Second), - }, - m: &proto.Stats_Metric{ - Name: "met", - Type: proto.Stats_Metric_COUNTER, - Value: 2, - Labels: []*proto.Stats_Metric_Label{ - {Name: "rarity", Value: "blue moon"}, - {Name: "certainty", Value: "yes"}, - }, - }, - is: false, - }, - } { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - require.Equal(t, tc.is, am1.is(tc.req, tc.m)) - }) - } -} diff --git a/coderd/prometheusmetrics/aggregator_test.go b/coderd/prometheusmetrics/aggregator_test.go index e1dea9c6a2dfa..bc17dc9be7a11 100644 --- a/coderd/prometheusmetrics/aggregator_test.go +++ b/coderd/prometheusmetrics/aggregator_test.go @@ -73,11 +73,12 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { {Name: "d_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6}, {Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 15, Labels: []*agentproto.Stats_Metric_Label{ {Name: "foobar", Value: "Foo,ba=z"}, + {Name: "halo", Value: "wor\\,d=1,e=\\,2"}, {Name: "hello", Value: "wo,,r=d"}, }}, {Name: "f_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 6, Labels: []*agentproto.Stats_Metric_Label{ - {Name: "foobar", Value: "foobaz"}, {Name: "empty", Value: ""}, + {Name: "foobar", Value: "foobaz"}, }}, } @@ -128,6 +129,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) { {Name: "e_gauge_four", Type: agentproto.Stats_Metric_GAUGE, Value: 15, Labels: []*agentproto.Stats_Metric_Label{ {Name: "agent_name", Value: testAgentName}, {Name: "foobar", Value: "Foo,ba=z"}, + {Name: "halo", Value: "wor\\,d=1,e=\\,2"}, {Name: "hello", Value: "wo,,r=d"}, {Name: "username", Value: testUsername}, {Name: "workspace_name", Value: testWorkspaceName}, From 4d0d8891c0bd761ed50cf1c966eff470bc495995 Mon Sep 17 00:00:00 2001 From: Marcin Tojek Date: Fri, 26 Jan 2024 10:25:04 +0100 Subject: [PATCH 7/7] fix test --- coderd/prometheusmetrics/aggregator.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/coderd/prometheusmetrics/aggregator.go b/coderd/prometheusmetrics/aggregator.go index 82e5d12e03c30..40ad6c7b2f621 100644 --- a/coderd/prometheusmetrics/aggregator.go +++ b/coderd/prometheusmetrics/aggregator.go @@ -2,6 +2,8 @@ package prometheusmetrics import ( "context" + "fmt" + "sort" "strings" "time" @@ -79,26 +81,21 @@ type metricKey struct { } func hashKey(req *updateRequest, m *agentproto.Stats_Metric) metricKey { - var sb strings.Builder + labelPairs := make(sort.StringSlice, 0, len(m.GetLabels())) for _, label := range m.GetLabels() { if label.Value == "" { continue } - - _, _ = sb.WriteString(label.Name) - _ = sb.WriteByte('=') - _, _ = sb.WriteString(MetricLabelValueEncoder.Replace(label.Value)) - _ = sb.WriteByte(',') + labelPairs = append(labelPairs, fmt.Sprintf("%s=%s", label.Name, MetricLabelValueEncoder.Replace(label.Value))) } - labels := strings.TrimRight(sb.String(), ",") - + labelPairs.Sort() return metricKey{ username: req.username, workspaceName: req.workspaceName, agentName: req.agentName, templateName: req.templateName, metricName: m.Name, - labelsStr: labels, + labelsStr: strings.Join(labelPairs, ","), } }