feat: Implement aggregator for agent metrics #7259
Changes from 1 commit
@@ -2,7 +2,6 @@ package prometheusmetrics
 import (
 	"context"
-	"sync"
 
 	"github.com/prometheus/client_golang/prometheus"
 	"golang.org/x/xerrors"

@@ -20,10 +19,26 @@ const (
 	metricHelpForAgent = "Metric is forwarded from workspace agent connected to this instance of coderd."
Reviewer (suggested change on the help string above): I think it's preferable to use the plural form here. Side note: in an HA setup with multiple coderd instances, would metrics only cover the few agents connected to that specific coderd instance, or would they be global?

Author: Fixed. Only the connected ones, as metrics are not fetched from the database but just cached in memory. I think that applies to most Coder metrics, right?
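For reference, the plural wording being suggested would read roughly like this (an illustration only; the exact string adopted after the "Fixed" reply is not shown in this commit):

	metricHelpForAgent = "Metric is forwarded from workspace agents connected to this instance of coderd."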
 )
 
+const (
+	sizeCollectCh = 10
+	sizeUpdateCh  = 1024
+)
+
 type MetricsAggregator struct {
-	m     sync.Mutex
-	log   slog.Logger
 	queue []annotatedMetric
+
+	log slog.Logger
+
+	collectCh chan (chan<- prometheus.Metric)
+	updateCh  chan updateRequest
 }
+
+type updateRequest struct {
+	username      string
+	workspaceName string
+	agentName     string
+
+	metrics []agentsdk.AgentMetric
+}
 
 type annotatedMetric struct {

@@ -36,6 +51,66 @@ type annotatedMetric struct {
 
 var _ prometheus.Collector = new(MetricsAggregator)
 
+func NewMetricsAggregator(logger slog.Logger) *MetricsAggregator {
+	return &MetricsAggregator{
+		log: logger,
+
+		collectCh: make(chan (chan<- prometheus.Metric), sizeCollectCh),
+		updateCh:  make(chan updateRequest, sizeUpdateCh),
+	}
+}
+
+func (ma *MetricsAggregator) Run(ctx context.Context) func() {
+	ctx, cancelFunc := context.WithCancel(ctx)
+	done := make(chan struct{})
+
+	go func() {
+		defer close(done)
+
+		for {
+			select {
+			case req := <-ma.updateCh:
+			UpdateLoop:
+				for _, m := range req.metrics {
+					for i, q := range ma.queue {
+						if q.username == req.username && q.workspaceName == req.workspaceName && q.agentName == req.agentName && q.Name == m.Name {
+							ma.queue[i].AgentMetric.Value = m.Value
+							continue UpdateLoop
+						}
+					}
+
+					ma.queue = append(ma.queue, annotatedMetric{
+						username:      req.username,
+						workspaceName: req.workspaceName,
+						agentName:     req.agentName,
+
+						AgentMetric: m,
+					})
+				}
+			case inputCh := <-ma.collectCh:
+				for _, m := range ma.queue {
+					desc := prometheus.NewDesc(m.Name, metricHelpForAgent, agentMetricsLabels, nil)
+					valueType, err := asPrometheusValueType(m.Type)
+					if err != nil {
+						ma.log.Error(ctx, "can't convert Prometheus value type", slog.F("value_type", m.Type), slog.Error(err))
+						continue
+					}
+					constMetric := prometheus.MustNewConstMetric(desc, valueType, m.Value, m.username, m.workspaceName, m.agentName)
+					inputCh <- constMetric
Reviewer: I'm not sure how this will be used, but I worry about the use of channels. Essentially, the way this behaves now, it's enough for one misbehaving consumer to lock up this entire goroutine, since it's dependent on the collected metrics being consumed in a timely manner. So essentially we can insert 128 entries here immediately, but if more is queued up, then it's on the one who called …

Author: 128 entries correspond to 128 agents submitting their metrics at the same time. I'm aware of the issue, but considering our scale, I don't think we should encounter this problem soon. Speaking of a potential solution, do you think we should introduce a "middle channel" to time out the …

Reviewer: Hmm, I see. I think another option, instead of an intermediate channel, would be to send a slice over this channel instead of 128 individual items. Then Collect can process the slice instead of consuming the channel, and the goroutine can immediately continue its work. This way the logic doesn't need to change much at all. This of course leads to increased memory usage when we exceed 128, but then again, if we do exceed it now it might not be ideal either.

Author: In this case I slightly modified your idea. I kept the channel, but I'm passing the entire slice at once, so the 128-item limit does not exist anymore. Please let me know your thoughts, @mafredri.

Reviewer: Nice, and it actually looks like how I intended it 😄
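For illustration, here is a stripped-down sketch of the "pass the entire slice at once" idea settled on above. This is not code from the PR; the names (sliceAggregator, collectCh, run) are placeholders of my own:

package prometheussketch

import "github.com/prometheus/client_golang/prometheus"

type sliceAggregator struct {
	queue     []prometheus.Metric
	collectCh chan chan<- []prometheus.Metric
}

// run is the single goroutine owning the queue. It answers each collect
// request with one send of a copied batch, so a slow consumer of the batch
// cannot block this loop between requests.
func (a *sliceAggregator) run() {
	for resp := range a.collectCh {
		batch := make([]prometheus.Metric, len(a.queue))
		copy(batch, a.queue)
		resp <- batch
	}
}

// Collect receives the whole batch in one receive and forwards it to the
// Prometheus channel at its own pace.
func (a *sliceAggregator) Collect(ch chan<- prometheus.Metric) {
	resp := make(chan []prometheus.Metric, 1)
	a.collectCh <- resp
	for _, m := range <-resp {
		ch <- m
	}
}

Because the response channel is buffered, the aggregator goroutine performs a single non-blocking send per collect request and immediately returns to its loop.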
+				}
+				close(inputCh)
+			case <-ctx.Done():
+				ma.log.Debug(ctx, "metrics aggregator: is stopped")
+				return
+			}
+		}
+	}()
+	return func() {
+		cancelFunc()
+		<-done
+	}
+}
+
 // Describe function does not have any knowledge about the metrics schema,
 // so it does not emit anything.
 func (*MetricsAggregator) Describe(_ chan<- *prometheus.Desc) {

@@ -44,42 +119,32 @@ func (*MetricsAggregator) Describe(_ chan<- *prometheus.Desc) {
 var agentMetricsLabels = []string{usernameLabel, workspaceNameLabel, agentNameLabel}
 
 func (ma *MetricsAggregator) Collect(ch chan<- prometheus.Metric) {
-	ma.m.Lock()
-	defer ma.m.Unlock()
-
-	for _, m := range ma.queue {
-		desc := prometheus.NewDesc(m.Name, metricHelpForAgent, agentMetricsLabels, nil)
-		valueType, err := asPrometheusValueType(m.Type)
-		if err != nil {
-			ma.log.Error(context.Background(), "can't convert Prometheus value type", slog.F("value_type", m.Type), slog.Error(err))
-		}
-		constMetric := prometheus.MustNewConstMetric(desc, valueType, m.Value, m.username, m.workspaceName, m.agentName)
-		ch <- constMetric
-	}
-}
-
-// TODO Run function with done channel
-
-func (ma *MetricsAggregator) Update(_ context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric) {
-	ma.m.Lock()
-	defer ma.m.Unlock()
-
-UpdateLoop:
-	for _, m := range metrics {
-		for i, q := range ma.queue {
-			if q.username == username && q.workspaceName == workspaceName && q.agentName == agentName && q.Name == m.Name {
-				ma.queue[i].AgentMetric.Value = m.Value
-				continue UpdateLoop
-			}
-		}
-
-		ma.queue = append(ma.queue, annotatedMetric{
-			username:      username,
-			workspaceName: workspaceName,
-			agentName:     agentName,
-
-			AgentMetric: m,
-		})
-	}
-}
+	collect := make(chan prometheus.Metric, 128)
+
+	select {
+	case ma.collectCh <- collect:
+	default:
+		ma.log.Error(context.Background(), "metrics aggregator: collect queue is full")
+		return
+	}
+
+	for m := range collect {
+		ch <- m
+	}
+}
+
+func (ma *MetricsAggregator) Update(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric) {
+	select {
+	case ma.updateCh <- updateRequest{
+		username:      username,
+		workspaceName: workspaceName,
+		agentName:     agentName,
+		metrics:       metrics,
+	}:
+	case <-ctx.Done():
+		ma.log.Debug(ctx, "metrics aggregator: update is canceled")
+	default:
+		ma.log.Error(ctx, "metrics aggregator: update queue is full")
+	}
+}
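To make the intended wiring concrete, here is a minimal usage sketch based only on the API visible in this diff. The import paths and the agentsdk metric-type constant are assumptions on my part, and the real registration inside coderd is more involved:

package main

import (
	"context"
	"os"

	"github.com/prometheus/client_golang/prometheus"

	"cdr.dev/slog"
	"cdr.dev/slog/sloggers/sloghuman"

	"github.com/coder/coder/coderd/prometheusmetrics"
	"github.com/coder/coder/codersdk/agentsdk"
)

func main() {
	logger := slog.Make(sloghuman.Sink(os.Stderr))

	// The aggregator owns its state in a single goroutine started by Run;
	// the returned function cancels it and waits for shutdown.
	ma := prometheusmetrics.NewMetricsAggregator(logger)
	closeAggregator := ma.Run(context.Background())
	defer closeAggregator()

	// MetricsAggregator implements prometheus.Collector, so it can be
	// registered with the registry behind coderd's metrics endpoint.
	registry := prometheus.NewRegistry()
	registry.MustRegister(ma)

	// Wherever agent metrics arrive, forward them. Update never blocks:
	// it drops the batch with an error log if updateCh is full.
	// The metric name and type constant below are illustrative assumptions.
	ma.Update(context.Background(), "alice", "workspace-1", "main", []agentsdk.AgentMetric{
		{Name: "agent_open_file_descriptors", Type: agentsdk.AgentMetricTypeGauge, Value: 7},
	})
}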