Skip to content

feat: Implement aggregator for agent metrics #7259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 46 commits into main from 6724-api-collect-metrics
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6516216
API contract
mtojek Apr 24, 2023
dc202c4
Send agent metrics
mtojek Apr 24, 2023
7747f2d
Ignore metrics to save bandwidth
mtojek Apr 24, 2023
9fd4ddb
fix lint
mtojek Apr 24, 2023
9af0246
logEntry
mtojek Apr 24, 2023
4207dff
make gen
mtojek Apr 24, 2023
99fe1bf
Use errGroup
mtojek Apr 24, 2023
df80e9b
Use MustNewConstMetric
mtojek Apr 25, 2023
d86496e
PoC works
mtojek Apr 25, 2023
10e6d8d
Metrics aggregator with channels
mtojek Apr 25, 2023
8df9eea
Metrics expiry
mtojek Apr 25, 2023
1f5273b
histograms
mtojek Apr 25, 2023
1b8c486
unit test
mtojek Apr 26, 2023
423420b
fmt
mtojek Apr 26, 2023
23bbe94
test: metrics can expire
mtojek Apr 26, 2023
b7011ae
Aggregator
mtojek Apr 26, 2023
29a8702
Address PR comments
mtojek Apr 26, 2023
7acd113
wrap errors
mtojek Apr 26, 2023
b15c7b7
fix
mtojek Apr 26, 2023
2ae7e4e
Update coderd/prometheusmetrics/aggregator.go
mtojek Apr 27, 2023
b04d232
refactor: PTY & SSH (#7100)
spikecurtis Apr 24, 2023
1d93f66
feat(community-templates): Added vscode-server-template (#7219)
nanospearing Apr 24, 2023
c604633
chore: Proxy health status checks + endpoint (#7233)
Emyrk Apr 24, 2023
7d84745
Revert "feat(UI): add workspace restart button (#7137)" (#7268)
Kira-Pilot Apr 24, 2023
407c332
refactor(site): Group app and agent actions together (#7267)
BrunoQuaresma Apr 24, 2023
49b81df
fix(coderd): ensure that user API keys are deleted when a user is (#7…
johnstcn Apr 24, 2023
44217de
chore(dogfood): remove unnecessary docker host replace (#7269)
coadler Apr 25, 2023
e659c36
Fix macOS pty race with dropped output (#7278)
spikecurtis Apr 25, 2023
6dc8b1f
feat: add regions endpoint for proxies feature (#7277)
deansheather Apr 25, 2023
d2233be
fix(healthcheck): don't allow panics to exit coderd (#7276)
coadler Apr 25, 2023
f3f5bed
chore: add security advisories to docs (#7282)
johnstcn Apr 25, 2023
50f60cb
fix(site): Do not show template params if there is no param to be dis…
BrunoQuaresma Apr 25, 2023
1bf1b06
fix(site): Fix default value for options (#7265)
BrunoQuaresma Apr 25, 2023
5f6b4dc
chore: fix flake in apptest reconnecting-pty test (#7281)
deansheather Apr 26, 2023
9141f7c
Reconnecting PTY waits for command output or EOF (#7279)
spikecurtis Apr 26, 2023
e0879b5
docs(site): Mention template editor in template edit docs (#7261)
BrunoQuaresma Apr 26, 2023
b6322d1
fix(site): Fix secondary buttons with popovers (#7296)
BrunoQuaresma Apr 26, 2023
1e3eb06
chore: change some wording in the dashboard (#7293)
bpmct Apr 26, 2023
366859b
feat(agent): add http debug routes for magicsock (#7287)
coadler Apr 26, 2023
ed8106d
feat: add license expiration warning (#7264)
rodrimaia Apr 26, 2023
5733abc
feat: add license settings UI (#7210)
rodrimaia Apr 26, 2023
4937e75
chore: add envbox documentation (#7198)
sreya Apr 26, 2023
619e470
docs: Fix relay link in HA doc (#7159)
winter0mute Apr 27, 2023
16b5353
Merge branch 'main' into 6724-api-collect-metrics
mtojek Apr 27, 2023
c1bd4d2
Refactor Collect channel
mtojek Apr 27, 2023
8baed98
fix
mtojek Apr 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"os"
"os/user"
"path/filepath"
"reflect"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -1223,11 +1222,11 @@ func (a *agent) startReportingConnectionStats(ctx context.Context) {
// Convert from microseconds to milliseconds.
stats.ConnectionMedianLatencyMS /= 1000

lastStat := a.latestStat.Load()
if lastStat != nil && reflect.DeepEqual(lastStat, stats) {
a.logger.Info(ctx, "skipping stat because nothing changed")
return
}
// Collect agent metrics.
// Agent metrics are changing all the time, so there is no need to perform
// reflect.DeepEqual to see if stats should be transferred.
stats.Metrics = collectMetrics()

a.latestStat.Store(stats)

select {
Expand Down
52 changes: 52 additions & 0 deletions agent/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package agent

import (
"fmt"
"strings"

"tailscale.com/util/clientmetric"

"github.com/coder/coder/codersdk/agentsdk"
)

// collectMetrics gathers the metrics exposed by Tailscale's clientmetric
// package and converts them into agentsdk.AgentMetric values. Metrics whose
// names are rejected by isIgnoredMetric are left out of the result.
func collectMetrics() []agentsdk.AgentMetric {
	// Tailscale metrics
	tailscaleMetrics := clientmetric.Metrics()

	// Capacity is an upper bound: ignored metrics are skipped below.
	result := make([]agentsdk.AgentMetric, 0, len(tailscaleMetrics))
	for _, tm := range tailscaleMetrics {
		name := tm.Name()
		if !isIgnoredMetric(name) {
			result = append(result, agentsdk.AgentMetric{
				Name:  name,
				Type:  asMetricType(tm.Type()),
				Value: float64(tm.Value()),
			})
		}
	}
	return result
}

// isIgnoredMetric reports whether the metric should be dropped, as the Coder
// agent doesn't use the related features. A metric is ignored when its name
// starts with one of these prefixes: dns_, controlclient_, peerapi_,
// profiles_, tstun_. Every other family (for example magicsock_*, derp_*,
// netcheck_*, portmap_*) is kept.
func isIgnoredMetric(metricName string) bool {
	for _, prefix := range [...]string{
		"dns_",
		"controlclient_",
		"peerapi_",
		"profiles_",
		"tstun_",
	} {
		if strings.HasPrefix(metricName, prefix) {
			return true
		}
	}
	return false
}

// asMetricType maps a Tailscale clientmetric type onto the matching
// agentsdk metric type. Only gauges and counters are handled; any other
// value causes a panic.
func asMetricType(typ clientmetric.Type) agentsdk.AgentMetricType {
	if typ == clientmetric.TypeGauge {
		return agentsdk.AgentMetricTypeGauge
	}
	if typ == clientmetric.TypeCounter {
		return agentsdk.AgentMetricTypeCounter
	}
	// Neither of the known types matched.
	panic(fmt.Sprintf("unknown metric type: %d", typ))
}
14 changes: 14 additions & 0 deletions cli/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,20 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
return xerrors.Errorf("register agent stats prometheus metric: %w", err)
}
defer closeAgentStatsFunc()

metricsAggregator, err := prometheusmetrics.NewMetricsAggregator(logger, options.PrometheusRegistry, 0)
if err != nil {
return xerrors.Errorf("can't initialize metrics aggregator: %w", err)
}

cancelMetricsAggregator := metricsAggregator.Run(ctx)
defer cancelMetricsAggregator()

options.UpdateAgentMetrics = metricsAggregator.Update
err = options.PrometheusRegistry.Register(metricsAggregator)
if err != nil {
return xerrors.Errorf("can't register metrics aggregator as collector: %w", err)
}
}

//nolint:revive
Expand Down
45 changes: 45 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import (
"cdr.dev/slog"

"github.com/coder/coder/buildinfo"
"github.com/coder/coder/codersdk/agentsdk"

// Used for swagger docs.
_ "github.com/coder/coder/coderd/apidoc"
"github.com/coder/coder/coderd/audit"
Expand Down Expand Up @@ -146,6 +148,8 @@ type Options struct {
SSHConfig codersdk.SSHConfigResponse

HTTPClient *http.Client

UpdateAgentMetrics func(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric)
}

// @title Coder API
Expand Down
Loading