Skip to content

Commit b0c4e75

Browse files
johnstcn and mafredri authored
feat(support): add client magicsock and agent prometheus metrics to support bundle (coder#12604)
* feat(codersdk): add ability to fetch prometheus metrics directly from agent
* feat(support): add client magicsock and agent prometheus metrics to support bundle
* refactor(support): simplify AgentInfo control flow

Co-authored-by: Mathias Fredriksson <mafredri@gmail.com>
1 parent 4d9e6c0 commit b0c4e75

File tree

8 files changed

+183
-104
lines changed

8 files changed

+183
-104
lines changed

agent/agent.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/go-chi/chi/v5"
2626
"github.com/google/uuid"
2727
"github.com/prometheus/client_golang/prometheus"
28+
"github.com/prometheus/common/expfmt"
2829
"github.com/spf13/afero"
2930
"go.uber.org/atomic"
3031
"golang.org/x/exp/slices"
@@ -34,6 +35,7 @@ import (
3435
"tailscale.com/net/speedtest"
3536
"tailscale.com/tailcfg"
3637
"tailscale.com/types/netlogtype"
38+
"tailscale.com/util/clientmetric"
3739

3840
"cdr.dev/slog"
3941
"github.com/coder/retry"
@@ -1980,3 +1982,26 @@ func (a *apiConnRoutineManager) start(name string, b gracefulShutdownBehavior, f
19801982
func (a *apiConnRoutineManager) wait() error {
19811983
return a.eg.Wait()
19821984
}
1985+
1986+
// PrometheusMetricsHandler returns an http.Handler that writes metrics to the
// response as plain text.
//
// The handler first writes the output of
// clientmetric.WritePrometheusExpositionFormat, then gathers the metric
// families from prometheusRegistry and writes each one with
// expfmt.MetricFamilyToText. If gathering or writing fails, the error is
// logged through logger and the handler returns without setting an error
// status code, so the response may contain only part of the metrics.
func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler {
1987+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
1988+
w.Header().Set("Content-Type", "text/plain")
1989+
1990+
// Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489
1991+
clientmetric.WritePrometheusExpositionFormat(w) // written before the registry is gathered, so it is sent even if Gather fails
1992+
1993+
metricFamilies, err := prometheusRegistry.Gather()
1994+
if err != nil {
1995+
logger.Error(context.Background(), "prometheus handler failed to gather metric families", slog.Error(err)) // NOTE(review): logs with context.Background(), not r.Context() — confirm this is intentional
1996+
return // no registry metrics are written; the response ends with what was already sent
1997+
}
1998+
1999+
for _, metricFamily := range metricFamilies {
2000+
_, err = expfmt.MetricFamilyToText(w, metricFamily) // the returned byte count is discarded
2001+
if err != nil {
2002+
logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(err))
2003+
return // stop at the first write failure; remaining families are skipped
2004+
}
2005+
}
2006+
})
2007+
}

agent/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,13 @@ func (a *agent) apiHandler() http.Handler {
3535
ignorePorts: cpy,
3636
cacheDuration: cacheDuration,
3737
}
38+
promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger)
3839
r.Get("/api/v0/listening-ports", lp.handler)
3940
r.Get("/debug/logs", a.HandleHTTPDebugLogs)
4041
r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock)
4142
r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState)
4243
r.Get("/debug/manifest", a.HandleHTTPDebugManifest)
44+
r.Get("/debug/prometheus", promHandler.ServeHTTP)
4345

4446
return r
4547
}

cli/agent.go

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@ import (
1818
"cloud.google.com/go/compute/metadata"
1919
"golang.org/x/xerrors"
2020
"gopkg.in/natefinch/lumberjack.v2"
21-
"tailscale.com/util/clientmetric"
2221

2322
"github.com/prometheus/client_golang/prometheus"
24-
"github.com/prometheus/common/expfmt"
2523

2624
"cdr.dev/slog"
2725
"cdr.dev/slog/sloggers/sloghuman"
@@ -315,7 +313,8 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
315313
ModifiedProcesses: nil,
316314
})
317315

318-
prometheusSrvClose := ServeHandler(ctx, logger, prometheusMetricsHandler(prometheusRegistry, logger), prometheusAddress, "prometheus")
316+
promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger)
317+
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus")
319318
defer prometheusSrvClose()
320319

321320
debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug")
@@ -501,26 +500,3 @@ func urlPort(u string) (int, error) {
501500
}
502501
return -1, xerrors.Errorf("invalid port: %s", u)
503502
}
504-
505-
// prometheusMetricsHandler returns an http.Handler that writes metrics to the
// response as plain text. In this diff it appears on the removed side (lines
// marked "-") of cli/agent.go.
//
// The handler first writes the output of
// clientmetric.WritePrometheusExpositionFormat, then gathers the metric
// families from prometheusRegistry and writes each one with
// expfmt.MetricFamilyToText. If gathering or writing fails, the error is
// logged through logger and the handler returns without setting an error
// status code.
func prometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler {
506-
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
507-
w.Header().Set("Content-Type", "text/plain")
508-
509-
// Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489
510-
clientmetric.WritePrometheusExpositionFormat(w) // written before the registry is gathered, so it is sent even if Gather fails
511-
512-
metricFamilies, err := prometheusRegistry.Gather()
513-
if err != nil {
514-
logger.Error(context.Background(), "Prometheus handler can't gather metric families", slog.Error(err))
515-
return // no registry metrics are written; the response ends with what was already sent
516-
}
517-
518-
for _, metricFamily := range metricFamilies {
519-
_, err = expfmt.MetricFamilyToText(w, metricFamily) // the returned byte count is discarded
520-
if err != nil {
521-
logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(err))
522-
return // stop at the first write failure; remaining families are skipped
523-
}
524-
}
525-
})
526-
}

cli/support.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,10 @@ func writeBundle(src *support.Bundle, dest *zip.Writer) error {
176176
"network/tailnet_debug.html": src.Network.TailnetDebug,
177177
"workspace/build_logs.txt": humanizeBuildLogs(src.Workspace.BuildLogs),
178178
"agent/logs.txt": string(src.Agent.Logs),
179-
"agent/magicsock.html": string(src.Agent.MagicsockHTML),
179+
"agent/agent_magicsock.html": string(src.Agent.AgentMagicsockHTML),
180+
"agent/client_magicsock.html": string(src.Agent.ClientMagicsockHTML),
180181
"agent/startup_logs.txt": humanizeAgentLogs(src.Agent.StartupLogs),
182+
"agent/prometheus.txt": string(src.Agent.Prometheus),
181183
"workspace/template_file.zip": string(templateVersionBytes),
182184
"logs.txt": strings.Join(src.Logs, "\n"),
183185
} {

cli/support_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,12 @@ func assertBundleContents(t *testing.T, path string) {
177177
case "agent/logs.txt":
178178
bs := readBytesFromZip(t, f)
179179
require.NotEmpty(t, bs, "logs should not be empty")
180-
case "agent/magicsock.html":
180+
case "agent/agent_magicsock.html":
181181
bs := readBytesFromZip(t, f)
182182
require.NotEmpty(t, bs, "agent magicsock should not be empty")
183+
case "agent/client_magicsock.html":
184+
bs := readBytesFromZip(t, f)
185+
require.NotEmpty(t, bs, "client magicsock should not be empty")
183186
case "agent/manifest.json":
184187
var v agentsdk.Manifest
185188
decodeJSONFromZip(t, f, &v)
@@ -192,6 +195,9 @@ func assertBundleContents(t *testing.T, path string) {
192195
var v *ipnstate.PingResult
193196
decodeJSONFromZip(t, f, &v)
194197
require.NotEmpty(t, v, "ping result should not be empty")
198+
case "agent/prometheus.txt":
199+
bs := readBytesFromZip(t, f)
200+
require.NotEmpty(t, bs, "agent prometheus metrics should not be empty")
195201
case "agent/startup_logs.txt":
196202
bs := readBytesFromZip(t, f)
197203
require.Contains(t, string(bs), "started up")

codersdk/workspaceagentconn.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,9 @@ func (c *WorkspaceAgentConn) DebugMagicsock(ctx context.Context) ([]byte, error)
364364
if err != nil {
365365
return nil, xerrors.Errorf("do request: %w", err)
366366
}
367+
if res.StatusCode != http.StatusOK {
368+
return nil, ReadBodyAsError(res)
369+
}
367370
defer res.Body.Close()
368371
bs, err := io.ReadAll(res.Body)
369372
if err != nil {
@@ -382,6 +385,9 @@ func (c *WorkspaceAgentConn) DebugManifest(ctx context.Context) ([]byte, error)
382385
return nil, xerrors.Errorf("do request: %w", err)
383386
}
384387
defer res.Body.Close()
388+
if res.StatusCode != http.StatusOK {
389+
return nil, ReadBodyAsError(res)
390+
}
385391
bs, err := io.ReadAll(res.Body)
386392
if err != nil {
387393
return nil, xerrors.Errorf("read response body: %w", err)
@@ -398,6 +404,28 @@ func (c *WorkspaceAgentConn) DebugLogs(ctx context.Context) ([]byte, error) {
398404
return nil, xerrors.Errorf("do request: %w", err)
399405
}
400406
defer res.Body.Close()
407+
if res.StatusCode != http.StatusOK {
408+
return nil, ReadBodyAsError(res)
409+
}
410+
bs, err := io.ReadAll(res.Body)
411+
if err != nil {
412+
return nil, xerrors.Errorf("read response body: %w", err)
413+
}
414+
return bs, nil
415+
}
416+
417+
// PrometheusMetrics returns a response from the agent's prometheus metrics endpoint
418+
func (c *WorkspaceAgentConn) PrometheusMetrics(ctx context.Context) ([]byte, error) {
419+
ctx, span := tracing.StartSpan(ctx)
420+
defer span.End()
421+
res, err := c.apiRequest(ctx, http.MethodGet, "/debug/prometheus", nil)
422+
if err != nil {
423+
return nil, xerrors.Errorf("do request: %w", err)
424+
}
425+
defer res.Body.Close()
426+
if res.StatusCode != http.StatusOK {
427+
return nil, ReadBodyAsError(res)
428+
}
401429
bs, err := io.ReadAll(res.Body)
402430
if err != nil {
403431
return nil, xerrors.Errorf("read response body: %w", err)

0 commit comments

Comments
 (0)