From 74dab0005cf34bd508e4e05014c101fe5d7d128d Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 15 Mar 2024 12:08:27 +0000 Subject: [PATCH 1/6] feat(codersdk): add ability to fetch prometheus metrics directly from agent --- agent/agent.go | 25 +++++++++++++++++++++++++ agent/api.go | 2 ++ cli/agent.go | 28 ++-------------------------- codersdk/workspaceagentconn.go | 28 ++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 26 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 120ef7cdf7b06..37aa0e6fa08cb 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -25,6 +25,7 @@ import ( "github.com/go-chi/chi/v5" "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/expfmt" "github.com/spf13/afero" "go.uber.org/atomic" "golang.org/x/exp/slices" @@ -34,6 +35,7 @@ import ( "tailscale.com/net/speedtest" "tailscale.com/tailcfg" "tailscale.com/types/netlogtype" + "tailscale.com/util/clientmetric" "cdr.dev/slog" "github.com/coder/retry" @@ -1980,3 +1982,26 @@ func (a *apiConnRoutineManager) start(name string, b gracefulShutdownBehavior, f func (a *apiConnRoutineManager) wait() error { return a.eg.Wait() } + +func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + + // Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489 + clientmetric.WritePrometheusExpositionFormat(w) + + metricFamilies, err := prometheusRegistry.Gather() + if err != nil { + logger.Error(context.Background(), "Prometheus handler can't gather metric families", slog.Error(err)) + return + } + + for _, metricFamily := range metricFamilies { + _, err = expfmt.MetricFamilyToText(w, metricFamily) + if err != nil { + logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(err)) + return + } + } + }) +} diff --git a/agent/api.go b/agent/api.go index 7ca63443a2c13..e7ed1ca6a13f7 100644 --- a/agent/api.go +++ b/agent/api.go @@ -35,11 +35,13 @@ func (a *agent) apiHandler() http.Handler { ignorePorts: cpy, cacheDuration: cacheDuration, } + promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger) r.Get("/api/v0/listening-ports", lp.handler) r.Get("/debug/logs", a.HandleHTTPDebugLogs) r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock) r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState) r.Get("/debug/manifest", a.HandleHTTPDebugManifest) + r.Get("/debug/prometheus", promHandler.ServeHTTP) return r } diff --git a/cli/agent.go b/cli/agent.go index af54bfc969bce..1c532b298903f 100644 --- a/cli/agent.go +++ b/cli/agent.go @@ -18,10 +18,8 @@ import ( "cloud.google.com/go/compute/metadata" "golang.org/x/xerrors" "gopkg.in/natefinch/lumberjack.v2" - "tailscale.com/util/clientmetric" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/expfmt" "cdr.dev/slog" "cdr.dev/slog/sloggers/sloghuman" @@ -315,7 +313,8 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd { ModifiedProcesses: nil, }) - prometheusSrvClose := ServeHandler(ctx, logger, prometheusMetricsHandler(prometheusRegistry, logger), prometheusAddress, "prometheus") + promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger) + prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus") defer prometheusSrvClose() debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug") @@ -501,26 +500,3 @@ func urlPort(u string) (int, error) { } return -1, xerrors.Errorf("invalid port: %s", u) } - -func prometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/plain") - - // Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489 - clientmetric.WritePrometheusExpositionFormat(w) - - metricFamilies, err := prometheusRegistry.Gather() - if err != nil { - logger.Error(context.Background(), "Prometheus handler can't gather metric families", slog.Error(err)) - return - } - - for _, metricFamily := range metricFamilies { - _, err = expfmt.MetricFamilyToText(w, metricFamily) - if err != nil { - logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(err)) - return - } - } - }) -} diff --git a/codersdk/workspaceagentconn.go b/codersdk/workspaceagentconn.go index 09bed58fe66b0..cf85ee07de2a2 100644 --- a/codersdk/workspaceagentconn.go +++ b/codersdk/workspaceagentconn.go @@ -364,6 +364,9 @@ func (c *WorkspaceAgentConn) DebugMagicsock(ctx context.Context) ([]byte, error) if err != nil { return nil, xerrors.Errorf("do request: %w", err) } + if res.StatusCode != http.StatusOK { + return nil, ReadBodyAsError(res) + } defer res.Body.Close() bs, err := io.ReadAll(res.Body) if err != nil { @@ -382,6 +385,9 @@ func (c *WorkspaceAgentConn) DebugManifest(ctx context.Context) ([]byte, error) return nil, xerrors.Errorf("do request: %w", err) } defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return nil, ReadBodyAsError(res) + } bs, err := io.ReadAll(res.Body) if err != nil { return nil, xerrors.Errorf("read response body: %w", err) @@ -398,6 +404,28 @@ func (c *WorkspaceAgentConn) DebugLogs(ctx context.Context) ([]byte, error) { return nil, xerrors.Errorf("do request: %w", err) } defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return nil, ReadBodyAsError(res) + } + bs, err := io.ReadAll(res.Body) + if err != nil { + return nil, xerrors.Errorf("read response body: %w", err) + } + return bs, nil +} + +// PrometheusMetrics returns a response from the agent's prometheus metrics endpoint +func (c *WorkspaceAgentConn) PrometheusMetrics(ctx context.Context) ([]byte, error) { + ctx, span := tracing.StartSpan(ctx) + defer span.End() + res, err := c.apiRequest(ctx, http.MethodGet, "/debug/prometheus", nil) + if err != nil { + return nil, xerrors.Errorf("do request: %w", err) + } + defer res.Body.Close() + if res.StatusCode != http.StatusOK { + return nil, ReadBodyAsError(res) + } bs, err := io.ReadAll(res.Body) if err != nil { return nil, xerrors.Errorf("read response body: %w", err) From 1073fc4eee597e0591155f7354a999dcec5d1bc9 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 15 Mar 2024 12:08:56 +0000 Subject: [PATCH 2/6] feat(support): add client magicsock and agent prometheus metrics to support bundle --- cli/support.go | 4 +++- cli/support_test.go | 8 +++++++- support/support.go | 43 ++++++++++++++++++++++++++++++++--------- support/support_test.go | 4 +++- 4 files changed, 47 insertions(+), 12 deletions(-) diff --git a/cli/support.go b/cli/support.go index ab6687e442967..ba74980c2e554 100644 --- a/cli/support.go +++ b/cli/support.go @@ -176,8 +176,10 @@ func writeBundle(src *support.Bundle, dest *zip.Writer) error { "network/tailnet_debug.html": src.Network.TailnetDebug, "workspace/build_logs.txt": humanizeBuildLogs(src.Workspace.BuildLogs), "agent/logs.txt": string(src.Agent.Logs), - "agent/magicsock.html": string(src.Agent.MagicsockHTML), + "agent/agent_magicsock.html": string(src.Agent.AgentMagicsockHTML), + "agent/client_magicsock.html": string(src.Agent.ClientMagicsockHTML), "agent/startup_logs.txt": humanizeAgentLogs(src.Agent.StartupLogs), + "agent/prometheus.txt": string(src.Agent.Prometheus), "workspace/template_file.zip": string(templateVersionBytes), "logs.txt": strings.Join(src.Logs, "\n"), } { diff --git a/cli/support_test.go b/cli/support_test.go index b19cbaebc57f9..5954e975bd07f 100644 --- a/cli/support_test.go +++ b/cli/support_test.go @@ -177,9 +177,12 @@ func assertBundleContents(t *testing.T, path string) { case "agent/logs.txt": bs := readBytesFromZip(t, f) require.NotEmpty(t, bs, "logs should not be empty") - case "agent/magicsock.html": + case "agent/agent_magicsock.html": bs := readBytesFromZip(t, f) require.NotEmpty(t, bs, "agent magicsock should not be empty") + case "agent/client_magicsock.html": + bs := readBytesFromZip(t, f) + require.NotEmpty(t, bs, "client magicsock should not be empty") case "agent/manifest.json": var v agentsdk.Manifest decodeJSONFromZip(t, f, &v) @@ -192,6 +195,9 @@ func assertBundleContents(t *testing.T, path string) { var v *ipnstate.PingResult decodeJSONFromZip(t, f, &v) require.NotEmpty(t, v, "ping result should not be empty") + case "agent/prometheus.txt": + bs := readBytesFromZip(t, f) + require.NotEmpty(t, bs, "agent prometheus metrics should not be empty") case "agent/startup_logs.txt": bs := readBytesFromZip(t, f) require.Contains(t, string(bs), "started up") diff --git a/support/support.go b/support/support.go index 2a2c3c55f8a2c..deaa97cef5c8b 100644 --- a/support/support.go +++ b/support/support.go @@ -7,6 +7,7 @@ import ( "encoding/json" "io" "net/http" + "net/http/httptest" "strings" "golang.org/x/sync/errgroup" @@ -57,14 +58,16 @@ type Workspace struct { } type Agent struct { - Agent *codersdk.WorkspaceAgent `json:"agent"` - ListeningPorts *codersdk.WorkspaceAgentListeningPortsResponse `json:"listening_ports"` - Logs []byte `json:"logs"` - MagicsockHTML []byte `json:"magicsock_html"` - Manifest *agentsdk.Manifest `json:"manifest"` - PeerDiagnostics *tailnet.PeerDiagnostics `json:"peer_diagnostics"` - PingResult *ipnstate.PingResult `json:"ping_result"` - StartupLogs []codersdk.WorkspaceAgentLog `json:"startup_logs"` + Agent *codersdk.WorkspaceAgent `json:"agent"` + ListeningPorts *codersdk.WorkspaceAgentListeningPortsResponse `json:"listening_ports"` + Logs []byte `json:"logs"` + ClientMagicsockHTML []byte `json:"client_magicsock_html"` + AgentMagicsockHTML []byte `json:"agent_magicsock_html"` + Manifest *agentsdk.Manifest `json:"manifest"` + PeerDiagnostics *tailnet.PeerDiagnostics `json:"peer_diagnostics"` + PingResult *ipnstate.PingResult `json:"ping_result"` + Prometheus []byte `json:"prometheus"` + StartupLogs []codersdk.WorkspaceAgentLog `json:"startup_logs"` } // Deps is a set of dependencies for discovering information @@ -329,6 +332,28 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag if !conn.AwaitReachable(ctx) { log.Error(ctx, "timed out waiting for agent") } else { + eg.Go(func() error { + mux := http.NewServeMux() + mux.HandleFunc("/", conn.MagicsockServeHTTPDebug) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/", nil) + if err != nil { + return xerrors.Errorf("create request: %w", err) + } + rr := httptest.NewRecorder() + mux.ServeHTTP(rr, req) + a.ClientMagicsockHTML = rr.Body.Bytes() + return nil + }) + + eg.Go(func() error { + promRes, err := conn.PrometheusMetrics(ctx) + if err != nil { + return xerrors.Errorf("fetch agent prometheus metrics: %w", err) + } + a.Prometheus = promRes + return nil + }) + eg.Go(func() error { _, _, pingRes, err := conn.Ping(ctx) if err != nil { @@ -349,7 +374,7 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag if err != nil { return xerrors.Errorf("get agent magicsock page: %w", err) } - a.MagicsockHTML = msBytes + a.AgentMagicsockHTML = msBytes return nil }) diff --git a/support/support_test.go b/support/support_test.go index 6876bafbc50c2..6b790627ca731 100644 --- a/support/support_test.go +++ b/support/support_test.go @@ -75,9 +75,11 @@ func TestRun(t *testing.T) { assertNotNilNotEmpty(t, bun.Agent.Agent, "agent should be present") assertNotNilNotEmpty(t, bun.Agent.ListeningPorts, "agent listening ports should be present") assertNotNilNotEmpty(t, bun.Agent.Logs, "agent logs should be present") - assertNotNilNotEmpty(t, bun.Agent.MagicsockHTML, "agent magicsock should be present") + assertNotNilNotEmpty(t, bun.Agent.AgentMagicsockHTML, "agent magicsock should be present") + assertNotNilNotEmpty(t, bun.Agent.ClientMagicsockHTML, "client magicsock should be present") assertNotNilNotEmpty(t, bun.Agent.PeerDiagnostics, "agent peer diagnostics should be present") assertNotNilNotEmpty(t, bun.Agent.PingResult, "agent ping result should be present") + assertNotNilNotEmpty(t, bun.Agent.Prometheus, "agent prometheus metrics should be present") assertNotNilNotEmpty(t, bun.Agent.StartupLogs, "agent startup logs should be present") assertNotNilNotEmpty(t, bun.Logs, "bundle logs should be present") }) From f333b2dddaac373e05228ad66613e607a750b905 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 15 Mar 2024 12:35:56 +0000 Subject: [PATCH 3/6] squelch linter complaint --- support/support.go | 1 + 1 file changed, 1 insertion(+) diff --git a/support/support.go b/support/support.go index deaa97cef5c8b..b6b538c63df5b 100644 --- a/support/support.go +++ b/support/support.go @@ -320,6 +320,7 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag Logger: log.Named("dial-agent"), BlockEndpoints: false, }) + // nolint: nestif if err != nil { log.Error(ctx, "dial agent", slog.Error(err)) } else { From 9378d67379de2c2bcedfdfdc7156a603f349cfa1 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 15 Mar 2024 14:39:38 +0000 Subject: [PATCH 4/6] Update agent/agent.go Co-authored-by: Mathias Fredriksson --- agent/agent.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/agent.go b/agent/agent.go index 37aa0e6fa08cb..6f601cbcd7742 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1992,7 +1992,7 @@ func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger sl metricFamilies, err := prometheusRegistry.Gather() if err != nil { - logger.Error(context.Background(), "Prometheus handler can't gather metric families", slog.Error(err)) + logger.Error(context.Background(), "prometheus handler failed to gather metric families", slog.Error(err)) return } From be4f63f7105ac99e7a60ddf9d3c45bc513f3714c Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 15 Mar 2024 14:45:45 +0000 Subject: [PATCH 5/6] remove unnecessary servemux --- support/support.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/support/support.go b/support/support.go index b6b538c63df5b..f627ab2496daa 100644 --- a/support/support.go +++ b/support/support.go @@ -334,14 +334,12 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag log.Error(ctx, "timed out waiting for agent") } else { eg.Go(func() error { - mux := http.NewServeMux() - mux.HandleFunc("/", conn.MagicsockServeHTTPDebug) req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/", nil) if err != nil { return xerrors.Errorf("create request: %w", err) } rr := httptest.NewRecorder() - mux.ServeHTTP(rr, req) + conn.MagicsockServeHTTPDebug(rr, req) a.ClientMagicsockHTML = rr.Body.Bytes() return nil }) From ac084f5f8cfac1b6e056d7ea1d719d07c561a8ec Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 15 Mar 2024 14:57:58 +0000 Subject: [PATCH 6/6] simplify control flow --- support/support.go | 190 ++++++++++++++++++++++++--------------------- 1 file changed, 102 insertions(+), 88 deletions(-) diff --git a/support/support.go b/support/support.go index f627ab2496daa..48e570768f7d9 100644 --- a/support/support.go +++ b/support/support.go @@ -316,104 +316,118 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag return nil }) + // to simplify control flow, fetching information directly from + // the agent is handled in a separate function + closer := connectedAgentInfo(ctx, client, log, agentID, &eg, &a) + defer closer() + + if err := eg.Wait(); err != nil { + log.Error(ctx, "fetch agent information", slog.Error(err)) + } + + return a +} + +func connectedAgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, agentID uuid.UUID, eg *errgroup.Group, a *Agent) (closer func()) { conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{ Logger: log.Named("dial-agent"), BlockEndpoints: false, }) - // nolint: nestif + + closer = func() {} + if err != nil { log.Error(ctx, "dial agent", slog.Error(err)) - } else { - defer func() { - if err := conn.Close(); err != nil { - log.Error(ctx, "failed to close agent connection", slog.Error(err)) - } - <-conn.Closed() - }() - if !conn.AwaitReachable(ctx) { - log.Error(ctx, "timed out waiting for agent") - } else { - eg.Go(func() error { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/", nil) - if err != nil { - return xerrors.Errorf("create request: %w", err) - } - rr := httptest.NewRecorder() - conn.MagicsockServeHTTPDebug(rr, req) - a.ClientMagicsockHTML = rr.Body.Bytes() - return nil - }) - - eg.Go(func() error { - promRes, err := conn.PrometheusMetrics(ctx) - if err != nil { - return xerrors.Errorf("fetch agent prometheus metrics: %w", err) - } - a.Prometheus = promRes - return nil - }) - - eg.Go(func() error { - _, _, pingRes, err := conn.Ping(ctx) - if err != nil { - return xerrors.Errorf("ping agent: %w", err) - } - a.PingResult = pingRes - return nil - }) - - eg.Go(func() error { - pds := conn.GetPeerDiagnostics() - a.PeerDiagnostics = &pds - return nil - }) - - eg.Go(func() error { - msBytes, err := conn.DebugMagicsock(ctx) - if err != nil { - return xerrors.Errorf("get agent magicsock page: %w", err) - } - a.AgentMagicsockHTML = msBytes - return nil - }) - - eg.Go(func() error { - manifestRes, err := conn.DebugManifest(ctx) - if err != nil { - return xerrors.Errorf("fetch manifest: %w", err) - } - if err := json.NewDecoder(bytes.NewReader(manifestRes)).Decode(&a.Manifest); err != nil { - return xerrors.Errorf("decode agent manifest: %w", err) - } - - return nil - }) - - eg.Go(func() error { - logBytes, err := conn.DebugLogs(ctx) - if err != nil { - return xerrors.Errorf("fetch coder agent logs: %w", err) - } - a.Logs = logBytes - return nil - }) - - eg.Go(func() error { - lps, err := conn.ListeningPorts(ctx) - if err != nil { - return xerrors.Errorf("get listening ports: %w", err) - } - a.ListeningPorts = &lps - return nil - }) - } + return closer } - if err := eg.Wait(); err != nil { - log.Error(ctx, "fetch agent information", slog.Error(err)) + if !conn.AwaitReachable(ctx) { + log.Error(ctx, "timed out waiting for agent") + return closer } - return a + closer = func() { + if err := conn.Close(); err != nil { + log.Error(ctx, "failed to close agent connection", slog.Error(err)) + } + <-conn.Closed() + } + + eg.Go(func() error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/", nil) + if err != nil { + return xerrors.Errorf("create request: %w", err) + } + rr := httptest.NewRecorder() + conn.MagicsockServeHTTPDebug(rr, req) + a.ClientMagicsockHTML = rr.Body.Bytes() + return nil + }) + + eg.Go(func() error { + promRes, err := conn.PrometheusMetrics(ctx) + if err != nil { + return xerrors.Errorf("fetch agent prometheus metrics: %w", err) + } + a.Prometheus = promRes + return nil + }) + + eg.Go(func() error { + _, _, pingRes, err := conn.Ping(ctx) + if err != nil { + return xerrors.Errorf("ping agent: %w", err) + } + a.PingResult = pingRes + return nil + }) + + eg.Go(func() error { + pds := conn.GetPeerDiagnostics() + a.PeerDiagnostics = &pds + return nil + }) + + eg.Go(func() error { + msBytes, err := conn.DebugMagicsock(ctx) + if err != nil { + return xerrors.Errorf("get agent magicsock page: %w", err) + } + a.AgentMagicsockHTML = msBytes + return nil + }) + + eg.Go(func() error { + manifestRes, err := conn.DebugManifest(ctx) + if err != nil { + return xerrors.Errorf("fetch manifest: %w", err) + } + if err := json.NewDecoder(bytes.NewReader(manifestRes)).Decode(&a.Manifest); err != nil { + return xerrors.Errorf("decode agent manifest: %w", err) + } + + return nil + }) + + eg.Go(func() error { + logBytes, err := conn.DebugLogs(ctx) + if err != nil { + return xerrors.Errorf("fetch coder agent logs: %w", err) + } + a.Logs = logBytes + return nil + }) + + eg.Go(func() error { + lps, err := conn.ListeningPorts(ctx) + if err != nil { + return xerrors.Errorf("get listening ports: %w", err) + } + a.ListeningPorts = &lps + return nil + }) + + return closer } // Run generates a support bundle with the given dependencies.