-
Notifications
You must be signed in to change notification settings - Fork 901
feat: add support package #12289
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add support package #12289
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
package support | ||
|
||
import ( | ||
"context" | ||
"io" | ||
"net/http" | ||
"strings" | ||
|
||
"golang.org/x/xerrors" | ||
|
||
"github.com/google/uuid" | ||
|
||
"cdr.dev/slog" | ||
"cdr.dev/slog/sloggers/sloghuman" | ||
"github.com/coder/coder/v2/coderd/rbac" | ||
"github.com/coder/coder/v2/codersdk" | ||
) | ||
|
||
// Bundle is a set of information discovered about a deployment. | ||
// Even though we do attempt to sanitize data, it may still contain | ||
// sensitive information and should thus be treated as secret. | ||
type Bundle struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thinking loud: don't need to move it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd defer doing this until it's absolutely necessary. |
||
Deployment Deployment `json:"deployment"` | ||
Network Network `json:"network"` | ||
Workspace Workspace `json:"workspace"` | ||
Logs []string `json:"logs"` | ||
} | ||
|
||
type Deployment struct { | ||
BuildInfo *codersdk.BuildInfoResponse `json:"build"` | ||
Config *codersdk.DeploymentConfig `json:"config"` | ||
Experiments codersdk.Experiments `json:"experiments"` | ||
HealthReport *codersdk.HealthcheckReport `json:"health_report"` | ||
} | ||
|
||
type Network struct { | ||
CoordinatorDebug string `json:"coordinator_debug"` | ||
TailnetDebug string `json:"tailnet_debug"` | ||
NetcheckLocal *codersdk.WorkspaceAgentConnectionInfo `json:"netcheck_local"` | ||
NetcheckRemote *codersdk.WorkspaceAgentConnectionInfo `json:"netcheck_remote"` | ||
} | ||
Comment on lines
+36
to
+41
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: it might be useful to annotate these fields and give some examples what exactly can a user/developer expect. Might be nice to highlight fields with sensitive data. |
||
|
||
type Workspace struct { | ||
Workspace codersdk.Workspace `json:"workspace"` | ||
BuildLogs []codersdk.ProvisionerJobLog `json:"build_logs"` | ||
Agent codersdk.WorkspaceAgent `json:"agent"` | ||
AgentStartupLogs []codersdk.WorkspaceAgentLog `json:"startup_logs"` | ||
} | ||
Comment on lines
+43
to
+48
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we include last N-workspace builds? User may retry building the workspace a few times. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's leave N=1 for now and add more if we find a use-case. I would generally always look at the latest build when troubleshooting an issue. |
||
|
||
// Deps is a set of dependencies for discovering information | ||
type Deps struct { | ||
// Source from which to obtain information. | ||
Client *codersdk.Client | ||
// Log is where to log any informational or warning messages. | ||
Log slog.Logger | ||
// WorkspaceID is the optional workspace against which to run connection tests. | ||
WorkspaceID uuid.UUID | ||
// AgentID is the optional agent ID against which to run connection tests. | ||
// Defaults to the first agent of the workspace, if not specified. | ||
AgentID uuid.UUID | ||
} | ||
|
||
func DeploymentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger) Deployment { | ||
var d Deployment | ||
bi, err := client.BuildInfo(ctx) | ||
if err != nil { | ||
log.Error(ctx, "fetch build info", slog.Error(err)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see the approach you implemented here. It is appropriate, but maybe we could keep a field for bundling errors within the bundle? It would make debugging easier when coderd returns HTTP 500, and the client error is saved. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll see if there's a nice generic solution to this in a follow-up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absolutely 👍 This is something that can be done in a follow-up. |
||
} else { | ||
d.BuildInfo = &bi | ||
} | ||
|
||
dc, err := client.DeploymentConfig(ctx) | ||
if err != nil { | ||
log.Error(ctx, "fetch deployment config", slog.Error(err)) | ||
} else { | ||
d.Config = dc | ||
} | ||
|
||
hr, err := client.GetDebugHealth(ctx) | ||
if err != nil { | ||
log.Error(ctx, "fetch health report", slog.Error(err)) | ||
} else { | ||
d.HealthReport = &hr | ||
} | ||
|
||
exp, err := client.Experiments(ctx) | ||
if err != nil { | ||
log.Error(ctx, "fetch experiments", slog.Error(err)) | ||
} else { | ||
d.Experiments = exp | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we do these concurrently for better speed? The data snapshot is "closer together" as well, in case that matters (same for the other functions too?). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 Will address in a follow-up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be good to go with single-threaded workflow. It is a support bundle not really a time-critical function. |
||
|
||
return d | ||
} | ||
|
||
func NetworkInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, agentID uuid.UUID) Network { | ||
var n Network | ||
|
||
// Get /api/v2/debug/coordinator | ||
coordResp, err := client.Request(ctx, http.MethodGet, "/api/v2/debug/coordinator", nil) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I believe this line of code is self-descriptive :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can add debug logging instead comments There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comments were more of a reminder to myself :-) Will remove. |
||
if err != nil { | ||
log.Error(ctx, "fetch coordinator debug page", slog.Error(err)) | ||
} else { | ||
defer coordResp.Body.Close() | ||
bs, err := io.ReadAll(coordResp.Body) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we use a limit reader here (and elsewhere)? Protective measure just in case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The more I think about it, the less I like limiting the amount of data read. |
||
if err != nil { | ||
log.Error(ctx, "read coordinator debug page", slog.Error(err)) | ||
} else { | ||
n.CoordinatorDebug = string(bs) | ||
} | ||
} | ||
|
||
// Get /api/v2/debug/tailnet | ||
tailResp, err := client.Request(ctx, http.MethodGet, "/api/v2/debug/tailnet", nil) | ||
if err != nil { | ||
log.Error(ctx, "fetch tailnet debug page", slog.Error(err)) | ||
} else { | ||
defer tailResp.Body.Close() | ||
bs, err := io.ReadAll(tailResp.Body) | ||
if err != nil { | ||
log.Error(ctx, "read tailnet debug page", slog.Error(err)) | ||
} else { | ||
n.TailnetDebug = string(bs) | ||
} | ||
} | ||
|
||
if agentID != uuid.Nil { | ||
connInfo, err := client.WorkspaceAgentConnectionInfo(ctx, agentID) | ||
if err != nil { | ||
log.Error(ctx, "fetch agent conn info", slog.Error(err), slog.F("agent_id", agentID.String())) | ||
} else { | ||
n.NetcheckLocal = &connInfo | ||
} | ||
} else { | ||
log.Warn(ctx, "agent id required for agent connection info") | ||
} | ||
|
||
return n | ||
} | ||
|
||
func WorkspaceInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, workspaceID, agentID uuid.UUID) Workspace { | ||
var w Workspace | ||
|
||
if workspaceID == uuid.Nil { | ||
log.Error(ctx, "no workspace id specified") | ||
return w | ||
} | ||
|
||
if agentID == uuid.Nil { | ||
log.Error(ctx, "no agent id specified") | ||
} | ||
|
||
ws, err := client.Workspace(ctx, workspaceID) | ||
if err != nil { | ||
log.Error(ctx, "fetch workspace", slog.Error(err), slog.F("workspace_id", workspaceID)) | ||
return w | ||
} | ||
|
||
w.Workspace = ws | ||
|
||
buildLogCh, closer, err := client.WorkspaceBuildLogsAfter(ctx, ws.LatestBuild.ID, 0) | ||
if err != nil { | ||
log.Error(ctx, "fetch provisioner job logs", slog.Error(err), slog.F("job_id", ws.LatestBuild.Job.ID.String())) | ||
} else { | ||
defer closer.Close() | ||
for log := range buildLogCh { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we have a max size, only pick the tail-end if too big? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we can know the number of logs ahead of time. |
||
w.BuildLogs = append(w.BuildLogs, log) | ||
} | ||
} | ||
|
||
if len(w.Workspace.LatestBuild.Resources) == 0 { | ||
log.Warn(ctx, "workspace build has no resources") | ||
return w | ||
} | ||
|
||
agentLogCh, closer, err := client.WorkspaceAgentLogsAfter(ctx, agentID, 0, false) | ||
if err != nil { | ||
log.Error(ctx, "fetch agent startup logs", slog.Error(err), slog.F("agent_id", agentID.String())) | ||
} else { | ||
defer closer.Close() | ||
for logChunk := range agentLogCh { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as with workspace build, agent logs are limited to 1MB currently, but that assumption could change. |
||
w.AgentStartupLogs = append(w.AgentStartupLogs, logChunk...) | ||
} | ||
} | ||
|
||
return w | ||
} | ||
|
||
// Run generates a support bundle with the given dependencies. | ||
func Run(ctx context.Context, d *Deps) (*Bundle, error) { | ||
var b Bundle | ||
if d.Client == nil { | ||
return nil, xerrors.Errorf("developer error: missing client!") | ||
} | ||
|
||
authChecks := map[string]codersdk.AuthorizationCheck{ | ||
"Read DeploymentValues": { | ||
Object: codersdk.AuthorizationObject{ | ||
ResourceType: codersdk.ResourceDeploymentValues, | ||
}, | ||
Action: string(rbac.ActionRead), | ||
}, | ||
} | ||
|
||
authResp, err := d.Client.AuthCheck(ctx, codersdk.AuthorizationRequest{Checks: authChecks}) | ||
if err != nil { | ||
return &b, xerrors.Errorf("check authorization: %w", err) | ||
} | ||
for k, v := range authResp { | ||
if !v { | ||
return &b, xerrors.Errorf("failed authorization check: cannot %s", k) | ||
} | ||
} | ||
|
||
// Ensure we capture logs from the client. | ||
var logw strings.Builder | ||
d.Log.AppendSinks(sloghuman.Sink(&logw)) | ||
defer func() { | ||
b.Logs = strings.Split(logw.String(), "\n") | ||
}() | ||
|
||
b.Deployment = DeploymentInfo(ctx, d.Client, d.Log) | ||
b.Workspace = WorkspaceInfo(ctx, d.Client, d.Log, d.WorkspaceID, d.AgentID) | ||
b.Network = NetworkInfo(ctx, d.Client, d.Log, d.AgentID) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fetch concurrently? |
||
|
||
return &b, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could probably remove the
Get
here since other API methods don't have it.