Skip to content

Commit a1e1238

Browse files
committed
feat: Add connection_timeout and troubleshooting_url to agent
This commit adds the connection timeout and troubleshooting URL fields to coder agents. If an initial connection cannot be established within `connection_timeout` seconds, the agent status is marked as `"timeout"`. The troubleshooting URL is present only if it is configured in the Terraform template; when present, it can be shown to the user while the agent status is either `"timeout"` or `"disconnected"`. Fixes #4678
1 parent a5cc197 commit a1e1238

15 files changed

+146
-26
lines changed

cli/cliui/agent.go

+22-7
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ func Agent(ctx context.Context, writer io.Writer, opts AgentOptions) error {
3535
if err != nil {
3636
return xerrors.Errorf("fetch: %w", err)
3737
}
38-
if agent.Status == codersdk.WorkspaceAgentConnected {
38+
switch agent.Status {
39+
case codersdk.WorkspaceAgentConnected:
3940
return nil
40-
}
41-
if agent.Status == codersdk.WorkspaceAgentDisconnected {
41+
case codersdk.WorkspaceAgentTimeout, codersdk.WorkspaceAgentDisconnected:
4242
opts.WarnInterval = 0
4343
}
4444
spin := spinner.New(spinner.CharSets[78], 100*time.Millisecond, spinner.WithColor("fgHiGreen"))
@@ -77,10 +77,8 @@ func Agent(ctx context.Context, writer io.Writer, opts AgentOptions) error {
7777
}
7878
resourceMutex.Lock()
7979
defer resourceMutex.Unlock()
80-
message := "Don't panic, your workspace is booting up!"
81-
if agent.Status == codersdk.WorkspaceAgentDisconnected {
82-
message = "The workspace agent lost connection! Wait for it to reconnect or restart your workspace."
83-
}
80+
81+
message := waitingMessage(agent)
8482
// This saves the cursor position, then defers clearing from the cursor
8583
// position to the end of the screen.
8684
_, _ = fmt.Fprintf(writer, "\033[s\r\033[2K%s\n\n", Styles.Paragraph.Render(Styles.Prompt.String()+message))
@@ -105,3 +103,20 @@ func Agent(ctx context.Context, writer io.Writer, opts AgentOptions) error {
105103
return nil
106104
}
107105
}
106+
107+
// waitingMessage returns the text shown to the user while waiting on the
// workspace agent, selected by agent.Status.
//
// For the timeout and disconnected statuses it returns a status-specific
// sentence followed by either a pointer to agent.TroubleshootingURL (when that
// field is non-empty) or a generic hint to wait or restart the workspace. For
// every other status it returns the regular "booting up" message unchanged.
func waitingMessage(agent codersdk.WorkspaceAgent) string {
108+
var m string
109+
switch agent.Status {
110+
case codersdk.WorkspaceAgentTimeout:
111+
m = "The workspace agent is having trouble connecting."
112+
case codersdk.WorkspaceAgentDisconnected:
113+
m = "The workspace agent lost connection!"
114+
default:
115+
// Not a failure state, no troubleshooting necessary.
116+
return "Don't panic, your workspace is booting up!"
117+
}
118+
// Only failure states reach this point; prefer the troubleshooting link
// when one is set, otherwise fall back to the generic advice below.
if agent.TroubleshootingURL != "" {
119+
return fmt.Sprintf("%s See troubleshooting instructions at: %s", m, agent.TroubleshootingURL)
120+
}
121+
return fmt.Sprintf("%s Wait for it to (re)connect or restart your workspace.", m)
122+
}

cli/cliui/resources.go

+7
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,13 @@ func renderAgentStatus(agent codersdk.WorkspaceAgent) string {
127127
since := database.Now().Sub(*agent.DisconnectedAt)
128128
return Styles.Error.Render("⦾ disconnected") + " " +
129129
Styles.Placeholder.Render("["+strconv.Itoa(int(since.Seconds()))+"s]")
130+
case codersdk.WorkspaceAgentTimeout:
131+
since := database.Now().Sub(agent.CreatedAt)
132+
return fmt.Sprintf(
133+
"%s %s",
134+
Styles.Warn.Render("⦾ timeout"),
135+
Styles.Placeholder.Render("["+strconv.Itoa(int(since.Seconds()))+"s]"),
136+
)
130137
case codersdk.WorkspaceAgentConnected:
131138
return Styles.Keyword.Render("⦿ connected")
132139
default:

coderd/database/databasefake/databasefake.go

+3
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ func (q *fakeQuerier) AcquireProvisionerJob(_ context.Context, arg database.Acqu
154154
}
155155
return database.ProvisionerJob{}, sql.ErrNoRows
156156
}
157+
157158
func (*fakeQuerier) DeleteOldAgentStats(_ context.Context) error {
158159
// no-op
159160
return nil
@@ -2310,6 +2311,8 @@ func (q *fakeQuerier) InsertWorkspaceAgent(_ context.Context, arg database.Inser
23102311
StartupScript: arg.StartupScript,
23112312
InstanceMetadata: arg.InstanceMetadata,
23122313
ResourceMetadata: arg.ResourceMetadata,
2314+
ConnectionTimeout: arg.ConnectionTimeout,
2315+
TroubleshootingUrl: arg.TroubleshootingUrl,
23132316
}
23142317

23152318
q.provisionerJobAgents = append(q.provisionerJobAgents, agent)

coderd/database/dump.sql

+3-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- Drops the connection_timeout and troubleshooting_url columns from
-- workspace_agents. Both statements run inside one transaction.
BEGIN;
2+
3+
ALTER TABLE workspace_agents
4+
DROP COLUMN connection_timeout;
5+
6+
ALTER TABLE workspace_agents
7+
DROP COLUMN troubleshooting_url;
8+
9+
COMMIT;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- Adds two columns to workspace_agents inside one transaction:
--   connection_timeout  integer, NOT NULL, default 120
--   troubleshooting_url text,    NOT NULL, default '' (empty string)
BEGIN;
2+
3+
-- Default value same as in terraform-provider-coder.
4+
ALTER TABLE workspace_agents
5+
ADD COLUMN connection_timeout integer NOT NULL DEFAULT 120;
6+
7+
ALTER TABLE workspace_agents
8+
ADD COLUMN troubleshooting_url text NOT NULL DEFAULT '';
9+
10+
COMMIT;

coderd/database/models.go

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries.sql.go

+25-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaceagents.sql

+4-2
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,12 @@ INSERT INTO
5353
startup_script,
5454
directory,
5555
instance_metadata,
56-
resource_metadata
56+
resource_metadata,
57+
connection_timeout,
58+
troubleshooting_url
5759
)
5860
VALUES
59-
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) RETURNING *;
61+
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16) RETURNING *;
6062

6163
-- name: UpdateWorkspaceAgentConnectionByID :exec
6264
UPDATE

coderd/provisionerdaemons.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ func insertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid.
764764
}
765765
snapshot.WorkspaceResources = append(snapshot.WorkspaceResources, telemetry.ConvertWorkspaceResource(resource))
766766

767-
var appSlugs = make(map[string]struct{})
767+
appSlugs := make(map[string]struct{})
768768
for _, prAgent := range protoResource.Agents {
769769
var instanceID sql.NullString
770770
if prAgent.GetInstanceId() != "" {
@@ -791,6 +791,11 @@ func insertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid.
791791
return xerrors.Errorf("invalid auth token format; must be uuid: %w", err)
792792
}
793793
}
794+
// TODO(mafredri): Keep this backwards compatibility with old provider?
795+
connectionTimeout := prAgent.GetConnectionTimeout()
796+
if connectionTimeout == 0 {
797+
connectionTimeout = 120
798+
}
794799

795800
agentID := uuid.New()
796801
dbAgent, err := db.InsertWorkspaceAgent(ctx, database.InsertWorkspaceAgentParams{
@@ -809,6 +814,8 @@ func insertWorkspaceResource(ctx context.Context, db database.Store, jobID uuid.
809814
String: prAgent.StartupScript,
810815
Valid: prAgent.StartupScript != "",
811816
},
817+
ConnectionTimeout: prAgent.GetConnectionTimeout(),
818+
TroubleshootingUrl: prAgent.GetTroubleshootingUrl(),
812819
})
813820
if err != nil {
814821
return xerrors.Errorf("insert agent: %w", err)

coderd/workspaceagents.go

+15-3
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,8 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
680680
EnvironmentVariables: envs,
681681
Directory: dbAgent.Directory,
682682
Apps: apps,
683+
ConnectionTimeout: dbAgent.ConnectionTimeout,
684+
TroubleshootingURL: dbAgent.TroubleshootingUrl,
683685
}
684686
node := coordinator.Node(dbAgent.ID)
685687
if node != nil {
@@ -716,11 +718,21 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
716718
if dbAgent.DisconnectedAt.Valid {
717719
workspaceAgent.DisconnectedAt = &dbAgent.DisconnectedAt.Time
718720
}
721+
722+
connectionTimeout := time.Duration(dbAgent.ConnectionTimeout) * time.Second
723+
719724
switch {
720725
case !dbAgent.FirstConnectedAt.Valid:
721-
// If the agent never connected, it's waiting for the compute
722-
// to start up.
723-
workspaceAgent.Status = codersdk.WorkspaceAgentConnecting
726+
switch {
727+
case database.Now().Sub(dbAgent.CreatedAt) > connectionTimeout:
728+
// If the agent took too long to connect the first time,
729+
// mark it as timed out.
730+
workspaceAgent.Status = codersdk.WorkspaceAgentTimeout
731+
default:
732+
// If the agent never connected, it's waiting for the compute
733+
// to start up.
734+
workspaceAgent.Status = codersdk.WorkspaceAgentConnecting
735+
}
724736
case dbAgent.DisconnectedAt.Time.After(dbAgent.LastConnectedAt.Time):
725737
// If we've disconnected after our last connection, we know the
726738
// agent is no longer connected.

codersdk/workspaceagents.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const (
3232
WorkspaceAgentConnecting WorkspaceAgentStatus = "connecting"
3333
WorkspaceAgentConnected WorkspaceAgentStatus = "connected"
3434
WorkspaceAgentDisconnected WorkspaceAgentStatus = "disconnected"
35+
WorkspaceAgentTimeout WorkspaceAgentStatus = "timeout"
3536
)
3637

3738
type WorkspaceAgent struct {
@@ -53,7 +54,9 @@ type WorkspaceAgent struct {
5354
Version string `json:"version"`
5455
Apps []WorkspaceApp `json:"apps"`
5556
// DERPLatency is mapped by region name (e.g. "New York City", "Seattle").
56-
DERPLatency map[string]DERPRegion `json:"latency,omitempty"`
57+
DERPLatency map[string]DERPRegion `json:"latency,omitempty"`
58+
ConnectionTimeout int32 `json:"connection_timeout"`
59+
TroubleshootingURL string `json:"troubleshooting_url,omitempty"`
5760
}
5861

5962
type WorkspaceAgentResourceMetadata struct {

0 commit comments

Comments
 (0)